In [1]:
import os

# Later cells use paths relative to the working directory ("cache/...",
# "results/...", "data/..."), so the notebook moves one directory up from where
# the kernel started. The target is resolved to an absolute path only once per
# kernel session: re-running this cell then keeps the same directory instead of
# climbing one level higher on every execution (a bare relative chdir("..") is
# not idempotent).
if "WORKING_DIR" not in globals():
    WORKING_DIR = os.path.abspath("..")
os.chdir(WORKING_DIR)

In [2]:
from utils import get_classification_model
from torch_geometric.data import Data
import glob
from tqdm import tqdm
import torch

import pickle
import gzip
from params import CLASSIFIER_PARAMS
from datasets import Dataset

In [3]:
# Apply the notebook-specific overrides to the shared CLASSIFIER_PARAMS
# mapping (modified in place, in this order) before the model is built.
for _key, _value in (("features", 150), ("classes", 1), ("edge_dim", None)):
    CLASSIFIER_PARAMS[_key] = _value

# Build the classifier from the adjusted parameters and load the stored
# checkpoint for the REVEAL / LineVul baseline run.
model = get_classification_model(CLASSIFIER_PARAMS)
model.load("results/REVEAL_CLASSIFIER_LINEVUL_baseline/checkpoint/")
# Kept as the last expression so the cell also displays the model structure.
model.eval()

GraphClassifier(
  (encoder): CompositeGraphLevelEncoder(
    (node_level_encoder): GGNNEncoder(
      (layers): ModuleList()
      (norms): ModuleList()
      (ggnn): GatedGraphConv(200, num_layers=8)
      (out): Linear(in_features=200, out_features=200, bias=True)
    )
  )
  (classifier): MLPClassifier(
    (layers): ModuleList(
      (0): Linear(in_features=200, out_features=256, bias=True)
      (1): Linear(in_features=256, out_features=128, bias=True)
      (2): Linear(in_features=128, out_features=256, bias=True)
      (3): Linear(in_features=256, out_features=1, bias=True)
    )
  )
)

In [4]:
from sklearn.metrics import matthews_corrcoef, accuracy_score, f1_score, balanced_accuracy_score

def compute_metrics(pred, true, threshold=0.5):
    """Binarize prediction scores and compute binary-classification metrics.

    Args:
        pred: Sequence (or tensor) of numeric scores, one per sample. A score
            strictly greater than ``threshold`` is mapped to class 1, anything
            else to class 0.
        true: Sequence of ground-truth labels, aligned with ``pred``.
        threshold: Decision threshold applied to ``pred``. Defaults to 0.5,
            which reproduces the previously hard-coded behaviour.

    Returns:
        dict with the keys ``"MCC"`` (Matthews correlation coefficient),
        ``"F1"`` (macro-averaged F1), ``"Acc"`` (accuracy) and ``"BAcc"``
        (balanced accuracy).
    """
    # as_tensor accepts lists as well as tensors; the comparison yields booleans
    # that are turned into a plain list of 0/1 ints for scikit-learn.
    predicted = (torch.as_tensor(pred) > threshold).long().tolist()
    return {
        "MCC": matthews_corrcoef(true, predicted),
        "F1": f1_score(true, predicted, average='macro'),
        "Acc": accuracy_score(true, predicted),
        "BAcc": balanced_accuracy_score(true, predicted),
    }

In [5]:
@torch.no_grad()
def eval(CPG_SET, dataset_name):
    """Classify every cached graph of one CPG set, print metrics, save predictions.

    Reads all ``cache/<CPG_SET>/*.cpg.pt.gz`` files, runs the module-level
    ``model`` on each one, prints the result of ``compute_metrics`` and stores
    the per-sample predictions on disk.

    NOTE: this function shadows the built-in ``eval``. The name is kept because
    the driver cell calls it under this name.

    Args:
        CPG_SET: Name of the sub-directory below ``cache/`` holding the gzipped
            pickles. File names are parsed as ``<index>_..._<label>.<ext>``:
            the text before the first ``_`` is the sample index, the text
            between the last ``_`` and the following ``.`` is the integer label.
        dataset_name: Path relative to ``data/report/prediction/reveal`` under
            which the predictions are saved.

    Returns:
        None. The saved dataset has the columns ``index``, ``pred`` and ``true``.
    """
    print(f"Evaluating {CPG_SET}")
    preds = []
    trues = []
    index = []
    # glob returns paths in arbitrary order; sorting keeps the order of the
    # saved rows reproducible between runs and machines.
    for name in tqdm(sorted(glob.glob(f"cache/{CPG_SET}/*.cpg.pt.gz"))):
        name = name.replace("\\", "/")  # normalise Windows separators
        file_name = name.split("/")[-1]
        idx = file_name.split("_")[0]
        label = int(file_name.split("_")[-1].split(".")[0])

        # The context manager closes the gzip handle right after loading; the
        # bare gzip.open(...) call used before left one open handle per sample.
        # NOTE(security): pickle.load can execute arbitrary code - only point
        # this at cache files produced by a trusted pipeline.
        with gzip.open(name) as handle:
            object_file = pickle.load(handle)

        # Node features are the two stored encodings joined along dim 1.
        graph = Data(
            x=torch.cat((object_file["astenc"], object_file["codeenc"]), dim=1),
            edge_index=object_file["edge_index"],
            y=object_file["y"],
        )
        graph.edge_index = graph.edge_index.long()
        graph.x = graph.x.float()
        pred = model.classify(graph)

        # .item() requires the squeezed output to hold exactly one value.
        preds.append(pred.squeeze().item())
        trues.append(label)
        index.append(int(idx))
    print(compute_metrics(preds, trues))
    predictions = Dataset.from_dict({
        "index": index,
        "pred": preds,
        "true": trues,
    })
    predictions.save_to_disk(os.path.join("data/report/prediction/reveal", dataset_name))

In [6]:
# Evaluation sets as (directory name below cache/, output path below the
# prediction report directory) pairs, in evaluation order. The strings are
# used verbatim as paths, so their spelling must not be "corrected" here.
DATASETS = list(
    {
        "LINEVUL_TEST": "test",
        "LINEVUL_CHROMIUM": "perturbed-data/apply_codestyle_Chromium",
        "LINEVUL_GOOGLE": "perturbed-data/apply_codestyle_Google",
        "LINEVUL_LLVM": "perturbed-data/apply_codestyle_LLVM",
        "LINEVUL_MOZILLA": "perturbed-data/apply_codestyle_Mozilla",
        "LINEVUL_COBFUSCATE": "perturbed-data/apply_cobfuscate",
        "LINEVUL_DOUBLE_OBFUSCATE": "perturbed-data/double_obfuscate",
        "LINEVUL_OBFUSCATE_STLYE": "perturbed-data/obfuscate_then_style",
        "LINEVUL_PYOBFUSCATE_STLYE": "perturbed-data/py_obfuscate_then_style",
        "LINEVUL_PYOBFUSCATOR": "perturbed-data/apply_py_obfuscator",
    }.items()
)

In [7]:
# Evaluate each configured set in turn; every call prints its metrics and
# writes its predictions to disk.
for cpg_path, dataset_name in DATASETS:
    eval(CPG_SET=cpg_path, dataset_name=dataset_name)

Evaluating LINEVUL_TEST


100%|██████████| 9960/9960 [03:10<00:00, 52.40it/s]


{'MCC': 0.13004653768274682, 'F1': 0.564230028049447, 'Acc': 0.8914658634538153, 'BAcc': 0.5718852431421637}


Saving the dataset (0/1 shards):   0%|          | 0/9960 [00:00<?, ? examples/s]

Evaluating LINEVUL_CHROMIUM


100%|██████████| 9968/9968 [03:13<00:00, 51.57it/s]


{'MCC': 0.13230661833963397, 'F1': 0.56545885320408, 'Acc': 0.8921548956661316, 'BAcc': 0.5726421269123793}


Saving the dataset (0/1 shards):   0%|          | 0/9968 [00:00<?, ? examples/s]

Evaluating LINEVUL_GOOGLE


100%|██████████| 9967/9967 [03:11<00:00, 52.15it/s]


{'MCC': 0.1265459138799394, 'F1': 0.562146249910711, 'Acc': 0.8881308317447577, 'BAcc': 0.5712900143882419}


Saving the dataset (0/1 shards):   0%|          | 0/9967 [00:00<?, ? examples/s]

Evaluating LINEVUL_LLVM


100%|██████████| 9967/9967 [03:10<00:00, 52.31it/s]


{'MCC': 0.13175952471129812, 'F1': 0.5651573562874523, 'Acc': 0.8918430821711648, 'BAcc': 0.5724786199606239}


Saving the dataset (0/1 shards):   0%|          | 0/9967 [00:00<?, ? examples/s]

Evaluating LINEVUL_MOZILLA


100%|██████████| 10149/10149 [02:58<00:00, 56.81it/s]

{'MCC': 0.12994687867621513, 'F1': 0.5646078176952396, 'Acc': 0.8957532761848458, 'BAcc': 0.569617858658973}





Saving the dataset (0/1 shards):   0%|          | 0/10149 [00:00<?, ? examples/s]

Evaluating LINEVUL_COBFUSCATE


100%|██████████| 18233/18233 [05:38<00:00, 53.83it/s]

{'MCC': -0.004637888965314918, 'F1': 0.22330325399488127, 'Acc': 0.2429112049580431, 'BAcc': 0.495797514264231}





Saving the dataset (0/1 shards):   0%|          | 0/18233 [00:00<?, ? examples/s]

Evaluating LINEVUL_DOUBLE_OBFUSCATE


100%|██████████| 18402/18402 [08:22<00:00, 36.65it/s]


{'MCC': -0.013020779068161079, 'F1': 0.27894882320162706, 'Acc': 0.32469296815563525, 'BAcc': 0.4867738660609815}


Saving the dataset (0/1 shards):   0%|          | 0/18402 [00:00<?, ? examples/s]

Evaluating LINEVUL_OBFUSCATE_STLYE


100%|██████████| 18337/18337 [08:02<00:00, 38.01it/s]


{'MCC': 0.00722309922886159, 'F1': 0.23421330718329686, 'Acc': 0.25663958117467417, 'BAcc': 0.5066588943861485}


Saving the dataset (0/1 shards):   0%|          | 0/18337 [00:00<?, ? examples/s]

Evaluating LINEVUL_PYOBFUSCATE_STLYE


100%|██████████| 9229/9229 [04:37<00:00, 33.23it/s]


{'MCC': 0.12142616528190155, 'F1': 0.5603252413694015, 'Acc': 0.8907790659876477, 'BAcc': 0.565196176044325}


Saving the dataset (0/1 shards):   0%|          | 0/9229 [00:00<?, ? examples/s]

Evaluating LINEVUL_PYOBFUSCATOR


100%|██████████| 9180/9180 [03:55<00:00, 38.92it/s]


{'MCC': 0.11724991726623797, 'F1': 0.5576267580898103, 'Acc': 0.8854030501089325, 'BAcc': 0.5656741448596652}


Saving the dataset (0/1 shards):   0%|          | 0/9180 [00:00<?, ? examples/s]