In [1]:
from model import get_classification_model
from torch_geometric.data import Data
import glob
from tqdm import tqdm
import torch

In [2]:
# Nested configuration for the baseline GIN graph classifier. It is completed
# with "features"/"classes" and handed to get_classification_model further down.
BASELINE_GIN_CLASSIFIER = dict(
    type="GraphClassifier",
    name="BASELINE_GIN",
    encoder=dict(
        type="GraphComposite",
        pooling=dict(type="sum"),
        encoder=dict(
            num_layers=3,
            hidden_channels=128,
            layer_type="CGIN",
            # NOTE: this is the string "None", not the Python None object.
            norm_type="None",
        ),
    ),
    classifier=dict(
        layer_type="MLP",
        dropout=0.5,
        num_layers=3,
    ),
)

In [3]:
# Complete the shared config in place before building the model.
# NOTE(review): 150 / 1 are presumably the input feature width and the output
# size expected by the checkpoint — confirm against the training setup.
BASELINE_GIN_CLASSIFIER.update(features=150, classes=1)

# Build the full classifier, but keep only the node-level encoder that sits
# inside its encoder; that sub-module is what the rest of the notebook calls.
model = get_classification_model(BASELINE_GIN_CLASSIFIER).encoder.node_level_encoder

In [4]:
# Restore the trained weights from the checkpoint file.
# map_location="cpu" lets a checkpoint that was written from a GPU be
# deserialized on a machine without one; load_state_dict then copies the
# values into the parameters the model already owns.
model.load_state_dict(torch.load("14_model.chkpt", map_location="cpu"))

# Put the module in evaluation mode. The trailing semicolon stops the notebook
# from echoing the module repr that eval() returns.
model.eval();

In [5]:
import os
import pickle
import gzip
from collections import defaultdict

In [6]:
from datasets import Dataset

In [17]:
from sklearn.metrics import matthews_corrcoef, accuracy_score, f1_score, balanced_accuracy_score

def compute_metrics(pred, true):
    """Score per-sample class scores against ground-truth labels.

    Args:
        pred: Per-sample score vectors in any form ``torch.as_tensor`` accepts.
            The predicted class of a sample is the argmax over the last
            dimension.
        true: Ground-truth class labels, one per sample.

    Returns:
        dict with the keys ``"MCC"`` (Matthews correlation coefficient),
        ``"F1"`` (macro-averaged F1), ``"Acc"`` (accuracy) and ``"BAcc"``
        (balanced accuracy), in that order.
    """
    predicted_labels = torch.as_tensor(pred).argmax(dim=-1).tolist()

    # Metric name -> callable taking (y_true, y_pred).
    scorers = {
        "MCC": matthews_corrcoef,
        "F1": lambda y_true, y_pred: f1_score(y_true, y_pred, average='macro'),
        "Acc": accuracy_score,
        "BAcc": balanced_accuracy_score,
    }
    return {
        metric: scorer(true, predicted_labels)
        for metric, scorer in scorers.items()
    }

In [18]:
@torch.no_grad()
def eval(CPG_SET, dataset_name):
    """Run the model over one cached CPG set, print metrics, and save the predictions.

    NOTE: this name shadows the built-in ``eval``. It is kept because other
    cells call the function by this name.

    Every file matching ``../cache/{CPG_SET}/*.cpg.pt.gz`` is read as a
    gzip-compressed pickle with the entries ``astenc``, ``codeenc``,
    ``edge_index`` and ``y``. The sample index and the label are parsed from
    the file name: the text before the first ``_`` is the index, and the text
    between the last ``_`` and the following ``.`` is the label.

    The model returns three outputs per graph; only the third one is scored
    and stored.

    Args:
        CPG_SET: Name of the sub-directory of ``../cache`` to evaluate.
        dataset_name: Sub-path below ``report/prediction/cgin`` under which
            the predictions (columns ``index``, ``pred``, ``true``) are saved.

    Raises:
        FileNotFoundError: If no cached graph file matches the set name.
    """
    print(f"Evaluating {CPG_SET}")

    pattern = f"../cache/{CPG_SET}/*.cpg.pt.gz"
    # glob returns files in arbitrary order; sorting makes the row order of the
    # saved predictions reproducible between runs.
    cache_files = sorted(glob.glob(pattern))
    if not cache_files:
        # Fail with a clear message instead of an opaque argmax error on an
        # empty prediction list (e.g. when the set name is misspelled).
        raise FileNotFoundError(f"No cached graphs match {pattern!r}")

    preds_co = []
    trues = []
    index = []
    for name in tqdm(cache_files):
        # basename handles whichever path separator glob produced.
        file_name = os.path.basename(name)
        idx = file_name.split("_")[0]
        label = int(file_name.split("_")[-1].split(".")[0])

        # SECURITY: pickle.load can execute arbitrary code contained in the
        # file; only point this at cache directories you produced yourself.
        with gzip.open(name) as handle:  # closed even if unpickling fails
            object_file = pickle.load(handle)

        # Node features are the two encodings concatenated along dim 1.
        graph = Data(
            x=torch.cat((object_file["astenc"], object_file["codeenc"]), dim=1),
            edge_index=object_file["edge_index"],
            y=object_file["y"],
        )
        graph.edge_index = graph.edge_index.long()
        graph.x = graph.x.float()

        # Only the third model output is used; the first two are discarded.
        _, _, co_logs = model(graph)

        preds_co.append(co_logs.squeeze().tolist())
        trues.append(label)
        index.append(int(idx))

    print(compute_metrics(preds_co, trues))

    predictions = Dataset.from_dict({
        "index": index,
        "pred": preds_co,
        "true": trues,
    })
    predictions.save_to_disk(os.path.join("report/prediction/cgin", dataset_name))

In [19]:
# Score the LINEVUL_TEST cache and store its predictions under the "test" sub-directory.
eval(CPG_SET="LINEVUL_TEST", dataset_name="test")

Evaluating LINEVUL_TEST


100%|██████████| 9960/9960 [03:06<00:00, 53.33it/s]


{'MCC': 0.1257080527035215, 'F1': 0.46966261116479, 'Acc': 0.6399598393574297, 'BAcc': 0.6285108767801137}


Saving the dataset (0/1 shards):   0%|          | 0/9960 [00:00<?, ? examples/s]

In [22]:
# Evaluation work list: each entry is a (CPG_SET, dataset_name) pair that the
# loop below passes to eval(). The first element names the sub-directory of
# ../cache to read, the second the sub-path under report/prediction/cgin to
# write. Entries that are commented out are skipped when the list is iterated.
# NOTE(review): "STLYE" looks like a typo for "STYLE", but these strings are
# used to build the cache directory path — confirm the on-disk names before
# changing them.
DATASETS = [
    # ("LINEVUL_TEST", "test"),
    # ("LINEVUL_CHROMIUM", "perturbed-data/apply_codestyle_Chromium"),
    # ("LINEVUL_GOOGLE", "perturbed-data/apply_codestyle_Google"),
    # ("LINEVUL_LLVM", "perturbed-data/apply_codestyle_LLVM"),
    # ("LINEVUL_MOZILLA", "perturbed-data/apply_codestyle_Mozilla"),
    # ("LINEVUL_COBFUSCATE", "perturbed-data/apply_cobfuscate"),
    # ("LINEVUL_DOUBLE_OBFUSCATE", "perturbed-data/double_obfuscate"),
    ("LINEVUL_OBFUSCATE_STLYE", "perturbed-data/obfuscate_then_style"),
    ("LINEVUL_PYOBFUSCATE_STLYE", "perturbed-data/py_obfuscate_then_style"),
    ("LINEVUL_PYOBFUSCATOR", "perturbed-data/apply_py_obfuscator"),
]

In [23]:
# Evaluate every configured dataset in turn; each call prints its metrics and
# saves its predictions to disk.
for cpg_path, dataset_name in DATASETS:
    # cpg_path is the cache sub-directory name that eval() receives as CPG_SET.
    eval(cpg_path, dataset_name)

Evaluating LINEVUL_OBFUSCATE_STLYE


100%|██████████| 18337/18337 [05:55<00:00, 51.57it/s]

{'MCC': 0.039493698776597176, 'F1': 0.39617867339043356, 'Acc': 0.5277308174728691, 'BAcc': 0.5435254367333818}





Saving the dataset (0/1 shards):   0%|          | 0/18337 [00:00<?, ? examples/s]

Evaluating LINEVUL_PYOBFUSCATE_STLYE


100%|██████████| 9229/9229 [03:15<00:00, 47.25it/s]

{'MCC': 0.12472308710013363, 'F1': 0.48075173717105013, 'Acc': 0.6618268501462781, 'BAcc': 0.6237069248113657}





Saving the dataset (0/1 shards):   0%|          | 0/9229 [00:00<?, ? examples/s]

Evaluating LINEVUL_PYOBFUSCATOR


100%|██████████| 9180/9180 [03:00<00:00, 50.90it/s]

{'MCC': 0.12138238909927683, 'F1': 0.4760317610289567, 'Acc': 0.6535947712418301, 'BAcc': 0.6213756078009471}





Saving the dataset (0/1 shards):   0%|          | 0/9180 [00:00<?, ? examples/s]