## load config and vocabulary

In [1]:
from typing import cast

from omegaconf import OmegaConf, DictConfig
from commode_utils.common import print_config
from pytorch_lightning import seed_everything
from src.vocabulary import Vocabulary

# Load the experiment configuration from YAML. `cast` only narrows the static
# type for type checkers; it performs no runtime conversion.
config_path = 'configs/surrogate.yaml'
config = cast(DictConfig, OmegaConf.load(config_path))
# Print the three listed config sections for a quick visual sanity check.
print_config(config, ["gnn", "classifier", "hyper_parameters"])
# Seed before any further work so the run is reproducible; `workers=True` is
# forwarded to Lightning's seed_everything (presumably to seed DataLoader
# workers as well -- confirm against the installed Lightning version).
seed_everything(config.seed, workers=True)

# Build the vocabulary from the word2vec file referenced in the config and
# record its size and padding id for later use.
vocab = Vocabulary.build_from_w2v(config.gnn.w2v_path)
vocab_size = vocab.get_vocab_size()
pad_idx = vocab.get_pad_id()


  rank_zero_deprecation(
Global seed set to 7



gnn                                                                                               | classifier         | hyper_parameters            
-----------------------------------------------------------------------------------------------------------------------------------------------------
name: surrogate                                                                                   | hidden_size: 512   | vector_length: 128          
w2v_path: data/CWE119/w2v.wv                                                                      | n_hidden_layers: 2 | n_epochs: 50                
embed_size: 256                                                                                   | n_classes: 2       | patience: 10                
hidden_size: 256                                                                                  | drop_out: 0.5      | batch_size: 64              
pooling_ratio: 0.8                                                                                |

## load model and DataModule

In [12]:

from src.models.vd import DeepWuKong
from src.datas.datamodules import XFGDataModule

# Restore the trained model from its checkpoint.
checkpoint_path = './results/surrogate-2023-04-03.ckpt'
model = DeepWuKong.load_from_checkpoint(checkpoint_path)
# A freshly constructed nn.Module is in training mode and load_from_checkpoint
# does not change that. Switch to eval mode so dropout (and any other
# train-only layers) are disabled and predictions are deterministic.
model.eval()

# From here on use the config and vocabulary stored in the checkpoint's
# hyper-parameters, so the data pipeline matches what the model was trained
# with. NOTE: this rebinds `config`, replacing any value loaded from YAML.
config = model.hparams["config"]
vocabulary = model.hparams["vocab"]
data_module = XFGDataModule(config, vocabulary)


## setup DataLoader

In [3]:
import os.path as osp
from torch.utils.data import DataLoader

# Path of the training split: <data_folder>/<dataset name>/train.json.
train_dataset_path = osp.join(config.data_folder, config.dataset.name, 'train.json')
# `_XFGDataModule__create_dataset` is the name-mangled form of the private
# `__create_dataset` method of XFGDataModule; it is called directly so that
# `retain_source=True` can be passed (presumably this keeps each XFG's source
# lines available for display later -- confirm in XFGDataModule).
dataset = data_module._XFGDataModule__create_dataset(train_dataset_path, retain_source=True)
# batch_size=1 so every batch holds exactly one sample; batches are assembled
# with the data module's own collate function.
dl = DataLoader(dataset, batch_size=1, collate_fn=data_module.collate_wrapper)


## load single sample (vulnerability)

In [17]:
# Take the first batch whose (single) label is 1, i.e. a vulnerable sample.
# The generator stops at the first match, so the loader is not fully consumed.
x = next((sample for sample in dl if sample.labels[0] == 1), None)
if x is None:
    # Fail with a clear message instead of an AttributeError on None below.
    raise LookupError("no sample with label 1 (vulnerable) found in the DataLoader")
x.labels[0]

tensor(1)

## get single prediction

In [18]:

import torch

# Inference only: make sure train-only layers such as dropout are disabled
# (idempotent if the model is already in eval mode) and skip building an
# autograd graph for the forward pass.
model.eval()
with torch.no_grad():
    logits = model(x.graphs)
# argmax without `dim` works on the flattened output; that yields the class
# index here only because the batch contains a single sample.
y_pred = torch.argmax(logits)
print(f"true: {x.labels[0]}, predicted: {y_pred}")


true: 1, predicted: 1


## get source lines of predicted/explained sample

In [21]:
# Look up the XFG behind the selected sample via the dataset's name-mangled
# private storage, then grab its retained source lines once.
xfg = dataset._XFGDataset__XFGs[x.idx[0]]
source_lines = xfg._XFG__source
# Placeholder for the LRP output (node IDs correspond to source lines):
# for now every line index is treated as relevant.
relevant_lines = list(range(len(source_lines)))
for linenum in relevant_lines:
    print(f"{linenum}: {source_lines[linenum]}")


0:     tracepoint(stonesoup_trace, variable_signed_integral, "stonesoup_data.before", stonesoup_data.before, &stonesoup_data.before, "INITIAL-STATE");

1:     tracepoint(stonesoup_trace, variable_buffer, "stonesoup_data.buffer", stonesoup_data.buffer, "INITIAL-STATE");

2:     tracepoint(stonesoup_trace, variable_signed_integral, "stonesoup_data.after", stonesoup_data.after, &stonesoup_data.after, "INITIAL-STATE");

3:     tracepoint(stonesoup_trace, trace_point, "CROSSOVER-POINT: BEFORE");

4:     tracepoint(stonesoup_trace, trace_point, "TRIGGER-POINT: BEFORE");

5:     strcpy(stonesoup_data.buffer, nondivergent_ejectum);

6:     tracepoint(stonesoup_trace, variable_buffer, "stonesoup_data.buffer", stonesoup_data.buffer, "CROSSOVER-STATE");

7:     tracepoint(stonesoup_trace, trace_point, "CROSSOVER-POINT: AFTER");

8:     stonesoup_opt_var = strlen( stonesoup_data.buffer);

9:     for (stonesoup_i = 0; stonesoup_i < stonesoup_opt_var; ++stonesoup_i) {

10:         stonesoup_data.buf