In [8]:
from model import get_classification_model
from torch_geometric.data import Data
import glob
from tqdm import tqdm
import torch

In [9]:
# Nested configuration describing the baseline GIN graph classifier.
# It is passed to get_classification_model once "features" and "classes" are set.
BASELINE_GIN_CLASSIFIER = dict(
    type="GraphClassifier",
    name="BASELINE_GIN",
    encoder=dict(
        type="GraphComposite",
        pooling=dict(type="sum"),
        encoder=dict(
            num_layers=3,
            hidden_channels=128,
            layer_type="CGIN",
            norm_type="None",
        ),
    ),
    classifier=dict(
        layer_type="MLP",
        dropout=0.5,
        num_layers=3,
    ),
)

In [10]:
# Complete the config with the "features" and "classes" entries, then build the
# classifier and keep only its node-level encoder as `model`.
BASELINE_GIN_CLASSIFIER.update(features=150, classes=1)
model = (
    get_classification_model(BASELINE_GIN_CLASSIFIER)
    .encoder
    .node_level_encoder
)

In [11]:
# Restore the trained weights. `map_location="cpu"` lets a checkpoint that was
# saved from a GPU load on a CPU-only machine; the model is never moved to
# another device in this notebook.
# NOTE(security): torch.load unpickles the file -- only load trusted checkpoints.
model.load_state_dict(torch.load("14_model.chkpt", map_location="cpu"))
model.eval()
# Trailing None keeps the cell from echoing the module repr returned by eval().
None

In [12]:
# Total number of elements across all model parameters that have requires_grad set.
sum(p.numel() for p in model.parameters() if p.requires_grad)

222262

In [13]:
import csv

# Raise the csv field size limit; presumably some fields in test.csv exceed the
# default limit -- TODO confirm.
csv.field_size_limit(1000000000)

# Maps the "index" column (kept as a string) to the list of flawed source lines
# obtained by splitting the "flaw_line" column on "/~/".
groundtruth = {}
with open("../test.csv", 'r', newline='') as csvfile:
    reader = csv.DictReader(csvfile)
    for row in tqdm(reader):
        # Rows whose target is "0" are skipped.
        if row["target"] == "0":
            continue
        # DictReader yields "" for an empty cell and None only for rows that are
        # shorter than the header, so test for falsiness rather than `is None`.
        if not row["flaw_line"]:
            continue
        groundtruth[row["index"]] = row["flaw_line"].split("/~/")

0it [00:00, ?it/s]

18864it [00:22, 849.48it/s] 


In [14]:
import re

@torch.no_grad()
def sort_lines(scores):
    """Return the positions of ``scores`` ordered from highest to lowest score.

    ``scores`` is converted with ``torch.as_tensor``; the result is a plain
    Python list of integer indices.
    """
    score_tensor = torch.as_tensor(scores)
    ordering = torch.sort(score_tensor, descending=True).indices
    return ordering.tolist()


def get_flaw_indices(lines, flaw_lines):
    """Return the 0-based positions in ``lines`` that match an entry of ``flaw_lines``.

    All whitespace is removed from every line and flaw line before comparing.
    A line matches when, after that removal, it is a substring of some flaw
    line or some flaw line is a substring of it. Lines and flaw lines that are
    empty after whitespace removal are ignored.

    NOTE(review): because containment is checked in both directions, a very
    short line such as ``}`` matches whenever it occurs inside a flaw line --
    confirm this is intended.

    Args:
        lines: iterable of source-line strings.
        flaw_lines: iterable of flawed-line strings to look for.

    Returns:
        List of matching indices into ``lines``, in increasing order.
    """
    # Raw string: "\s" in a normal literal is an invalid escape sequence.
    whitespace = re.compile(r"\s")

    def clean(line):
        return whitespace.sub("", line)

    # Clean each flaw line once and drop the ones that become empty.
    flaw_lines = [cleaned for cleaned in map(clean, flaw_lines) if cleaned]
    lines = [clean(line) for line in lines]

    indices = []
    for i, line in enumerate(lines):
        if not line:
            continue
        if any(line in flaw_line or flaw_line in line for flaw_line in flaw_lines):
            indices.append(i)
    return indices


def min_rank_of_indices(sorted_indices, searched_indices):
    """Return the best (smallest) rank held by any of ``searched_indices``.

    The rank of an index is its position in ``sorted_indices``. Searched
    indices that do not occur in ``sorted_indices`` are ignored; if none occur,
    ``float("inf")`` is returned.
    """
    position_of = {}
    for position, line_index in enumerate(sorted_indices):
        position_of[line_index] = position

    best = float("inf")
    for wanted in searched_indices:
        if wanted not in position_of:
            continue
        if position_of[wanted] < best:
            best = position_of[wanted]
    return best

In [15]:
import os
import pickle
import gzip
from collections import defaultdict

In [2]:
def get_c_lines(idx, set="test"):
    """Read ``../data/test/{set}/{idx}_1.c`` and return its lines as a list.

    Line endings are kept, as with ``readlines``.
    """
    source_path = f"../data/test/{set}/{idx}_1.c"
    with open(source_path, "r") as source_file:
        source_lines = source_file.readlines()
    return source_lines

In [3]:
def eval_linelevel(CPG_SET, C_SET):
    """Evaluate line-level localisation for every cached graph of ``CPG_SET``.

    For each file matching ``../cache/{CPG_SET}/*_1*`` whose id is in
    ``groundtruth`` and whose flaw lines can be located in the C source from
    ``get_c_lines(idx, C_SET)``, the node scores returned by
    ``model.layer.explain`` are spread over the source lines each node covers.
    The lines are then sorted by score and the best rank of any flaw line is
    recorded.

    Prints Top-1/5/10/100 accuracy, the mean rank ("IFA") and the sample count
    over all samples with a finite rank. Writes the ids of those samples to
    ``successful_idxs/{CPG_SET}_successful.txt``, one per line.

    Args:
        CPG_SET: name of the cache sub-directory holding the pickled graphs.
        C_SET: name of the source sub-directory passed to ``get_c_lines``.
    """
    ranks = []
    successful_idxs = []
    print(f"Evaluating {CPG_SET}")
    for name in tqdm(list(glob.glob(f"../cache/{CPG_SET}/*_1*"))):
        idx = name.split("/")[-1].split("_")[0]
        if idx not in groundtruth:
            continue

        c_lines = get_c_lines(idx, C_SET)
        flaw_indices = get_flaw_indices(c_lines, groundtruth[idx])
        if len(flaw_indices) < 1:
            continue

        # NOTE(security): pickle can execute arbitrary code; only load trusted caches.
        # The context manager closes the gzip handle, which the bare call leaked.
        with gzip.open(name) as cache_file:
            object_file = pickle.load(cache_file)
        data = Data(
            x=torch.cat((object_file["astenc"], object_file["codeenc"]), dim=1),
            edge_index=object_file["edge_index"],
            y=object_file["y"],
        )
        data.edge_index = data.edge_index.long()
        data.x = data.x.float()
        # Forward pass kept from the original code; its three outputs are not used here.
        c_logs, o_logs, co_logs = model(data)

        # Only the third value returned by explain() is used for scoring.
        edge_c, edge_t, node_c, node_t = model.layer.explain(data)
        node_mask = node_c.sigmoid()

        # Spread each node's score evenly over the 1-based source lines it covers.
        linescores = defaultdict(int)
        for node_idx, score in enumerate(node_mask):
            location = object_file["lines"][node_idx]
            if ":" not in location:
                continue
            # The first space-separated token has the form lower:upper (quotes removed).
            bounds = location.split(" ")[0].split(":")
            lower = int(bounds[0].replace("\"", ""))
            upper = int(bounds[1].replace("\"", ""))
            lines = list(range(lower, upper + 1))
            for line in lines:
                linescores[line] += score / len(lines)

        # No node carried a line range: nothing can be ranked (max() would raise).
        if not linescores:
            continue

        # Dense 0-based score list; lines without any node keep a score of 0.
        lines = [0 for _ in range(max(linescores.keys()))]
        for line, score in linescores.items():
            lines[line - 1] = score

        sorted_lines = sort_lines(lines)
        rank = min_rank_of_indices(sorted_lines, flaw_indices)
        ranks.append(rank)
        if rank != float("inf"):
            successful_idxs.append(idx)

    # Float dtype: torch.mean rejects an int64 tensor, which is what as_tensor
    # produces when every recorded rank is a finite int.
    ranks = torch.as_tensor(ranks, dtype=torch.float)
    ranks = ranks[ranks.isfinite()]
    print({
        "Top1-Acc": torch.sum(ranks < 1) / len(ranks),
        "Top5-Acc": torch.sum(ranks < 5) / len(ranks),
        "Top10-Acc": torch.sum(ranks < 10) / len(ranks),
        "Top100-Acc": torch.sum(ranks < 100) / len(ranks),
        "IFA": torch.mean(ranks),
        "Count": len(ranks),
    })
    # Write to the directory the merge cell reads from; the original path was
    # misspelt as "scuccessful_idxs".
    os.makedirs("successful_idxs", exist_ok=True)
    with open(f"successful_idxs/{CPG_SET}_successful.txt", "w") as f:
        f.write("\n".join(successful_idxs))

In [67]:
# Line-level evaluation of the "LINEVUL_TEST" cache against the C sources in "test".
eval_linelevel("LINEVUL_TEST", "test")

Evaluating LINEVUL_TEST


100%|██████████| 593/593 [00:25<00:00, 22.92it/s]

{'Top1-Acc': tensor(0.2970), 'Top5-Acc': tensor(0.5916), 'Top10-Acc': tensor(0.7285), 'Top100-Acc': tensor(0.9768), 'IFA': tensor(12.3949), 'Count': 431}
{'Top1-Acc': tensor(0.2991), 'Top5-Acc': tensor(0.5958), 'Top10-Acc': tensor(0.7336), 'Top100-Acc': tensor(0.9836), 'IFA': tensor(12.3949), 'Count': 428}





In [68]:
# Line-level evaluation of the "LINEVUL_CHROMIUM" cache against the C sources in "apply_codestyle_Chromium".
eval_linelevel("LINEVUL_CHROMIUM", "apply_codestyle_Chromium")

Evaluating LINEVUL_CHROMIUM


100%|██████████| 596/596 [00:23<00:00, 24.85it/s]

{'Top1-Acc': tensor(0.2401), 'Top5-Acc': tensor(0.5289), 'Top10-Acc': tensor(0.6778), 'Top100-Acc': tensor(0.9574), 'IFA': tensor(16.6440), 'Count': 329}
{'Top1-Acc': tensor(0.2446), 'Top5-Acc': tensor(0.5387), 'Top10-Acc': tensor(0.6904), 'Top100-Acc': tensor(0.9752), 'IFA': tensor(16.6440), 'Count': 323}





In [69]:
# Line-level evaluation of the "LINEVUL_GNU" cache against the C sources in "apply_codestyle_GNU".
eval_linelevel("LINEVUL_GNU", "apply_codestyle_GNU")

Evaluating LINEVUL_GNU


100%|██████████| 601/601 [00:29<00:00, 20.59it/s]

{'Top1-Acc': tensor(0.0891), 'Top5-Acc': tensor(0.3217), 'Top10-Acc': tensor(0.5039), 'Top100-Acc': tensor(0.9070), 'IFA': tensor(28.7391), 'Count': 258}
{'Top1-Acc': tensor(0.0909), 'Top5-Acc': tensor(0.3281), 'Top10-Acc': tensor(0.5138), 'Top100-Acc': tensor(0.9249), 'IFA': tensor(28.7391), 'Count': 253}





In [70]:
# Line-level evaluation of the "LINEVUL_GOOGLE" cache against the C sources in "apply_codestyle_Google".
eval_linelevel("LINEVUL_GOOGLE", "apply_codestyle_Google")

Evaluating LINEVUL_GOOGLE


100%|██████████| 596/596 [00:26<00:00, 22.25it/s]

{'Top1-Acc': tensor(0.2266), 'Top5-Acc': tensor(0.5529), 'Top10-Acc': tensor(0.7009), 'Top100-Acc': tensor(0.9637), 'IFA': tensor(15.3252), 'Count': 331}
{'Top1-Acc': tensor(0.2301), 'Top5-Acc': tensor(0.5613), 'Top10-Acc': tensor(0.7117), 'Top100-Acc': tensor(0.9785), 'IFA': tensor(15.3252), 'Count': 326}





In [71]:
# Line-level evaluation of the "LINEVUL_LLVM" cache against the C sources in "apply_codestyle_LLVM".
eval_linelevel("LINEVUL_LLVM", "apply_codestyle_LLVM")

Evaluating LINEVUL_LLVM


100%|██████████| 596/596 [00:28<00:00, 20.59it/s]

{'Top1-Acc': tensor(0.2478), 'Top5-Acc': tensor(0.5373), 'Top10-Acc': tensor(0.6866), 'Top100-Acc': tensor(0.9612), 'IFA': tensor(15.7182), 'Count': 335}
{'Top1-Acc': tensor(0.2515), 'Top5-Acc': tensor(0.5455), 'Top10-Acc': tensor(0.6970), 'Top100-Acc': tensor(0.9758), 'IFA': tensor(15.7182), 'Count': 330}





In [72]:
# Line-level evaluation of the "LINEVUL_MOZILLA" cache against the C sources in "apply_codestyle_Mozilla".
eval_linelevel("LINEVUL_MOZILLA", "apply_codestyle_Mozilla")

Evaluating LINEVUL_MOZILLA


100%|██████████| 601/601 [00:30<00:00, 19.48it/s]

{'Top1-Acc': tensor(0.2212), 'Top5-Acc': tensor(0.4985), 'Top10-Acc': tensor(0.6342), 'Top100-Acc': tensor(0.9499), 'IFA': tensor(19.4187), 'Count': 339}
{'Top1-Acc': tensor(0.2259), 'Top5-Acc': tensor(0.5090), 'Top10-Acc': tensor(0.6476), 'Top100-Acc': tensor(0.9699), 'IFA': tensor(19.4187), 'Count': 332}





In [103]:
# Merge successful idxs: keep only the ids that appear in every per-dataset
# result file under successful_idxs/, then write them to merged.txt.
merged = None
for path in glob.glob("successful_idxs/*.txt"):
    # Skip the output of a previous merge so it does not constrain the result.
    if "merged" in path:
        continue
    with open(path, "r") as f:
        content = f.read()
    # Ignore blank lines (empty file, trailing newline) instead of failing on int('').
    content = {int(line) for line in content.split("\n") if line.strip()}
    merged = content if merged is None else merged & content
# With no input files there is nothing in common; fall back to an empty set.
if merged is None:
    merged = set()
with open("successful_idxs/merged.txt", "w") as f:
    # Sorted so the written file is deterministic across runs.
    f.write("\n".join(map(str, sorted(merged))))
print(len(merged))

181


In [17]:
def eval_linelevel_merged(CPG_SET, C_SET):
    """Evaluate line-level localisation on the ids listed in ``successful_idxs/merged.txt``.

    For each listed id that is in ``groundtruth`` and whose flaw lines can be
    located in the C source from ``get_c_lines(idx, C_SET)``, the graph
    ``../cache/{CPG_SET}/{idx}_1.cpg.pt.gz`` is loaded. The node scores
    returned by ``model.layer.explain`` are spread over the source lines each
    node covers, and the best rank of any flaw line is recorded.

    Prints Top-1/3/5 accuracy as percentages rounded to two decimals, computed
    over the samples with a finite rank.

    Args:
        CPG_SET: name of the cache sub-directory holding the pickled graphs.
        C_SET: name of the source sub-directory passed to ``get_c_lines``.
    """
    ranks = []
    with open("successful_idxs/merged.txt", "r") as f:
        # Blank entries (e.g. from a trailing newline) are not sample ids.
        successful_idxs = {entry for entry in f.read().split("\n") if entry}
    print(f"Evaluating {CPG_SET}")
    for idx in successful_idxs:
        if idx not in groundtruth:
            continue

        c_lines = get_c_lines(idx, C_SET)
        flaw_indices = get_flaw_indices(c_lines, groundtruth[idx])
        if len(flaw_indices) < 1:
            continue

        name = f"../cache/{CPG_SET}/{idx}_1.cpg.pt.gz"
        # NOTE(security): pickle can execute arbitrary code; only load trusted caches.
        # The context manager closes the gzip handle, which the bare call leaked.
        with gzip.open(name) as cache_file:
            object_file = pickle.load(cache_file)
        data = Data(
            x=torch.cat((object_file["astenc"], object_file["codeenc"]), dim=1),
            edge_index=object_file["edge_index"],
            y=object_file["y"],
        )
        data.edge_index = data.edge_index.long()
        data.x = data.x.float()
        # Forward pass kept from the original code; its three outputs are not used here.
        c_logs, o_logs, co_logs = model(data)

        # Only the third value returned by explain() is used for scoring.
        edge_c, edge_t, node_c, node_t = model.layer.explain(data)
        node_mask = node_c.sigmoid()

        # Spread each node's score evenly over the 1-based source lines it covers.
        linescores = defaultdict(int)
        for node_idx, score in enumerate(node_mask):
            location = object_file["lines"][node_idx]
            if ":" not in location:
                continue
            # The first space-separated token has the form lower:upper (quotes removed).
            bounds = location.split(" ")[0].split(":")
            lower = int(bounds[0].replace("\"", ""))
            upper = int(bounds[1].replace("\"", ""))
            lines = list(range(lower, upper + 1))
            for line in lines:
                linescores[line] += score / len(lines)

        # No node carried a line range: nothing can be ranked (max() would raise).
        if not linescores:
            continue

        # Dense 0-based score list; lines without any node keep a score of 0.
        lines = [0 for _ in range(max(linescores.keys()))]
        for line, score in linescores.items():
            lines[line - 1] = score

        sorted_lines = sort_lines(lines)
        rank = min_rank_of_indices(sorted_lines, flaw_indices)
        ranks.append(rank)
    ranks = torch.as_tensor(ranks)
    ranks = ranks[ranks.isfinite()]

    def topk_acc(k):
        # Share of samples whose best flaw-line rank is below k, as a percentage.
        return round((
            torch.sum(ranks < k) / len(ranks)
        ).item() * 100, 2)

    print({
        "Top1-Acc": topk_acc(1),
        "Top3-Acc": topk_acc(3),
        "Top5-Acc": topk_acc(5),
    })

In [18]:
# Merged-id evaluation of the "LINEVUL_TEST" cache against the C sources in "test".
eval_linelevel_merged("LINEVUL_TEST", "test")

Evaluating LINEVUL_TEST
{'Top1-Acc': 43.65, 'Top3-Acc': 58.01, 'Top5-Acc': 68.51}


In [19]:
# Merged-id evaluation of the "LINEVUL_CHROMIUM" cache against the C sources in "apply_codestyle_Chromium".
eval_linelevel_merged("LINEVUL_CHROMIUM", "apply_codestyle_Chromium")

Evaluating LINEVUL_CHROMIUM
{'Top1-Acc': 43.65, 'Top3-Acc': 57.46, 'Top5-Acc': 69.06}


In [20]:
# Merged-id evaluation of the "LINEVUL_MOZILLA" cache against the C sources in "apply_codestyle_Mozilla".
eval_linelevel_merged("LINEVUL_MOZILLA", "apply_codestyle_Mozilla")

Evaluating LINEVUL_MOZILLA


{'Top1-Acc': 42.54, 'Top3-Acc': 58.56, 'Top5-Acc': 69.61}


In [21]:
# Merged-id evaluation of the "LINEVUL_GOOGLE" cache against the C sources in "apply_codestyle_Google".
eval_linelevel_merged("LINEVUL_GOOGLE", "apply_codestyle_Google")

Evaluating LINEVUL_GOOGLE
{'Top1-Acc': 44.2, 'Top3-Acc': 60.77, 'Top5-Acc': 70.72}


In [22]:
# Merged-id evaluation of the "LINEVUL_LLVM" cache against the C sources in "apply_codestyle_LLVM".
eval_linelevel_merged("LINEVUL_LLVM", "apply_codestyle_LLVM")

Evaluating LINEVUL_LLVM
{'Top1-Acc': 44.2, 'Top3-Acc': 58.56, 'Top5-Acc': 69.61}


In [23]:
# Merged-id evaluation of the "LINEVUL_GNU" cache against the C sources in "apply_codestyle_GNU".
eval_linelevel_merged("LINEVUL_GNU", "apply_codestyle_GNU")

Evaluating LINEVUL_GNU
{'Top1-Acc': 39.78, 'Top3-Acc': 56.35, 'Top5-Acc': 67.96}


In [4]:
# Print the C source file of id 179598 from the "test" directory as one string.
print("".join(get_c_lines("179598", "test")))

PHP_FUNCTION(imageconvolution)
{
zval *SIM, *hash_matrix;
zval **var = NULL, **var2 = NULL;
gdImagePtr im_src = NULL;
double div, offset;
int nelem, i, j, res;
float matrix[3][3] = {{0,0,0}, {0,0,0}, {0,0,0}};

if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "radd", &SIM, &hash_matrix, &div, &offset) == FAILURE) {
RETURN_FALSE;
}

ZEND_FETCH_RESOURCE(im_src, gdImagePtr, &SIM, -1, "Image", le_gd);

nelem = zend_hash_num_elements(Z_ARRVAL_P(hash_matrix));
if (nelem != 3) {
RETURN_FALSE;
}

for (i=0; i<3; i++) {
if (zend_hash_index_find(Z_ARRVAL_P(hash_matrix), (i), (void **) &var) == SUCCESS && Z_TYPE_PP(var) == IS_ARRAY) {
if (Z_TYPE_PP(var) != IS_ARRAY || zend_hash_num_elements(Z_ARRVAL_PP(var)) != 3 ) {
RETURN_FALSE;
}

for (j=0; j<3; j++) {
if (zend_hash_index_find(Z_ARRVAL_PP(var), (j), (void **) &var2) == SUCCESS) {
					SEPARATE_ZVAL(var2);
					convert_to_double(*var2);
					matrix[i][j] = (float)Z_DVAL_PP(var2);
} else {
RETURN_FALSE;
}
}
}
}
res = gdImageConvolution(im_src

In [134]:
def linescores_for(index, CPG_SET="LINEVUL_TEST", C_SET="test"):
    """Print the line ranking and per-line scores for a single cached sample.

    Loads ``../cache/{CPG_SET}/{index}_1.cpg.pt.gz`` and runs
    ``model.layer.explain`` on it. The sigmoid of the fourth returned value
    (``node_t``) is spread over the source lines each node covers. Prints the
    0-based line indices sorted by descending score, and a dict mapping each
    1-based line number to its accumulated score.

    NOTE(review): the evaluation functions in this notebook score with
    ``node_c`` rather than ``node_t`` -- confirm which one is intended here.

    Args:
        index: sample id used in the cache file name.
        CPG_SET: name of the cache sub-directory holding the pickled graph.
        C_SET: kept for interface compatibility; the C source is not needed
            to compute the scores, so it is no longer read.
    """
    # NOTE(security): pickle can execute arbitrary code; only load trusted caches.
    # The context manager closes the gzip handle, which the bare call leaked.
    with gzip.open(f"../cache/{CPG_SET}/{index}_1.cpg.pt.gz") as cache_file:
        object_file = pickle.load(cache_file)
    data = Data(
        x=torch.cat((object_file["astenc"], object_file["codeenc"]), dim=1),
        edge_index=object_file["edge_index"],
        y=object_file["y"],
    )
    data.edge_index = data.edge_index.long()
    data.x = data.x.float()
    # Forward pass kept from the original code; its three outputs are not used here.
    c_logs, o_logs, co_logs = model(data)

    edge_c, edge_t, node_c, node_t = model.layer.explain(data)
    node_mask = node_t.sigmoid()

    # Spread each node's score evenly over the 1-based source lines it covers.
    linescores = defaultdict(int)
    for node_idx, score in enumerate(node_mask):
        location = object_file["lines"][node_idx]
        if ":" not in location:
            continue
        # The first space-separated token has the form lower:upper (quotes removed).
        bounds = location.split(" ")[0].split(":")
        lower = int(bounds[0].replace("\"", ""))
        upper = int(bounds[1].replace("\"", ""))
        lines = list(range(lower, upper + 1))
        for line in lines:
            linescores[line] += score / len(lines)

    # Dense 0-based score list; empty when no node carried a line range
    # (max() on an empty dict would raise).
    lines = [0 for _ in range(max(linescores.keys()))] if linescores else []
    for line, score in linescores.items():
        lines[line - 1] = score

    sorted_lines = sort_lines(lines) if lines else []
    print("Sorted lines", sorted_lines)
    print("Line scores", dict((k, v.item()) for k, v in linescores.items()))

In [135]:
# Print the sorted line indices and per-line scores for id 179598, using the default "LINEVUL_TEST" cache.
linescores_for("179598")

Sorted lines [7, 22, 32, 13, 40, 23, 28, 29, 21, 6, 3, 15, 31, 16, 2, 5, 4, 30, 35, 10, 34, 9, 42, 25, 24, 18, 45, 43, 17, 33, 44, 36, 0, 26, 11, 46, 19, 37, 27, 38, 39, 47, 41, 20, 14, 12, 8, 1]
Line scores {1: 0.5813077688217163, 2: 0.03078407049179077, 3: 2.2228031158447266, 4: 3.6467161178588867, 5: 2.2121074199676514, 6: 2.222046136856079, 7: 3.685823917388916, 8: 10.996650695800781, 9: 0.03078407049179077, 10: 1.1479395627975464, 11: 1.1654982566833496, 12: 0.504533052444458, 13: 0.03078407049179077, 14: 6.467837810516357, 15: 0.03078407049179077, 16: 3.2387633323669434, 17: 2.242281436920166, 18: 0.9080506563186646, 19: 0.9932812452316284, 20: 0.37688708305358887, 21: 0.03078407049179077, 22: 5.808654308319092, 23: 8.294575691223145, 24: 6.1823272705078125, 25: 1.0354623794555664, 26: 1.0578889846801758, 27: 0.5061697959899902, 28: 0.19199636578559875, 29: 6.01950216293335, 30: 5.941442012786865, 31: 1.8142070770263672, 32: 2.3964335918426514, 33: 6.480062007904053, 34: 0.741018