# Embedding Part

This part processes the taint flow steps with code2vec and returns the predicted method names together with the code vectors, which are saved in `java_taints_predict_vector.json`.
The input taint flows must be syntactically correct, as in `java_taints_c2v.json`; this clean-up is done with `clear.py`.

In [None]:
# Fetch the code2vec sources; a later cell changes into the resulting `code2vec` directory.
!git clone https://github.com/tech-srl/code2vec

In [None]:
# Download the model archive and unpack it into the current directory.
!wget https://s3.amazonaws.com/code2vec/model/java14m_model.tar.gz
!tar -xvzf java14m_model.tar.gz

In [None]:
# Run code2vec from inside its checkout with the downloaded model, then go back
# to the parent directory.
# NOTE(review): `../models/java14_model/` is presumably where the archive was
# unpacked by the previous cell -- confirm the path matches the tarball layout.
%cd code2vec
!python3 code2vec.py --load ../models/java14_model/saved_model_iter8.release --export_code_vectors --predict
%cd ..

# Evaluation Part

From here on, the code vectors and method names must be available in the format of `java_taints_predict_vector.json`.

In [62]:
import json

# Load the prediction output into `feats`. The evaluation cells look entries up
# by their "id" key and read an optional "vector" and/or "set" entry from them.
with open('../output/java_taints_predict_vector.json', 'r', encoding='utf-8') as file:
    feats = json.load(file)

## Functions

In [28]:
def get_iss_by_id(id, dicts):
    """Return the first entry of ``dicts`` whose ``"id"`` equals ``id``.

    The result is ``None`` when no entry matches.
    """
    matches = (entry for entry in dicts if entry["id"] == id)
    return next(matches, None)

In [37]:
# calc jaccard similarity

def jaccard(a, b):
    """Return the Jaccard similarity ``|a & b| / |a | b|`` of two iterables.

    Both arguments are converted to sets first, so duplicates and order are
    ignored.

    Args:
        a: Iterable of hashable items.
        b: Iterable of hashable items.

    Returns:
        A float in ``[0, 1]``. Two empty inputs yield ``0.0``: the ratio is
        undefined there, and without any items there is no evidence of
        similarity.
    """
    a = set(a)
    b = set(b)
    union = a | b
    if not union:
        # Both inputs are empty; avoid dividing by zero.
        return 0.0
    return len(a & b) / len(union)

## Vector Prediction

Evaluating the model using the code vector.

### Evaluation 4
Check a0 against all others. Look at the majority category among the top-ranked results inside a window of size |A| − 1 (A is the category of a0) and assign it as the label.

In [75]:
from torch import nn
import torch
import operator
import copy

# Leave-one-out evaluation on code vectors: every issue is used as a query once
# and is labelled with the majority category among its most similar issues.
def evaluate_categories_3(categories, feats, category_check=True):
    """Classify each issue by majority vote over its most similar issues.

    For every issue (the query) the cosine similarity between its code vector
    and the vector of every other issue is computed. The ``len(category) - 1``
    most similar issues form the window, where ``category`` is the query's own
    category. The category occurring most often inside the window is the
    predicted label; on a tie the category that appears first in the ranking
    wins.

    Queries without a usable vector (id not in ``feats`` or no ``"vector"``
    key) are printed and skipped. A query whose window is empty cannot be
    labelled and is counted as a false negative of its category.

    Args:
        categories: List of categories, each a list of issue ids.
        feats: List of dicts with an ``"id"`` key and an optional ``"vector"``
            key holding the code vector as a sequence of numbers.
        category_check: Unused; kept so existing callers keep working.

    Returns:
        One dict per category with the counters ``"tp"``, ``"fp"`` and
        ``"fn"``.
    """
    cos = nn.CosineSimilarity(dim=0)

    # measures per category
    measures = [{"tp": 0, "fp": 0, "fn": 0} for _ in categories]

    # Index the features once instead of scanning the list for every lookup.
    # setdefault keeps the first entry of a duplicated id, like a linear scan.
    by_id = {}
    for item in feats:
        if "id" in item:
            by_id.setdefault(item["id"], item)

    # Tensors are built lazily and at most once per issue id.
    tensors = {}

    def vector_of(issue_id):
        """Return the cached vector tensor of an issue, or None if it has none."""
        if issue_id not in tensors:
            item = by_id.get(issue_id)
            if item is not None and "vector" in item:
                tensors[issue_id] = torch.tensor(item["vector"], dtype=torch.float32)
            else:
                tensors[issue_id] = None
        return tensors[issue_id]

    for i_cat, category in enumerate(categories):
        for i_query, query_id in enumerate(category):
            query_vec = vector_of(query_id)
            if query_vec is None:
                print(query_id)
                continue

            # (similarity, category index) for every issue except the query
            results = []
            for i_check, check_cat in enumerate(categories):
                for i_iss, check_id in enumerate(check_cat):
                    if i_check == i_cat and i_iss == i_query:
                        # leave the query itself out (by position, not by id)
                        continue
                    check_vec = vector_of(check_id)
                    if check_vec is None:
                        continue
                    results.append((cos(check_vec, query_vec).item(), i_check))

            # most similar first; the sort is stable, so ties keep input order
            results.sort(key=lambda pair: pair[0], reverse=True)

            # we only care about the results inside the bucket size
            votes = {}
            for _, bucket in results[:len(category) - 1]:
                votes[bucket] = votes.get(bucket, 0) + 1

            if not votes:
                # Nothing to vote on (singleton category or no comparable
                # issue): the query is missed, but no other category is blamed.
                measures[i_cat]["fn"] += 1
                continue

            # max() returns the first maximum, i.e. the best-ranked category
            majority = max(votes, key=votes.get)

            if majority == i_cat:
                measures[i_cat]["tp"] += 1
            else:
                measures[i_cat]["fn"] += 1
                measures[majority]["fp"] += 1

    return measures

In [None]:
# Load the categories from label_java_sqli.json and print the F1 score of every
# category. The encoding is given explicitly so the file is read the same way
# on every platform.
with open('../data/label_java_sqli.json', 'r', encoding='utf-8') as json_file:
    categories = json.load(json_file)

measures = evaluate_categories_3(categories, feats)
for measure in measures:
    # Every ratio falls back to 0 when its denominator is 0.
    precision = measure['tp'] / (measure['tp'] + measure['fp']) if (measure['tp'] + measure['fp']) else 0
    recall = measure['tp'] / (measure['tp'] + measure['fn']) if (measure['tp'] + measure['fn']) else 0
    f1 = 2 * ((precision * recall) / (precision + recall)) if (precision + recall) else 0

    print(f"F1: {f1}")
    print("-----------")

### Evaluation 5
Check a0 against all others. Count a hit if one of the top `threshold` ranked issues (rank 1 by default) belongs to the same category as a0.

In [79]:
from torch import nn
import torch
import operator
import copy

# Leave-one-out evaluation on code vectors: a query counts as a hit when one of
# its `threshold` most similar issues comes from the query's own category.
def evaluate_categories_4(categories, feats, threshold = 1):
    """Count, per category, the queries with a same-category issue in the top ranks.

    For every issue (the query) the cosine similarity between its code vector
    and the vector of every other issue is computed and the issues are ranked
    by descending similarity. The query is a hit (``"true"``) when at least one
    of the first ``threshold`` ranked issues belongs to the query's category,
    otherwise a miss (``"false"``).

    Queries without a usable vector (id not in ``feats`` or no ``"vector"``
    key) are skipped. When fewer than ``threshold`` issues are available, all
    of them are inspected.

    Args:
        categories: List of categories, each a list of issue ids.
        feats: List of dicts with an ``"id"`` key and an optional ``"vector"``
            key holding the code vector as a sequence of numbers.
        threshold: Number of top-ranked issues to inspect; values below 1
            make every evaluated query a miss.

    Returns:
        One dict per category with the counters ``"true"`` and ``"false"``.
    """
    cos = nn.CosineSimilarity(dim=0)

    # measures per category
    measures = [{"true": 0, "false": 0} for _ in categories]

    # Index the features once instead of scanning the list for every lookup.
    # setdefault keeps the first entry of a duplicated id, like a linear scan.
    by_id = {}
    for item in feats:
        if "id" in item:
            by_id.setdefault(item["id"], item)

    # Tensors are built lazily and at most once per issue id.
    tensors = {}

    def vector_of(issue_id):
        """Return the cached vector tensor of an issue, or None if it has none."""
        if issue_id not in tensors:
            item = by_id.get(issue_id)
            if item is not None and "vector" in item:
                tensors[issue_id] = torch.tensor(item["vector"], dtype=torch.float32)
            else:
                tensors[issue_id] = None
        return tensors[issue_id]

    # A negative threshold must not turn into a "all but the last" slice.
    top_n = max(threshold, 0)

    for i_cat, category in enumerate(categories):
        for i_query, query_id in enumerate(category):
            query_vec = vector_of(query_id)
            if query_vec is None:
                continue

            # (similarity, same category as the query?) for every other issue
            results = []
            for i_check_cat, check_cat in enumerate(categories):
                for i_iss, check_id in enumerate(check_cat):
                    if i_check_cat == i_cat and i_iss == i_query:
                        # leave the query itself out (by position, not by id)
                        continue
                    check_vec = vector_of(check_id)
                    if check_vec is None:
                        continue
                    results.append((cos(check_vec, query_vec).item(), i_check_cat == i_cat))

            # most similar first; the sort is stable, so ties keep input order
            results.sort(key=lambda pair: pair[0], reverse=True)

            # Slicing never runs past the end, unlike indexing rank by rank.
            if any(same_category for _, same_category in results[:top_n]):
                measures[i_cat]["true"] += 1
            else:
                measures[i_cat]["false"] += 1

    return measures

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt

# Load the categories from label_java_xss.json and print, for a few thresholds,
# the share of hits per category. The encoding is given explicitly so the file
# is read the same way on every platform.
with open('../data/label_java_xss.json', 'r', encoding='utf-8') as json_file:
    categories = json.load(json_file)

# Upper bound for a full sweep over range(1, limit); only a few selected
# thresholds are evaluated below.
limit = 20
accuracies = [ [] for _ in range(len(categories)) ]

for threshold in [1, 3, 5]:
    print(f"Threshold: {threshold}")
    measures = evaluate_categories_4(categories, feats, threshold)
    for i_cat, cat in enumerate(measures):
        print(f"Kategorie {i_cat + 1}")
        # A category without any evaluated query would divide by zero.
        total = cat['true'] + cat['false']
        print(cat['true'] / total if total else 0)
    print("---------------")

## Method Name Prediction

Evaluating the model using the method name set as representation.

### Evaluation 4
Check a0 against all others. Look at the majority category among the top-ranked results inside a window of size |A| − 1 (A is the category of a0) and assign it as the label.

In [67]:
from torch import nn
import operator
import copy

# Leave-one-out evaluation on method-name sets: every issue is used as a query
# once and is labelled with the majority category among its most similar issues.
def evaluate_categories_3(categories, feats, category_check=True):
    """Classify each issue by majority vote over its most similar issues.

    For every issue (the query) the Jaccard similarity between its method-name
    set and the set of every other issue is computed. The
    ``len(category) - 1`` most similar issues form the window, where
    ``category`` is the query's own category. The category occurring most often
    inside the window is the predicted label; on a tie the category that
    appears first in the ranking wins.

    Queries without a usable set (id not in ``feats`` or no ``"set"`` key) are
    skipped. Two empty sets have similarity ``0.0``. A query whose window is
    empty cannot be labelled and is counted as a false negative of its
    category.

    Args:
        categories: List of categories, each a list of issue ids.
        feats: List of dicts with an ``"id"`` key and an optional ``"set"``
            key holding an iterable of hashable items.
        category_check: Unused; kept so existing callers keep working.

    Returns:
        One dict per category with the counters ``"tp"``, ``"fp"`` and
        ``"fn"``.
    """
    # measures per category
    measures = [{"tp": 0, "fp": 0, "fn": 0} for _ in categories]

    # Index the features once instead of scanning the list for every lookup.
    # setdefault keeps the first entry of a duplicated id, like a linear scan.
    by_id = {}
    for item in feats:
        if "id" in item:
            by_id.setdefault(item["id"], item)

    # Sets are built lazily and at most once per issue id.
    name_sets = {}

    def names_of(issue_id):
        """Return the cached name set of an issue, or None if it has none."""
        if issue_id not in name_sets:
            item = by_id.get(issue_id)
            if item is not None and "set" in item:
                name_sets[issue_id] = set(item["set"])
            else:
                name_sets[issue_id] = None
        return name_sets[issue_id]

    for i_cat, category in enumerate(categories):
        for i_query, query_id in enumerate(category):
            query_names = names_of(query_id)
            if query_names is None:
                continue

            # (similarity, category index) for every issue except the query
            results = []
            for i_check, check_cat in enumerate(categories):
                for i_iss, check_id in enumerate(check_cat):
                    if i_check == i_cat and i_iss == i_query:
                        # leave the query itself out (by position, not by id)
                        continue
                    check_names = names_of(check_id)
                    if check_names is None:
                        continue
                    union = check_names | query_names
                    # Jaccard similarity; two empty sets would divide by zero
                    sim = len(check_names & query_names) / len(union) if union else 0.0
                    results.append((sim, i_check))

            # most similar first; the sort is stable, so ties keep input order
            results.sort(key=lambda pair: pair[0], reverse=True)

            # we only care about the results inside the bucket size
            votes = {}
            for _, bucket in results[:len(category) - 1]:
                votes[bucket] = votes.get(bucket, 0) + 1

            if not votes:
                # Nothing to vote on (singleton category or no comparable
                # issue): the query is missed, but no other category is blamed.
                measures[i_cat]["fn"] += 1
                continue

            # max() returns the first maximum, i.e. the best-ranked category
            majority = max(votes, key=votes.get)

            if majority == i_cat:
                measures[i_cat]["tp"] += 1
            else:
                measures[i_cat]["fn"] += 1
                measures[majority]["fp"] += 1

    return measures

In [None]:
# Load the categories from label_java_xss.json and print the F1 score of every
# category. The encoding is given explicitly so the file is read the same way
# on every platform.
with open('../data/label_java_xss.json', 'r', encoding='utf-8') as json_file:
    categories = json.load(json_file)

measures = evaluate_categories_3(categories, feats)
for measure in measures:
    # Every ratio falls back to 0 when its denominator is 0.
    precision = measure['tp'] / (measure['tp'] + measure['fp']) if (measure['tp'] + measure['fp']) else 0
    recall = measure['tp'] / (measure['tp'] + measure['fn']) if (measure['tp'] + measure['fn']) else 0
    f1 = 2 * ((precision * recall) / (precision + recall)) if (precision + recall) else 0
    # F-beta with beta = 2 is computed as well but not printed.
    beta = 2
    f_beta = (1 + beta**2) * ((precision * recall) / (beta**2 * precision + recall)) if (precision + recall) else 0

    print(f"F1: {f1}")
    print("-----------")

### Evaluation 5
Check a0 against all others. Count a hit if one of the top `threshold` ranked issues (rank 1 by default) belongs to the same category as a0.

In [69]:
from torch import nn
import operator
import copy

# Leave-one-out evaluation on method-name sets: a query counts as a hit when one
# of its `threshold` most similar issues comes from the query's own category.
def evaluate_categories_4(categories, feats, threshold = 1):
    """Count, per category, the queries with a same-category issue in the top ranks.

    For every issue (the query) the Jaccard similarity between its method-name
    set and the set of every other issue is computed and the issues are ranked
    by descending similarity. The query is a hit (``"true"``) when at least one
    of the first ``threshold`` ranked issues belongs to the query's category,
    otherwise a miss (``"false"``).

    Queries without a usable set (id not in ``feats`` or no ``"set"`` key) are
    skipped. Two empty sets have similarity ``0.0``. When fewer than
    ``threshold`` issues are available, all of them are inspected.

    Args:
        categories: List of categories, each a list of issue ids.
        feats: List of dicts with an ``"id"`` key and an optional ``"set"``
            key holding an iterable of hashable items.
        threshold: Number of top-ranked issues to inspect; values below 1
            make every evaluated query a miss.

    Returns:
        One dict per category with the counters ``"true"`` and ``"false"``.
    """
    # measures per category
    measures = [{"true": 0, "false": 0} for _ in categories]

    # Index the features once instead of scanning the list for every lookup.
    # setdefault keeps the first entry of a duplicated id, like a linear scan.
    by_id = {}
    for item in feats:
        if "id" in item:
            by_id.setdefault(item["id"], item)

    # Sets are built lazily and at most once per issue id.
    name_sets = {}

    def names_of(issue_id):
        """Return the cached name set of an issue, or None if it has none."""
        if issue_id not in name_sets:
            item = by_id.get(issue_id)
            if item is not None and "set" in item:
                name_sets[issue_id] = set(item["set"])
            else:
                name_sets[issue_id] = None
        return name_sets[issue_id]

    # A negative threshold must not turn into a "all but the last" slice.
    top_n = max(threshold, 0)

    for i_cat, category in enumerate(categories):
        for i_query, query_id in enumerate(category):
            query_names = names_of(query_id)
            if query_names is None:
                continue

            # (similarity, same category as the query?) for every other issue
            results = []
            for i_check_cat, check_cat in enumerate(categories):
                for i_iss, check_id in enumerate(check_cat):
                    if i_check_cat == i_cat and i_iss == i_query:
                        # leave the query itself out (by position, not by id)
                        continue
                    check_names = names_of(check_id)
                    if check_names is None:
                        continue
                    union = check_names | query_names
                    # Jaccard similarity; two empty sets would divide by zero
                    sim = len(check_names & query_names) / len(union) if union else 0.0
                    results.append((sim, i_check_cat == i_cat))

            # most similar first; the sort is stable, so ties keep input order
            results.sort(key=lambda pair: pair[0], reverse=True)

            # Slicing never runs past the end, unlike indexing rank by rank.
            if any(same_category for _, same_category in results[:top_n]):
                measures[i_cat]["true"] += 1
            else:
                measures[i_cat]["false"] += 1

    return measures

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt

# Load the categories from label_java_xss.json and print, for a few thresholds,
# the share of hits per category. The encoding is given explicitly so the file
# is read the same way on every platform.
with open('../data/label_java_xss.json', 'r', encoding='utf-8') as json_file:
    categories = json.load(json_file)

for threshold in [1, 3, 5]:
    print(f"Threshold: {threshold}")
    measures = evaluate_categories_4(categories, feats, threshold)
    for i_cat, cat in enumerate(measures):
        print(f"Kategorie {i_cat + 1}")
        # A category without any evaluated query would divide by zero.
        total = cat['true'] + cat['false']
        print(cat['true'] / total if total else 0)