In [1]:
import json
import evaluate
import pandas as pd
accuracy_metric = evaluate.load("accuracy")
f1_metric = evaluate.load("f1")

def filter_valid(preds, labels):
    """Keep only the (prediction, label) pairs whose prediction is not '-1'.

    Args:
        preds: Iterable of predictions; the string '-1' marks an entry to drop.
        labels: Iterable of reference labels, paired positionally with ``preds``.

    Returns:
        A ``(labels, preds)`` pair of tuples -- labels come first.

    Raises:
        ValueError: If no pair survives the filter (the transposed result
            is empty and cannot be unpacked into two sequences).
    """
    surviving = []
    for guess, gold in zip(preds, labels):
        if guess != '-1':
            surviving.append((gold, guess))
    # Transpose the list of pairs into two parallel tuples. With nothing
    # left to transpose, this unpacking raises ValueError.
    kept_labels, kept_preds = zip(*surviving)
    return kept_labels, kept_preds

def rearrange_string(s):
    """Swap the first two '-'-separated fields of ``s``.

    For example ``'1b-lora'`` becomes ``'lora-1b'``. Any fields after the
    second one are discarded. Raises ``IndexError`` when ``s`` has no '-'.
    """
    pieces = s.split('-')
    second = pieces[1]
    first = pieces[0]
    return second + '-' + first

def calculate_valid(preds,labels):
    """Score only the predictions that are not the '-1' marker.

    Args:
        preds: Predictions, where '-1' marks an entry to exclude.
        labels: Reference labels aligned with ``preds``.

    Returns:
        ``(valid_acc, valid_f1, valid_rec)`` as percentages: accuracy and F1
        over the retained pairs, and the share of ``labels`` that was
        retained. F1 is computed without an ``average`` argument, so the
        metric's default averaging applies. All three are 0 when nothing
        is retained.
    """
    try:
        kept_labels, kept_preds = filter_valid(preds, labels)
    except ValueError:
        # filter_valid raises when every prediction was filtered out.
        return 0, 0, 0

    accuracy_result = accuracy_metric.compute(predictions=kept_preds, references=kept_labels)
    f1_result = f1_metric.compute(predictions=kept_preds, references=kept_labels)
    retained_share = len(kept_labels) / len(labels)
    return accuracy_result['accuracy'] * 100, f1_result['f1'] * 100, retained_share * 100

def calculate_overall(preds,labels):
    """Score every prediction, including any that would be filtered as invalid.

    Returns:
        ``(acc, f1_macro)`` as percentages: accuracy and macro-averaged F1
        of ``preds`` against ``labels``.
    """
    accuracy_result = accuracy_metric.compute(predictions=preds, references=labels)
    macro_f1_result = f1_metric.compute(predictions=preds, references=labels, average='macro')
    acc_pct = accuracy_result['accuracy'] * 100
    f1_macro_pct = macro_f1_result['f1'] * 100
    return acc_pct, f1_macro_pct

def summary_table(file_list,path,labels):
    """Build a metrics table with one row per experiment result file found.

    Args:
        file_list: Experiment identifiers such as ``'1b-lora'``.
        path: Two-item sequence ``[prefix, suffix]``; each file is opened at
            ``prefix + identifier + suffix``.
        labels: Reference labels shared by all experiments.

    Returns:
        A DataFrame with overall accuracy / macro-F1 and the valid-only
        recall / accuracy / F1, each rounded to two decimals. Experiments
        whose file does not exist are left out of the table.
    """
    column_names = ['experiment', '%overall_acc','%overall_f1(macro)',
                    '%valid_rec' ,'%valid_acc', '%valid_f1']
    df = pd.DataFrame(columns=column_names)
    for file in file_list:
        try:
            with open(path[0]+file+path[1], 'r') as f:
                data = json.load(f)
        except FileNotFoundError:
            # No result file for this experiment -- skip it.
            continue

        # Each record is a list whose first item carries the 'prediction'.
        preds = [record[0]['prediction'] for record in data]
        acc, f1_macro = calculate_overall(preds, labels)
        valid_acc, valid_f1, valid_rec = calculate_valid(preds, labels)
        row = {
            'experiment': rearrange_string(file),
            '%overall_acc': round(acc, 2),
            '%overall_f1(macro)': round(f1_macro, 2),
            '%valid_rec': round(valid_rec, 2),
            '%valid_acc': round(valid_acc, 2),
            '%valid_f1': round(valid_f1, 2),
        }
        # Append at the next integer position.
        df.loc[len(df)] = row
    return df

def contains_keyword(sentence, keywords):
    """Return True if any keyword occurs in ``sentence``, ignoring case.

    Matching is plain substring containment, so a keyword also matches
    inside longer words. An empty ``keywords`` iterable yields False.
    """
    return any(keyword.lower() in sentence.lower() for keyword in keywords)

def paper_method(raw_texts,labels,positive_kw,negative_kw):
    """Score generations with the keyword-matching baseline.

    A text counts as a valid answer when it contains a keyword from exactly
    one of the two lists. A valid answer is correct when a positive-only
    match is paired with label '1', or a negative-only match with label '0'.

    Args:
        raw_texts: Sized sequence of generated answer strings.
        labels: Reference labels ('0' / '1'), paired positionally with
            ``raw_texts``.
        positive_kw: Keywords that indicate a positive answer.
        negative_kw: Keywords that indicate a negative answer.

    Returns:
        ``(accuracy, valid_rate)`` as percentages. Both are measured against
        the total number of texts, not only the valid ones. Returns
        ``(0.0, 0.0)`` for empty input instead of dividing by zero.
    """
    total = len(raw_texts)
    if total == 0:
        # Nothing to score; avoid ZeroDivisionError on an empty result file.
        return 0.0, 0.0

    correct_count, valid_rec = 0, 0
    for text, label in zip(raw_texts, labels):
        defect = contains_keyword(text, positive_kw)
        no_defect = contains_keyword(text, negative_kw)
        if defect == no_defect:
            # Matched both lists or neither: the answer is not usable.
            continue
        valid_rec += 1
        expected_label = '1' if defect else '0'
        if label == expected_label:
            correct_count += 1
    return correct_count/total*100, valid_rec/total*100

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Experiment grid: every model size crossed with every tuning method.
models = ['1b', '3b', '7b']
methods = [
    'lora', 'adalora', 'ia3', 'prompt', 'ptuning',
    'parallel', 'adapterp', 'adapterh', 'fft',
]
# Identifiers look like '<model>-<method>'; methods vary slowest, so the
# three sizes of one method sit next to each other.
file_list = [model + '-' + method for method in methods for model in models]

# Defect detection

In [3]:
from datasets import load_dataset

# Test split of the defect-detection dataset; its 'target' column is the
# ground truth.
d = load_dataset("code_x_glue_cc_defect_detection", split='test')
# Cast each target to int and then to str, giving string labels that the
# scoring helpers can compare against '0' / '1'.
labels = [str(int(target)) for target in d['target']]

In [4]:
# Keyword lists for the keyword-matching baseline on defect detection.
defect_positive = ['yes','there is a','ere is a','has a defect','contains a defect']
defect_negative = ['no','there is no defect']

# Compare the keyword baseline ('*_new' columns) with the scores derived
# from the stored 'prediction' field, one row per experiment file found.
df = pd.DataFrame(columns=['experiment', '%acc_new','%overall_acc','%valid_rec_new' ,'%valid_rec'])
for file in file_list:
    result_path = '.\\run_result\\generations_'+file+'_fullB.json'
    try:
        with open(result_path, 'r') as f:
            data = json.load(f)
    except FileNotFoundError:
        # This experiment has no result file; leave it out of the table.
        continue

    preds = [record[0]['prediction'] for record in data]
    # Keep only what follows the last "\nAnswer:" marker in the generation.
    raw_texts = [record[0]['raw_text'].split("\nAnswer:")[-1] for record in data]
    acc_new, valid_rec_new = paper_method(raw_texts, labels, defect_positive, defect_negative)
    acc, f1_macro = calculate_overall(preds, labels)
    valid_acc, valid_f1, valid_rec = calculate_valid(preds, labels)
    row = {
        'experiment': rearrange_string(file),
        '%acc_new': round(acc_new, 2),
        '%overall_acc': round(acc, 2),
        '%valid_rec_new': round(valid_rec_new, 2),
        '%valid_rec': round(valid_rec, 2),
    }
    df.loc[len(df)] = row
df

Unnamed: 0,experiment,%acc_new,%overall_acc,%valid_rec_new,%valid_rec
0,lora-1b,12.81,25.18,28.84,56.77
1,lora-3b,9.41,11.75,20.28,25.66
2,lora-7b,16.0,27.16,35.72,60.25
3,adalora-1b,19.18,19.22,43.34,43.34
4,adalora-3b,34.0,34.0,60.58,60.58
5,adalora-7b,26.28,26.83,58.2,59.59
6,ia3-1b,20.42,20.42,45.53,45.46
7,ia3-3b,34.66,34.66,61.93,61.93
8,ia3-7b,26.98,28.66,58.02,61.82
9,prompt-1b,0.0,0.0,0.0,0.0


In [5]:
# [prefix, suffix] wrapped around each experiment identifier to form the
# defect-detection result file name.
path = ['.\\run_result\\generations_', '_fullB.json']
df = summary_table(file_list=file_list, path=path, labels=labels)
df

Unnamed: 0,experiment,%overall_acc,%overall_f1(macro),%valid_rec,%valid_acc,%valid_f1
0,lora-1b,25.18,16.85,56.77,44.36,60.93
1,lora-3b,11.75,11.13,25.66,45.79,62.38
2,lora-7b,27.16,17.35,60.25,45.08,61.86
3,adalora-1b,19.22,14.44,43.34,44.34,61.3
4,adalora-3b,34.0,19.98,60.58,56.13,1.36
5,adalora-7b,26.83,22.06,59.59,45.02,51.01
6,ia3-1b,20.42,15.02,45.46,44.93,61.83
7,ia3-3b,34.66,20.0,61.93,55.97,0.53
8,ia3-7b,28.66,22.31,61.82,46.36,56.27
9,prompt-1b,0.0,0.0,0.0,0.0,0.0


# Clone detection

In [6]:
# Read the clone-detection ground truth from one result file. The same
# labels are then applied to every clone experiment -- this presumes all
# clone result files list the same examples in the same order (TODO confirm).
with open('.\\run_result\\clone_generations_1b-ia3.json', 'r') as f:
    data = json.load(f)
labels = [record[0]["true_label"] for record in data]

In [9]:
# Keyword lists for the keyword-matching baseline on clone detection.
clone_positive = ['yes','there is a','ere is a']
clone_negative = ['no','there is no']

# Compare the keyword baseline ('*_new' columns) with the scores derived
# from the stored 'prediction' field, one row per clone experiment file
# found. `labels` must hold the clone-detection ground truth at this point.
df = pd.DataFrame(columns=['experiment(clone)', '%acc_new','%overall_acc','%valid_rec_new' ,'%valid_rec'])
for file in file_list:
    try:
        with open('.\\run_result\\clone_generations_'+file+'.json', 'r') as f:
            data = json.load(f)
    except FileNotFoundError:
        # This experiment has no result file; leave it out of the table.
        continue

    preds = [pred[0]['prediction'] for pred in data]
    # Keep only what follows the last "\nAnswer:" marker in the generation.
    raw_texts = [text[0]['raw_text'].split("\nAnswer:")[-1] for text in data]
    # Score with the clone keyword lists defined in this cell, not the
    # defect-detection lists.
    acc_new, valid_rec_new = paper_method(raw_texts, labels, clone_positive, clone_negative)
    acc, f1_macro = calculate_overall(preds, labels)
    valid_acc, valid_f1, valid_rec = calculate_valid(preds, labels)
    df.loc[len(df)] = {'experiment(clone)': rearrange_string(file), '%acc_new': round(acc_new, 2), '%overall_acc': round(acc, 2),
                        '%valid_rec_new': round(valid_rec_new, 2), '%valid_rec': round(valid_rec, 2)}
df

Unnamed: 0,experiment(clone),%acc_new,%overall_acc,%valid_rec_new,%valid_rec
0,lora-1b,0.45,7.25,3.5,51.35
1,lora-3b,7.9,8.0,51.95,52.55
2,lora-7b,8.6,9.55,52.5,56.4
3,adalora-1b,3.45,7.15,24.8,50.0
4,adalora-3b,12.8,12.8,35.6,35.65
5,adalora-7b,11.25,11.25,51.95,51.95
6,ia3-1b,2.25,7.35,17.4,51.1
7,ia3-3b,21.05,21.05,48.55,48.6
8,ia3-7b,8.25,8.25,51.95,51.95
9,prompt-1b,0.0,0.0,0.0,0.0


In [13]:
# [prefix, suffix] wrapped around each experiment identifier to form the
# clone-detection result file name.
path = ['.\\run_result\\clone_generations_', '.json']
df = summary_table(file_list=file_list, path=path, labels=labels)
df

Unnamed: 0,experiment(clone),%overall_acc,%overall_f1(macro),%valid_rec,%valid_acc,%valid_f1
0,lora-1b,7.25,7.38,51.35,14.12,24.74
1,lora-3b,8.0,7.94,52.55,15.22,24.81
2,lora-7b,9.55,8.97,56.4,16.93,26.28
3,adalora-1b,7.15,7.43,50.0,14.3,25.02
4,adalora-3b,12.8,12.84,35.65,35.9,25.93
5,adalora-7b,11.25,10.62,51.95,21.66,24.07
6,ia3-1b,7.35,7.51,51.1,14.38,25.15
7,ia3-3b,21.05,17.52,48.6,43.31,25.44
8,ia3-7b,8.25,8.21,51.95,15.88,24.91
9,prompt-1b,0.0,0.0,0.0,0.0,0.0
