In [1]:
import json
import evaluate
import pandas as pd
# Metric objects are loaded once here and shared by calculate_valid and
# calculate_overall below, which call .compute() on them.
accuracy_metric = evaluate.load("accuracy")
f1_metric = evaluate.load("f1")

def filter_valid(preds, labels):
    """Drop every (prediction, label) pair whose prediction is the string '-1'.

    Args:
        preds: iterable of predictions; the string '-1' marks an invalid one.
        labels: iterable of reference labels, paired positionally with ``preds``.

    Returns:
        A 2-tuple ``(kept_labels, kept_preds)`` of tuples -- labels come first.

    Raises:
        ValueError: when no pair survives the filter (the unpacking below has
            nothing to unpack). calculate_valid relies on this exception.
    """
    kept_pairs = []
    for pred, label in zip(preds, labels):
        if pred == '-1':
            continue
        kept_pairs.append((label, pred))
    # Transpose the list of pairs into two parallel tuples.
    kept_labels, kept_preds = zip(*kept_pairs)
    return kept_labels, kept_preds

def rearrange_string(s):
    """Swap the first two '-'-separated fields of ``s``.

    For example ``"1b-lora"`` becomes ``"lora-1b"``. Any fields after the
    second one are discarded. Raises IndexError when ``s`` contains no '-'.
    """
    pieces = s.split('-')
    first, second = pieces[0], pieces[1]
    return f"{second}-{first}"

def calculate_valid(preds,labels):
    """Score only the examples whose prediction is valid (i.e. not '-1').

    Args:
        preds: predictions; '-1' entries are removed via filter_valid.
        labels: reference labels paired positionally with ``preds``.

    Returns:
        ``(valid_acc, valid_f1, valid_rec)`` as percentages, where ``valid_rec``
        is the share of ``labels`` that kept a valid prediction. All three are
        0 when filter_valid raises ValueError (no valid prediction at all).
    """
    try:
        kept_labels, kept_preds = filter_valid(preds, labels)
    except ValueError:
        # Nothing valid to score.
        return 0, 0, 0

    acc_result = accuracy_metric.compute(predictions=kept_preds, references=kept_labels)
    valid_acc = acc_result['accuracy'] * 100
    f1_result = f1_metric.compute(predictions=kept_preds, references=kept_labels)
    valid_f1 = f1_result['f1'] * 100
    valid_rec = len(kept_labels) / len(labels) * 100
    return valid_acc, valid_f1, valid_rec

def calculate_overall(preds,labels):
    """Score all predictions against ``labels`` without any filtering.

    Returns:
        ``(acc, f1_macro)`` as percentages: accuracy and macro-averaged F1
        as reported by the shared metric objects.
    """
    acc_result = accuracy_metric.compute(predictions=preds, references=labels)
    acc = acc_result['accuracy'] * 100
    f1_result = f1_metric.compute(predictions=preds, references=labels, average='macro')
    f1_macro = f1_result['f1'] * 100
    return acc, f1_macro

def contains_keyword(sentence, keywords):
    """Return True if any keyword occurs in ``sentence``, ignoring case.

    This is a plain substring test, so a keyword also matches inside a longer
    word (e.g. 'no' is found in 'know'). An empty ``keywords`` gives False.
    """
    return any(keyword.lower() in sentence.lower() for keyword in keywords)

def paper_method(raw_texts,positive_kw,negative_kw):
    """Derive a label for each raw generation by keyword matching.

    Args:
        raw_texts: iterable of generated answer strings.
        positive_kw: keywords that indicate a positive answer.
        negative_kw: keywords that indicate a negative answer.

    Returns:
        A list of strings, one per text: "1" when only positive keywords
        match, "0" when only negative keywords match, and "-1" when both or
        neither match.
    """
    def classify(text):
        has_positive = contains_keyword(text, positive_kw)
        has_negative = contains_keyword(text, negative_kw)
        if has_positive == has_negative:
            # Ambiguous (both matched) or uninformative (none matched).
            return "-1"
        return "1" if has_positive else "0"

    return [classify(text) for text in raw_texts]

def summary_table(file_list,path,labels,positive_kw,negative_kw):
    """Build a per-experiment table comparing stored and keyword-derived predictions.

    For each name in ``file_list`` the JSON file ``path[0] + name + path[1]`` is
    read. Two prediction sets are scored against ``labels``: the stored
    ``'prediction'`` field, and labels re-derived by paper_method from the text
    after the last ``"\\nAnswer:"`` in ``'raw_text'`` (the ``*_new`` columns).

    Args:
        file_list: experiment names; each is passed to rearrange_string, so it
            must contain a '-'.
        path: two-element sequence ``[prefix, suffix]`` wrapped around the name.
        labels: reference labels, paired positionally with the file's entries.
        positive_kw: keywords forwarded to paper_method.
        negative_kw: keywords forwarded to paper_method.

    Returns:
        pandas.DataFrame with one row per file that was found and all metrics
        rounded to two decimals. Missing files are skipped, so the table may
        contain fewer rows than ``file_list`` (possibly none).
    """
    columns = ['experiment', '%overall_acc','%overall_acc_new','%f1_macro','%f1_macro_new',
               '%valid_rec' ,'%valid_rec_new', '%valid_acc', '%valid_acc_new','%valid_f1', '%valid_f1_new']
    # Collect plain dict records and build the frame once at the end instead of
    # enlarging a DataFrame row by row.
    rows = []
    for file in file_list:
        try:
            with open(path[0]+file+path[1], 'r') as f:
                data = json.load(f)
        except FileNotFoundError:
            # No results file for this experiment: leave it out of the table.
            continue

        # Metrics for the predictions stored in the results file.
        preds = [entry[0]['prediction'] for entry in data]
        acc,f1_macro = calculate_overall(preds,labels)
        valid_acc,valid_f1,valid_rec = calculate_valid(preds,labels)

        # Metrics for predictions re-derived from the generated answer text.
        raw_texts = [entry[0]['raw_text'].split("\nAnswer:")[-1] for entry in data]
        preds_new = paper_method(raw_texts,positive_kw,negative_kw)
        acc_new,f1_macro_new = calculate_overall(preds_new,labels)
        valid_acc_new,valid_f1_new,valid_rec_new = calculate_valid(preds_new,labels)

        rows.append({'experiment': rearrange_string(file),
                     '%overall_acc': round(acc, 2), '%overall_acc_new': round(acc_new, 2),
                     '%f1_macro': round(f1_macro, 2), '%f1_macro_new': round(f1_macro_new, 2),
                     '%valid_rec': round(valid_rec, 2), '%valid_rec_new': round(valid_rec_new, 2),
                     '%valid_acc': round(valid_acc, 2), '%valid_acc_new': round(valid_acc_new, 2),
                     '%valid_f1': round(valid_f1, 2), '%valid_f1_new': round(valid_f1_new, 2)})
    # Passing columns= keeps the column order and yields an empty, correctly
    # labelled frame when no file was found.
    return pd.DataFrame(rows, columns=columns)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import random
from datasets import load_dataset,concatenate_datasets
def sampling(dataset,neg_n_sample,pos_n_sample,seed=0):
    """Draw a seeded sample with a fixed number of negative and positive rows.

    Args:
        dataset: object exposing ``filter``/``select``; rows must have a
            ``"target"`` field compared against False / True.
        neg_n_sample: number of rows to draw where ``target == False``.
        pos_n_sample: number of rows to draw where ``target == True``.
        seed: seeds the global ``random`` module and the final shuffle.

    Returns:
        The concatenation of both samples, shuffled with ``seed``.
    """
    random.seed(seed)
    negatives = dataset.filter(lambda example: example["target"]==False)
    positives = dataset.filter(lambda example: example["target"]==True)

    # Negative indices are drawn before positive ones, so the random stream is
    # consumed in a fixed order for a given seed.
    selected_parts = []
    for subset, n_sample in ((negatives, neg_n_sample), (positives, pos_n_sample)):
        chosen_indices = random.sample(range(len(subset)), n_sample)
        selected_parts.append(subset.select(chosen_indices))

    return concatenate_datasets(selected_parts).shuffle(seed=seed)

In [3]:
# Model sizes and tuning methods whose result files are summarised below.
models = ['1b', '3b', '7b']
methods = [
    'lora', 'adalora', 'ia3',
    'prompt', 'ptuning', 'parallel',
    'adapterp', 'adapterh', 'fft',
]
# Experiment names are "<model>-<method>", grouped by method (all sizes of one
# method are adjacent).
file_list = ['-'.join((model, method)) for method in methods for model in models]

# Defect detection

In [4]:
from datasets import load_dataset,concatenate_datasets
# Test split of the defect-detection dataset.
d = load_dataset("code_x_glue_cc_defect_detection", split='test')
# Reference labels as strings: each target is converted with int() and then
# str(), matching the string predictions compared against them later.
dd_labels = [str(int(target)) for target in d['target']]

## defect detection formatB
instruction = '''Is there a defect in the Code, and respond to YES or NO.''' <br>
prompt= f'''Question: {instruction}\n{code}\n\nAnswer:'''

In [5]:
# Keyword lists handed to paper_method to re-derive a label from the raw
# generation. Matching is a case-insensitive substring test (contains_keyword),
# so a short keyword such as 'no' also matches inside longer words.
defect_positive = ['yes','there is a','ere is a','has a defect','contains a defect']
defect_negative = ['no','there is no defect']

# [prefix, suffix] wrapped around each experiment name to form the file path.
# Forward slashes are used because they resolve on Windows and POSIX alike;
# a backslash-separated path only resolves on Windows, and summary_table
# skips files it cannot find, which would yield an empty table elsewhere.
dd_path = ['./run_result/generations_','_fullB.json']

print("defect detection formatB")
summary_table(file_list,dd_path,dd_labels,defect_positive,defect_negative)

defect detection formatB


Unnamed: 0,experiment,%overall_acc,%overall_acc_new,%f1_macro,%f1_macro_new,%valid_rec,%valid_rec_new,%valid_acc,%valid_acc_new,%valid_f1,%valid_f1_new
0,lora-1b,25.18,12.81,16.85,11.43,56.77,28.84,44.36,44.42,60.93,61.51
1,lora-3b,11.75,9.41,11.13,9.72,25.66,20.28,45.79,46.39,62.38,62.26
2,lora-7b,27.16,16.0,17.35,13.35,60.25,35.72,45.08,44.77,61.86,61.25
3,adalora-1b,19.22,19.18,14.44,14.42,43.34,43.34,44.34,44.26,61.3,61.22
4,adalora-3b,34.0,34.0,19.98,19.98,60.58,60.58,56.13,56.13,1.36,1.36
5,adalora-7b,26.83,26.28,22.06,21.84,59.59,58.2,45.02,45.16,51.01,50.9
6,ia3-1b,20.42,20.42,15.02,15.01,45.46,45.53,44.93,44.86,61.83,61.76
7,ia3-3b,34.66,34.66,20.0,20.0,61.93,61.93,55.97,55.97,0.53,0.53
8,ia3-7b,28.66,26.98,22.31,21.64,61.82,58.02,46.36,46.5,56.27,56.15
9,prompt-1b,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


## defect detection formatA
instruction = '''Is there a defect in the Code, and respond to YES or NO.''' <br>
prompt= f'''Question: {instruction}\n{code}\n\nAnswer:'''

In [6]:
# Keyword lists handed to paper_method to re-derive a label from the raw
# generation. Matching is a case-insensitive substring test (contains_keyword),
# so a short keyword such as 'no' also matches inside longer words.
defect_positive = ['yes','there is a','ere is a','has a defect','contains a defect']
defect_negative = ['no','there is no defect']

# [prefix, suffix] wrapped around each experiment name to form the file path.
# Forward slashes are used because they resolve on Windows and POSIX alike;
# a backslash-separated path only resolves on Windows, and summary_table
# skips files it cannot find, which would yield an empty table elsewhere.
dd_path = ['./run_result/defect_generations_','_A.json']

print("defect detection formatA")
summary_table(file_list,dd_path,dd_labels,defect_positive,defect_negative)

defect detection formatA


Unnamed: 0,experiment,%overall_acc,%overall_acc_new,%f1_macro,%f1_macro_new,%valid_rec,%valid_rec_new,%valid_acc,%valid_acc_new,%valid_f1,%valid_f1_new
0,lora-1b,40.7,27.64,24.66,18.24,86.46,60.54,47.08,45.65,60.84,61.73
1,lora-3b,44.88,42.17,21.02,20.73,98.46,91.65,45.58,46.01,62.46,62.86
2,lora-7b,48.1,36.79,30.83,27.6,99.85,75.84,48.17,48.5,56.36,53.26
3,adalora-1b,52.42,52.42,29.08,29.08,97.18,97.18,53.94,53.94,20.84,20.84
4,adalora-3b,28.4,28.4,18.5,18.49,49.16,49.19,57.78,57.74,1.39,1.39
5,adalora-7b,37.04,37.04,20.43,20.43,67.06,67.06,55.24,55.24,0.24,0.24
6,ia3-1b,53.66,53.66,28.69,28.69,97.91,97.91,54.8,54.8,18.03,18.03
7,ia3-3b,33.42,33.38,19.92,19.86,59.48,59.48,56.18,56.12,1.93,1.66
8,ia3-7b,37.37,37.37,20.44,20.44,68.05,68.05,54.92,54.92,0.24,0.24
9,prompt-1b,24.01,24.01,16.21,16.21,53.37,53.37,44.99,44.99,61.95,61.95


# Clone detection

In [7]:
# Reference labels for clone detection are read from one results file.
# NOTE(review): this assumes every clone-detection results file lists the same
# examples in the same order as this one -- confirm before comparing runs.
# Forward slashes keep the path usable on both Windows and POSIX.
with open('./run_result/clone_generations_1b-ia3.json', 'r') as f:
    data = json.load(f)
cd_labels = [label[0]["true_label"]for label in data]

## clone detection formatB
instruction= '''Is there a clone relation between the Code1 and Code2, and respond to YES or NO.''' <br>
code1= doc['func1'] <br>
code2= doc['func2'] <br>
prompt= f'''Question: {instruction}\nCode1: {code1}.\nCode2: {code2}.\n\nAnswer:'''

In [9]:
# Keyword lists handed to paper_method to re-derive a label from the raw
# generation. Matching is a case-insensitive substring test (contains_keyword),
# so a short keyword such as 'no' also matches inside longer words.
clone_positive = ['yes','there is a','ere is a']
clone_negative = ['no','there is no']

# [prefix, suffix] wrapped around each experiment name to form the file path.
# Forward slashes are used because they resolve on Windows and POSIX alike;
# a backslash-separated path only resolves on Windows, and summary_table
# skips files it cannot find, which would yield an empty table elsewhere.
cd_path = ['./run_result/clone_generations_','.json']

print("clone detection formatB")
summary_table(file_list,cd_path,cd_labels,clone_positive,clone_negative)

clone detection formatB


Unnamed: 0,experiment,%overall_acc,%overall_acc_new,%f1_macro,%f1_macro_new,%valid_rec,%valid_rec_new,%valid_acc,%valid_acc_new,%valid_f1,%valid_f1_new
0,lora-1b,7.25,0.45,7.38,1.7,51.35,3.5,14.12,12.86,24.74,22.78
1,lora-3b,8.0,7.9,7.94,7.89,52.55,51.95,15.22,15.21,24.81,24.64
2,lora-7b,9.55,8.6,8.97,8.48,56.4,52.5,16.93,16.38,26.28,25.21
3,adalora-1b,7.15,3.45,7.43,5.91,50.0,24.8,14.3,13.91,25.02,24.42
4,adalora-3b,12.8,12.8,12.84,12.85,35.65,35.6,35.9,35.96,25.93,25.97
5,adalora-7b,11.25,11.25,10.62,10.62,51.95,51.95,21.66,21.66,24.07,24.07
6,ia3-1b,7.35,2.25,7.51,4.75,51.1,17.4,14.38,12.93,25.15,22.9
7,ia3-3b,21.05,21.05,17.52,17.53,48.6,48.55,43.31,43.36,25.44,25.47
8,ia3-7b,8.25,8.25,8.21,8.21,51.95,51.95,15.88,15.88,24.91,24.91
9,prompt-1b,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
