In [1]:
import json
import evaluate
import pandas as pd
import random
from datasets import load_dataset,concatenate_datasets, Dataset
# Metric objects are loaded once here and shared by calculate_valid() and
# calculate_overall() below, which call .compute() on them.
accuracy_metric = evaluate.load("accuracy")
f1_metric = evaluate.load("f1")

def sampling(dataset,target,pos_n_sample,neg_n_sample,seed=0):
    """Draw a fixed number of positive and negative examples from ``dataset``.

    Args:
        dataset: Object exposing ``filter``, ``select`` and ``len`` (the
            callers in this notebook pass a ``datasets.Dataset``).
        target: Name of the label column. A row counts as negative when its
            value equals ``False`` or ``"0"`` and as positive when it equals
            ``True`` or ``"1"``.
        pos_n_sample: Number of positive rows to draw (without replacement).
        neg_n_sample: Number of negative rows to draw (without replacement).
        seed: Seed for both the index sampling and the final shuffle.

    Returns:
        The concatenation of the sampled negative and positive rows,
        shuffled with ``seed``.

    Raises:
        ValueError: If either requested count is negative or exceeds the
            number of rows available for that class.
    """
    # A private generator produces the same index stream as calling
    # random.seed(seed) on the module-level RNG, but does not reseed global
    # state that unrelated code may be relying on.
    rng = random.Random(seed)

    neg_ds = dataset.filter(lambda example: example[target]==False or example[target] == "0")
    pos_ds = dataset.filter(lambda example: example[target]==True or example[target] == "1")

    # Fail with an explicit message instead of random.sample's generic one.
    for class_name, requested, available in (
        ("negative", neg_n_sample, len(neg_ds)),
        ("positive", pos_n_sample, len(pos_ds)),
    ):
        if requested < 0 or requested > available:
            raise ValueError(
                f"cannot sample {requested} {class_name} examples: "
                f"only {available} available for target column {target!r}"
            )

    # Negative indices are drawn first, then positive ones; the order matters
    # for reproducing a given seed's selection.
    neg_sampled_indices = rng.sample(range(len(neg_ds)), neg_n_sample)
    pos_sampled_indices = rng.sample(range(len(pos_ds)), pos_n_sample)

    neg_selected_ds = neg_ds.select(neg_sampled_indices)
    pos_selected_ds = pos_ds.select(pos_sampled_indices)
    return concatenate_datasets([neg_selected_ds, pos_selected_ds]).shuffle(seed=seed)

def filter_valid(preds, labels):
    """Keep only the (label, prediction) pairs whose prediction is not '-1'.

    Returns:
        A 2-tuple ``(labels, preds)`` of tuples -- note the label-first order,
        which is the reverse of the argument order.

    Raises:
        ValueError: If no pair survives the filter (unpacking an empty zip).
            ``calculate_valid`` relies on this to detect the all-invalid case.
    """
    kept_pairs = []
    for pred, label in zip(preds, labels):
        if pred != '-1':
            kept_pairs.append((label, pred))
    # Transpose the list of pairs into two parallel tuples.
    filtered_label, filtered_pred = zip(*kept_pairs)
    return filtered_label, filtered_pred

def rearrange_string(s):
    """Swap the first two hyphen-separated fields of ``s``.

    Example: ``'1b-lora'`` becomes ``'lora-1b'``. Fields beyond the second are
    dropped, and a string without a hyphen raises ``IndexError``.
    """
    fields = s.split('-')
    first, second = fields[0], fields[1]
    return f"{second}-{first}"

def calculate_valid(preds,labels):
    """Score only the predictions that are not the '-1' sentinel.

    Returns:
        ``(valid_acc, valid_f1, valid_rec)`` as percentages, where
        ``valid_rec`` is the share of examples that had a non-'-1' prediction.
        All three are 0 when no prediction is valid.
    """
    try:
        kept_labels, kept_preds = filter_valid(preds, labels)
    except ValueError:
        # filter_valid raises ValueError when nothing survives the filter.
        return 0, 0, 0

    metric_inputs = {'predictions': kept_preds, 'references': kept_labels}
    valid_acc = accuracy_metric.compute(**metric_inputs)['accuracy'] * 100
    valid_f1 = f1_metric.compute(**metric_inputs)['f1'] * 100
    valid_rec = len(kept_labels) / len(labels) * 100
    return valid_acc, valid_f1, valid_rec

def calculate_overall(preds,labels):
    """Score every prediction, including any '-1' sentinels, against ``labels``.

    Returns:
        ``(accuracy, macro_f1)`` as percentages.
    """
    accuracy_result = accuracy_metric.compute(predictions=preds, references=labels)
    acc = accuracy_result['accuracy'] * 100

    f1_result = f1_metric.compute(predictions=preds, references=labels, average='macro')
    f1_macro = f1_result['f1'] * 100

    return acc, f1_macro

def contains_keyword(sentence, keywords):
    """Return True if any keyword occurs in ``sentence``, ignoring case.

    Matching is plain substring containment, not whole-word matching.
    An empty ``keywords`` iterable yields False.
    """
    return any(keyword.lower() in sentence.lower() for keyword in keywords)

def paper_method(raw_texts,positive_kw,negative_kw):
    """Map each generated answer to "1", "0" or "-1".

    Rules, applied in order to each text:
      1. Take the lower-cased text up to the first '.' or ',' as the short
         answer. If it contains 'yes' but not 'no' -> "1"; if it contains
         'no' but not 'yes' -> "0". These are substring checks, so 'no' is
         also found inside words such as 'not' or 'know'.
      2. Otherwise, if the full text contains a positive keyword -> "1".
      3. Otherwise, if the full text contains a negative keyword -> "0".
      4. Otherwise -> "-1" (answer could not be interpreted).

    Returns:
        A list of string labels, one per input text.
    """
    def classify(text):
        short_ans = text.lower().replace(',', '.').split('.')[0]
        has_yes = 'yes' in short_ans
        has_no = 'no' in short_ans
        if has_yes and not has_no:
            return "1"
        if has_no and not has_yes:
            return "0"
        # Ambiguous or missing yes/no: fall back to keyword matching on the
        # whole text, positive keywords taking precedence.
        if contains_keyword(text, positive_kw):
            return "1"
        if contains_keyword(text, negative_kw):
            return "0"
        return "-1"

    return [classify(text) for text in raw_texts]

def summary_table(file_list,path,labels,positive_kw,negative_kw):
    """Build a per-experiment score table from saved generation files.

    Args:
        file_list: Experiment identifiers; each is inserted between
            ``path[0]`` and ``path[1]`` to form the JSON file name.
        path: Two-element sequence ``[prefix, suffix]`` for the file name.
        labels: Reference labels, aligned with the entries of every file.
        positive_kw: Keywords passed to ``paper_method`` as positive cues.
        negative_kw: Keywords passed to ``paper_method`` as negative cues.

    Returns:
        A DataFrame with one row per experiment file that exists, holding the
        metrics rounded to two decimals. Experiments whose file is missing are
        skipped; if none exists the frame is empty but keeps its columns.
    """
    columns = ['experiment', '%overall_acc','%overall_f1_macro','%valid_rec' ,'%valid_acc', '%valid_f1']
    rows = []
    for file in file_list:
        try:
            with open(path[0]+file+path[1], 'r') as f:
                data = json.load(f)
        except FileNotFoundError:
            # Deliberate best-effort: not every experiment has been run.
            continue

        # Each entry is indexed as entry[0]['raw_text']; only the text after
        # the last "\nAnswer:" marker is scored.
        raw_texts = [text[0]['raw_text'].split("\nAnswer:")[-1] for text in data]
        preds = paper_method(raw_texts,positive_kw,negative_kw)
        acc,f1_macro = calculate_overall(preds,labels)
        valid_acc,valid_f1,valid_rec = calculate_valid(preds,labels)

        rows.append({'experiment': rearrange_string(file),
                     '%overall_acc':round(acc, 2),
                     '%overall_f1_macro':round(f1_macro, 2),
                     '%valid_rec':round(valid_rec, 2),
                     '%valid_acc':round(valid_acc, 2),
                     '%valid_f1':round(valid_f1, 2)})

    # Build the frame once from the collected records rather than enlarging
    # it row by row.
    return pd.DataFrame(rows, columns=columns)

def summary_table_sampling(file_list,path,pos_n_sample,neg_n_sample,positive_kw,negative_kw,seed=0):
    """Build a per-experiment score table on a fixed-size sample of each file.

    Args:
        file_list: Experiment identifiers; each is inserted between
            ``path[0]`` and ``path[1]`` to form the JSON file name.
        path: Two-element sequence ``[prefix, suffix]`` for the file name.
        pos_n_sample: Number of positive examples to sample per file.
        neg_n_sample: Number of negative examples to sample per file.
        positive_kw: Keywords passed to ``paper_method`` as positive cues.
        negative_kw: Keywords passed to ``paper_method`` as negative cues.
        seed: Seed forwarded to ``sampling``; the default of 0 reproduces the
            previously hard-wired behaviour.

    Returns:
        A DataFrame with one row per experiment file that exists, holding the
        metrics rounded to two decimals. Experiments whose file is missing are
        skipped; if none exists the frame is empty but keeps its columns.
    """
    columns = ['experiment', '%overall_acc','%overall_f1_macro','%valid_rec' ,'%valid_acc', '%valid_f1']
    rows = []
    for file in file_list:
        try:
            with open(path[0]+file+path[1], 'r') as f:
                data = json.load(f)
        except FileNotFoundError:
            # Deliberate best-effort: not every experiment has been run.
            continue

        # Each entry is a sequence whose first element is the record dict
        # (it is read for 'raw_text' and 'true_label' below).
        records = [content[0] for content in data]
        dataset_clean = Dataset.from_pandas(pd.DataFrame(data=records))
        sampled = sampling(dataset_clean,"true_label",pos_n_sample,neg_n_sample,seed=seed)

        labels = [example['true_label'] for example in sampled]
        # Only the text after the last "\nAnswer:" marker is scored.
        raw_texts = [example['raw_text'].split("\nAnswer:")[-1] for example in sampled]
        preds = paper_method(raw_texts,positive_kw,negative_kw)
        acc,f1_macro = calculate_overall(preds,labels)
        valid_acc,valid_f1,valid_rec = calculate_valid(preds,labels)

        rows.append({'experiment': rearrange_string(file),
                     '%overall_acc':round(acc, 2),
                     '%overall_f1_macro':round(f1_macro, 2),
                     '%valid_rec':round(valid_rec, 2),
                     '%valid_acc':round(valid_acc, 2),
                     '%valid_f1':round(valid_f1, 2)})

    # Build the frame once from the collected records rather than enlarging
    # it row by row.
    return pd.DataFrame(rows, columns=columns)

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Experiment grid: every model size crossed with every fine-tuning method.
models = ['1b', '3b', '7b', '15b']
methods = [
    'lora', 'adalora', 'ia3', 'prompt', 'ptuning',
    'parallel', 'adapterp', 'adapterh', 'fft',
]
# Identifiers are "<model>-<method>", grouped by method (all sizes of one
# method are adjacent).
file_list = ["-".join((model, method)) for method in methods for model in models]

# Defect detection

## defect detection formatB
instruction = '''Is there a defect in the Code, and respond to YES or NO.''' <br>
prompt= f'''Question: {instruction}\n{code}\n\nAnswer:'''

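## defect detection formatA
instruction = '''Is there a defect in the Code, and respond to YES or NO.''' <br>
prompt= f'''Question: {instruction}\n{code}\n\nAnswer:''' <br>
**Note (to confirm):** as written, this prompt is identical to formatB above. In the clone-detection section, formatA differs from formatB by placing the instruction after the code, so the defect-detection formatA prompt may have been intended as `Question: {code}\n{instruction}\n\nAnswer:` — verify against the generation script.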

In [4]:
# Fallback keywords used by paper_method when the short answer is not a clear
# yes/no. NOTE(review): 'ere is a' already matches everything 'there is a'
# matches -- presumably kept to catch truncated output; confirm.
defect_positive = ['there is a','ere is a','has a defect','contains a defect']
defect_negative = ['there is no defect','The code is correct']

# [prefix, suffix] wrapped around each experiment id to form the file name.
# Forward slashes resolve on Windows as well as POSIX; the former
# backslash-separated prefix only resolved on Windows, and elsewhere every
# file was treated as missing and silently skipped.
dd_path = ['./run_result/defect_generations_','_fullA.json']

print("defect detection formatA")
# 919 positive + 1081 negative = 2000 sampled examples per experiment.
summary_table_sampling(file_list,dd_path,919,1081,defect_positive,defect_negative)

defect detection formatA


Filter: 100%|██████████| 2732/2732 [00:00<00:00, 68663.49 examples/s]
Filter: 100%|██████████| 2732/2732 [00:00<00:00, 95011.31 examples/s]
Filter: 100%|██████████| 2732/2732 [00:00<00:00, 85030.19 examples/s]
Filter: 100%|██████████| 2732/2732 [00:00<00:00, 76774.08 examples/s]
Filter: 100%|██████████| 2732/2732 [00:00<00:00, 120328.03 examples/s]
Filter: 100%|██████████| 2732/2732 [00:00<00:00, 116899.49 examples/s]
Filter: 100%|██████████| 2732/2732 [00:00<00:00, 131886.64 examples/s]
Filter: 100%|██████████| 2732/2732 [00:00<00:00, 102765.24 examples/s]
Filter: 100%|██████████| 2732/2732 [00:00<00:00, 118743.21 examples/s]
Filter: 100%|██████████| 2732/2732 [00:00<00:00, 101595.36 examples/s]
Filter: 100%|██████████| 2732/2732 [00:00<00:00, 153931.82 examples/s]
Filter: 100%|██████████| 2732/2732 [00:00<00:00, 107720.15 examples/s]
Filter: 100%|██████████| 2732/2732 [00:00<00:00, 112463.94 examples/s]
Filter: 100%|██████████| 2732/2732 [00:00<00:00, 104790.48 examples/s]
Filter: 10

Unnamed: 0,experiment,%overall_acc,%overall_f1_macro,%valid_rec,%valid_acc,%valid_f1
0,lora-1b,44.15,21.49,97.6,45.24,61.78
1,lora-3b,44.9,20.98,98.55,45.56,62.47
2,lora-7b,49.05,47.27,100.0,49.05,56.95
3,lora-15b,47.2,31.67,98.6,47.87,46.74
4,adalora-1b,53.2,29.9,97.0,54.85,22.89
5,adalora-3b,28.35,18.52,48.9,57.98,1.44
6,adalora-7b,36.9,20.26,67.4,54.75,0.0
7,adalora-15b,21.55,15.75,37.15,58.01,0.0
8,ia3-1b,53.9,28.92,97.75,55.14,18.57
9,ia3-3b,33.55,20.07,59.35,56.53,2.27


In [None]:
# Check invalid predictions (disabled): the snippet is wrapped in a
# triple-quoted string, so nothing runs; the cell only evaluates and echoes
# the string itself. When enabled, it loads one run's generations
# (15b p-tuning) and keeps the rows that paper_method could not interpret,
# i.e. those with paper_preds == '-1'.

'''with open('.\\run_result\\defect_generations_15b-ptuning_A.json', 'r') as f:
    data = json.load(f)

defect_positive = ['there is a','ere is a','has a defect','contains a defect']
defect_negative = ['there is no defect', 'The code is correct']        
raw_texts = [text[0]['raw_text'].split("\nAnswer:")[-1] for text in data]
paper_preds = paper_method(raw_texts,defect_positive,defect_negative)
preds = [label[0]["prediction"]for label in data]
true_labels = [label[0]["true_label"]for label in data]

df = pd.DataFrame({
    'true_labels': true_labels,
    'raw_texts': raw_texts,
    'paper_preds': paper_preds,
    'preds': preds,
})
filtered_df = df[df['paper_preds'] == '-1']
filtered_df'''
# Optional export of the uninterpretable rows for manual inspection:
#filtered_df.to_csv('invalid_15b_ptuning.csv', index=False)

'with open(\'.\\run_result\\defect_generations_15b-ptuning_A.json\', \'r\') as f:\n    data = json.load(f)\n\ndefect_positive = [\'there is a\',\'ere is a\',\'has a defect\',\'contains a defect\']\ndefect_negative = [\'there is no defect\', \'The code is correct\']        \nraw_texts = [text[0][\'raw_text\'].split("\nAnswer:")[-1] for text in data]\npaper_preds = paper_method(raw_texts,defect_positive,defect_negative)\npreds = [label[0]["prediction"]for label in data]\ntrue_labels = [label[0]["true_label"]for label in data]\n\ndf = pd.DataFrame({\n    \'true_labels\': true_labels,\n    \'raw_texts\': raw_texts,\n    \'paper_preds\': paper_preds,\n    \'preds\': preds,\n})\nfiltered_df = df[df[\'paper_preds\'] == \'-1\']\nfiltered_df'

# Clone detection

## clone detection formatB
instruction= '''Is there a clone relation between the Code1 and Code2, and respond to YES or NO.''' <br>
code1= doc['func1'] <br>
code2= doc['func2'] <br>
prompt= f'''Question: {instruction}\nCode1: {code1}.\nCode2: {code2}.\n\nAnswer:'''

## clone detection formatA
instruction= '''Is there a clone relation between the Code1 and Code2, and respond to YES or NO.''' <br>
code1= doc['func1'] <br>
code2= doc['func2'] <br>
prompt= f'''Question: Code1: {code1}.\nCode2: {code2}.\n{instruction}\n\nAnswer:''' <br>

In [5]:
# Fallback keywords used by paper_method when the short answer is not a clear
# yes/no. NOTE(review): 'ere is a' already matches everything 'there is a'
# matches -- presumably kept to catch truncated output; confirm.
clone_positive = ['there is a','ere is a']
clone_negative = ['there is no']

# [prefix, suffix] wrapped around each experiment id to form the file name.
# Forward slashes resolve on Windows as well as POSIX; the former
# backslash-separated prefix only resolved on Windows, and elsewhere every
# file was treated as missing and silently skipped.
cd_path = ['./run_result/clone_generations_','_fullA.json']

print("clone detection formatA 274/1726")
# 274 positive + 1726 negative = 2000 sampled examples per experiment.
summary_table_sampling(file_list,cd_path,274,1726,clone_positive,clone_negative)

clone detection formatA 274/1726


Filter: 100%|██████████| 2726/2726 [00:00<00:00, 231596.20 examples/s]
Filter: 100%|██████████| 2726/2726 [00:00<00:00, 233492.74 examples/s]
Filter: 100%|██████████| 2726/2726 [00:00<00:00, 203681.71 examples/s]
Filter: 100%|██████████| 2726/2726 [00:00<00:00, 192220.72 examples/s]
Filter: 100%|██████████| 2726/2726 [00:00<00:00, 235823.63 examples/s]
Filter: 100%|██████████| 2726/2726 [00:00<00:00, 160790.80 examples/s]
Filter: 100%|██████████| 2726/2726 [00:00<00:00, 232779.69 examples/s]
Filter: 100%|██████████| 2726/2726 [00:00<00:00, 184114.15 examples/s]
Filter: 100%|██████████| 2726/2726 [00:00<00:00, 184780.66 examples/s]
Filter: 100%|██████████| 2726/2726 [00:00<00:00, 192918.03 examples/s]
Filter: 100%|██████████| 2726/2726 [00:00<00:00, 199373.52 examples/s]
Filter: 100%|██████████| 2726/2726 [00:00<00:00, 130666.07 examples/s]
Filter: 100%|██████████| 2726/2726 [00:00<00:00, 192774.91 examples/s]
Filter: 100%|██████████| 2726/2726 [00:00<00:00, 192583.34 examples/s]
Filter

Unnamed: 0,experiment,%overall_acc,%overall_f1_macro,%valid_rec,%valid_acc,%valid_f1
0,lora-1b,10.3,9.3,59.95,17.18,26.17
1,lora-3b,13.7,12.05,100.0,13.7,24.1
2,lora-7b,15.3,14.1,100.0,15.3,24.24
3,lora-15b,14.6,8.8,99.8,14.63,24.33
4,adalora-1b,12.5,8.19,90.75,13.77,22.72
5,adalora-3b,7.2,7.89,12.45,57.83,27.59
6,adalora-7b,48.2,22.93,55.35,87.08,1.38
7,adalora-15b,6.8,6.07,19.1,35.6,9.56
8,ia3-1b,12.65,8.28,91.25,13.86,22.64
9,ia3-3b,11.85,11.76,23.15,51.19,25.66


In [46]:
# Check invalid predictions (disabled): the snippet is wrapped in a
# triple-quoted string, so nothing runs when the cell is executed.
# NOTE(review): despite the "invalid prediction" label, the filter at the end
# keeps rows with paper_preds != '-1', i.e. the predictions paper_method DID
# interpret; the defect-detection counterpart filters with == '-1'. Confirm
# which one is intended.
'''with open('.\\run_result\\clone_generations_1b-ptuning_fullA.json', 'r') as f:
    data = json.load(f)

clone_positive = ['there is a','ere is a']
clone_negative = ['there is no']    
raw_texts = [text[0]['raw_text'].split("\nAnswer:")[-1] for text in data]
paper_preds = paper_method(raw_texts,clone_positive,clone_negative)
preds = [label[0]["prediction"]for label in data]
true_labels = [label[0]["true_label"]for label in data]

df = pd.DataFrame({
    'true_labels': true_labels,
    'raw_texts': raw_texts,
    'paper_preds': paper_preds,
    'preds': preds,
})
filtered_df = df[df['paper_preds'] != '-1']
filtered_df'''

Unnamed: 0,true_labels,raw_texts,paper_preds,preds
1,0,Yes\n,1,1
2,0,Yes,1,1
4,0,NO,0,0
5,1,NO\n,0,0
6,0,Yes\n,1,1
...,...,...,...,...
2720,0,NO\n,0,0
2721,1,Yes.\n,1,1
2722,0,Yes.\n,1,1
2724,1,NO\n,0,0
