# Imports

In [1]:
import sys
from pathlib import Path

import matplotlib.pyplot as plt
from tqdm import tqdm

#To import modules
sys.path.append('../')
from notebooks.dataset_helper import DatasetHelper
from codetector.src.features.shared.data.models.code_detection_sample_model import CodeDetectionSampleModel

# Temporary test dataset class (remove later)

In [2]:
from codetector.src.features.shared.data.models.dataset.parquet_dataset import ParquetDataset
class TestDetectionParquetDataset(ParquetDataset):
    """Minimal ``ParquetDataset`` subclass used by this notebook.

    Supplies the three hooks below with fixed answers and performs no
    pre-processing of its own.
    """

    def getContentType(self):
        """Return the sample model class stored in this dataset."""
        return CodeDetectionSampleModel

    def preProcess(self):
        """Intentionally a no-op: nothing is pre-processed here."""

    def getTag(self):
        """Return the fixed tag string identifying this dataset."""
        return 'test_detection_parquet'

# Main

In [3]:
# Open the parquet-backed detection dataset stored under ../data/detection_parquet
# (path is relative to the notebook's working directory) and load it.
parq = TestDetectionParquetDataset('../data/detection_parquet')
parq.loadDataset()

print('Loaded dataset')

# Tabular view of the dataset; later cells filter it with `df.loc[...]` on
# columns such as 'Dataset', 'Language', 'TopP', 'Temperature' and 'Generator'.
df = parq.toDataframe()


print('Converted to dataframe')

# Shared helper instance; later cells call `helper.calculateAUROCScores(...)`.
helper = DatasetHelper()

Loaded dataset
Converted to dataframe


In [28]:
# Model tags that can be used as a detector's base (scoring) model.
baseModels = [
    'codellama-13b',
    'codellama-instruct-13b',
    'llama3-8b',
    'llama3-instruct-8b',
    'codellama-7b',
    'codellama-instruct-7b',
    'codegen2_5-7b',
    'codegeex2-6b',
    'starcoder2-7b',
    'codegemma-instruct-7b',
    'wavecoderultra-7b',
    'incoder-6b',
    'phi3mini4k-instruct-4b',
    'starcoder2-3b',
    'phi-1b',
    'incoder-1b',
]

# Generator tags: every base model plus one extra model that is only ever a generator.
generators = [*baseModels, 'openaio1-mini']

# Detector names evaluated in this notebook.
detectors = [
    'loglikelihood',
    'entropy',
    'rank',
    'fastdetectgpt',
    'binoculars',
]

In [27]:
### All squares
# Computes ROC data for every (dataset, generator, base model, detector)
# combination, then writes one ROC figure per (detector, base model, generator)
# with one curve per dataset.
#
# Reads the notebook-level names `df`, `parq`, `helper`, `baseModels`,
# `generators` and `detectors`; none of the three config lists is reassigned
# here, so the cell can be re-run without re-running the config cell.
datasets = ['hf_codesearchnet-python', 'hf_apps', 'stackoverflow-post']

# Root folder for the figures; one sub-folder per detector is created on demand.
ROC_FIGURE_DIR = Path('./figures/roc')

# auroc_vals[dataset][detector][generator][baseModel] -> (auroc, fpr, tpr, <unused>)
auroc_vals : dict[str,dict] = {}

bar = tqdm(datasets,position=0)
for dataset in bar:
    tag = dataset
    bar.set_description(f'Calculating {tag}')

    auroc_vals[tag] = {
        detector: {generator: {} for generator in generators}
        for detector in detectors
    }

    # The row selection depends only on the dataset, so build the mask once
    # per dataset instead of once per generator/base-model pair.
    # Keeps Python rows of this dataset that are either human-written or were
    # generated with TopP == 0.95 and Temperature == 0.97.
    is_selected_sampling = (df['TopP'] == 0.95) & (df['Temperature'] == 0.97)
    row_mask = (
        (df['Dataset'] == tag)
        & (df['Language'] == 'python')
        & (is_selected_sampling | (df['Generator'] == 'human'))
    )

    for generator in tqdm(generators,position=1,desc='Looping through generators',leave=False):
        for baseModel in tqdm(baseModels, position=2, desc='Looping through base models',leave=False):
            # A fresh selection per call, exactly as before, in case the helper
            # modifies the frame it is given.
            filtered = df.loc[row_mask]
            # NOTE(review): flipList presumably names detectors whose score
            # direction is inverted before scoring - confirm in DatasetHelper.
            temp = helper.calculateAUROCScores(parq,filtered,sameGeneratorOnly=False, baseModelOverride=baseModel,generatorOverride=generator, flipList=['binoculars', 'detectcodegpt', 'rank'], returnFprTpr=True)
            for detector in temp:
                if detector not in detectors:
                    continue

                # The helper may return nothing for this base model; such
                # combinations are simply left out of auroc_vals.
                if baseModel in temp[detector]:
                    auroc_vals[tag][detector][generator][baseModel] = temp[detector][baseModel]

print('Done calculating AUROC!')

# Per the matplotlib docs, 'none' keeps SVG text as text instead of converting
# it to paths. Set once here rather than on every figure.
plt.rcParams['svg.fonttype'] = 'none'

for detector in tqdm(detectors, desc='Looping through detectors', position=0):
    # savefig does not create missing folders, so make sure the target exists.
    detector_dir = ROC_FIGURE_DIR / detector
    detector_dir.mkdir(parents=True, exist_ok=True)

    for generator in tqdm(generators, desc='Looping through generators', position=1,leave=False):
        for baseModel in tqdm(baseModels, desc='Looping through base models', position=2, leave=False):
            plotted_any = False
            for dataset in tqdm(auroc_vals, desc='Looping through datasets', position=3,leave=False):
                # Skip datasets for which no result was stored for this
                # combination instead of failing with a KeyError.
                scores = auroc_vals[dataset][detector][generator]
                if baseModel not in scores:
                    continue
                auroc, fpr, tpr, _ = scores[baseModel]

                plt.plot(fpr,tpr,label=f'{dataset}, AUC: {round(auroc,4)}')
                plotted_any = True

            if not plotted_any:
                # Nothing to draw for this combination: do not write an empty figure.
                plt.close()
                continue

            plt.title(f'{detector.capitalize()} (Python Only)\nBase: {baseModel}\nGen: {generator}')

            plt.xlabel("False Positive Rate")
            plt.ylabel("True Positive Rate")

            plt.tight_layout()
            plt.legend()

            plt.savefig(detector_dir / f'{baseModel}_{generator}.png')
            plt.savefig(detector_dir / f'{baseModel}_{generator}.svg')
            # Close the figure so figures do not accumulate in memory across the loops.
            plt.close()

Calculating hf_codesearchnet-python:   0%|                                                                                                      | 0/3 [00:00<?, ?it/s]
Looping through generators:   0%|                                                                                                               | 0/1 [00:00<?, ?it/s][A

Looping through base models:   0%|                                                                                                              | 0/1 [00:00<?, ?it/s][A[A

Looping through base models: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.46s/it][A[A

                                                                                                                                                                      [A[A
Looping through generators: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1

Done calculating AUROC!


Looping through detectors:   0%|                                                                                                                | 0/1 [00:00<?, ?it/s]
Looping through generators:   0%|                                                                                                               | 0/1 [00:00<?, ?it/s][A

Looping through base models:   0%|                                                                                                              | 0/1 [00:00<?, ?it/s][A[A


Looping through datasets:   0%|                                                                                                                 | 0/3 [00:00<?, ?it/s][A[A[A


                                                                                                                                                                      [A[A[A

Looping through base models: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████