# Imports

In [26]:
import matplotlib.pyplot as plt
import sys
import numpy as np

# Add the parent directory to sys.path so the project packages imported below can be found
sys.path.append('../')
from notebooks.dataset_helper import DatasetHelper
from codetector.src.features.shared.data.models.code_detection_sample_model import CodeDetectionSampleModel

# Temporary dataset wrapper (TODO: remove later)

In [27]:
from codetector.src.features.shared.data.models.dataset.parquet_dataset import ParquetDataset
class TestDetectionParquetDataset(ParquetDataset):
    """Thin ParquetDataset subclass used by this notebook to load detection results.

    It only supplies the three hooks below; how ParquetDataset consumes them is
    defined in the base class and is not visible from this notebook.
    """

    def getTag(self):
        """Return the fixed tag string identifying this dataset."""
        return 'test_detection_parquet'

    def getContentType(self):
        """Return the sample model class (CodeDetectionSampleModel) for this dataset."""
        return CodeDetectionSampleModel

    def preProcess(self):
        """Do nothing: this dataset applies no pre-processing step."""
        return None

# Main

In [28]:
def printStats(auroc:dict) -> None:
    """Print per-detector AUROC summary statistics followed by an overall average.

    For each detector one line is printed with the mean, standard deviation
    (``np.std`` with its default arguments), minimum and maximum of its scores.
    A final line reports the mean of the per-detector means and the mean of the
    per-detector standard deviations.

    Detectors without any scores are reported and excluded from the overall
    average instead of raising ``ValueError`` from ``np.min``/``np.max`` on an
    empty sequence. If no detector has scores, a single notice is printed
    rather than ``nan nan``.

    Args:
        auroc: Mapping of detector name to a mapping whose values are AUROC
            scores. The keys of the inner mapping are not used here.

    Returns:
        None. All results are written to stdout.
    """
    means = []
    stds = []
    for detector, values in auroc.items():
        scores = list(values.values())
        if not scores:
            # Nothing to summarise for this detector; skipping it also keeps
            # NaNs out of the cross-detector average below.
            print(f'{detector}: no AUROC scores available')
            continue

        mean = np.mean(scores)
        std = np.std(scores)

        means.append(mean)
        stds.append(std)

        print(f'{detector}: {mean} +- {std}, Min: {np.min(scores)} Max: {np.max(scores)}')

    if not means:
        print('No AUROC scores available to average across methods')
        return

    # Note: the second figure is the average of the per-detector standard
    # deviations, not the standard deviation of the per-detector means.
    print(f'Average AUROC and standard deviation across methods: {np.mean(means)} {np.mean(stds)}')

In [3]:
# Load the detection results stored under ../data/detection_parquet.
parq = TestDetectionParquetDataset('../data/detection_parquet')
parq.loadDataset()

print('Loaded dataset')

# DataFrame view of the dataset; every later cell filters this frame.
df = parq.toDataframe()


print('Converted to dataframe')

# Helper object that provides calculateAUROCScores for the cells below.
helper = DatasetHelper()

Loaded dataset
Converted to dataframe


In [None]:
# AUROC scores restricted to rows of the 'stackoverflow-post' dataset.
# NOTE(review): flipList presumably names detectors whose score direction is
# inverted before the AUROC is computed - confirm against DatasetHelper.
filtered = df.loc[df['Dataset']=='stackoverflow-post']
auroc = helper.calculateAUROCScores(parq,df=filtered, flipList=['binoculars', 'detectcodegpt', 'rank'],loadingBar=True)
print(auroc)

In [5]:
# AUROC scores on the 'hf_apps' dataset with explicit generator / base-model overrides.
# NOTE(review): presumably this scores 'codegeex2-6b' generations with
# 'codellama-instruct-7b' as the scoring model (sameGeneratorOnly=False allowing
# the two to differ) - confirm against DatasetHelper.calculateAUROCScores.
filtered = df.loc[df['Dataset']=='hf_apps']
auroc = helper.calculateAUROCScores(parq,df=filtered, flipList=['binoculars', 'detectcodegpt', 'rank'],
                                    loadingBar=True,
                                    sameGeneratorOnly=False,
                                    generatorOverride='codegeex2-6b',
                                    baseModelOverride='codellama-instruct-7b')
# The result is a nested dict: detector name -> {base model name: AUROC}.
print(auroc)

Calculating AUROC: 100%|█████████████████████████████████████████████████████████████████████████████████████████████| 13464/13464 [00:00<00:00, 27553.06it/s]

{'fastdetectgpt': {'codellama-instruct-7b': 0.7135082819386059}, 'binoculars': {'codellama-instruct-7b': 0.7099422980989512}, 'loglikelihood': {'codellama-instruct-7b': 0.5680777577600478}, 'entropy': {'codellama-instruct-7b': 0.5160253049526405}, 'rank': {'codellama-instruct-7b': 0.3899835092033897}}





In [None]:
# Models available as generators; the same list is reused as the base models.
generators = ['codellama-13b',
              'codellama-instruct-13b',
              'llama3-8b',
              'llama3-instruct-8b',
              'codellama-7b',
              'codellama-instruct-7b',
              'codegen2_5-7b',
              'codegeex2-6b',
              'starcoder2-7b',
              'codegemma-instruct-7b',
              'wavecoderultra-7b',
              'incoder-6b',
              'phi3mini4k-instruct-4b',
              'starcoder2-3b',
              'phi-1b',
              'incoder-1b',]
baseModels = generators

# Only one generator is evaluated per run. Change this index to pick another
# entry of `generators` (1 -> 'codellama-instruct-13b').
generatorIndex = 1
generator = generators[generatorIndex]

filtered = df.loc[df['Dataset']=='hf_apps']

# Binoculars AUROC (rounded to 2 decimals) of the selected generator against
# every base model, in the order of `baseModels`.
output = []
for baseModel in baseModels:
    auroc = helper.calculateAUROCScores(parq,df=filtered, flipList=['binoculars', 'detectcodegpt', 'rank'],
                                loadingBar=True,
                                sameGeneratorOnly=False,
                                generatorOverride=generator,
                                baseModelOverride=baseModel)
    # Scores are keyed by detector name, then by base model name.
    output.append(round(auroc['binoculars'][baseModel],2))

In [10]:
# Rounded binoculars AUROC values collected by the sweep cell, one per base model.
print(output)

[0.64, 0.63, 0.6, 0.62, 0.66, 0.67, 0.64, 0.56, 0.56, 0.44, 0.79, 0.42, 0.73, 0.63, 0.57, 0.52]


## AUROC White Box

### All Datasets

In [29]:
# Keep rows generated with TopP 0.95 and Temperature 0.97, plus every row whose
# Generator is 'human'; all datasets are included.
filtered = df.loc[
    ((df['TopP'] == 0.95) & (df['Temperature'] == 0.97))
    | (df['Generator'] == 'human')
]
auroc = helper.calculateAUROCScores(
    parq,
    df=filtered,
    flipList=['binoculars', 'detectcodegpt', 'rank'],
    loadingBar=True,
)

Calculating AUROC: 100%|██████████████████████████████████████████████████████████████████████████████| 1122323/1122323 [00:18<00:00, 60294.61it/s]


In [30]:
# Re-order each detector's scores from highest to lowest AUROC.
auroc = {
    detector: dict(sorted(scores.items(), key=lambda entry: entry[1], reverse=True))
    for detector, scores in auroc.items()
}

In [31]:
# Per-detector mean / std / min / max of the AUROC scores, plus the overall average.
printStats(auroc)

fastdetectgpt: 0.8149416187090476 +- 0.1267472088022125, Min: 0.4015237386402016 Max: 0.9646909841350807
binoculars: 0.7869890800729684 +- 0.12742250138526426, Min: 0.36438709460052443 Max: 0.9446938413212193
loglikelihood: 0.5497239881277864 +- 0.10753198114251802, Min: 0.34492972734469285 Max: 0.7821756408142435
entropy: 0.5587295219264783 +- 0.09110828554616163, Min: 0.38081449207008866 Max: 0.7416799210862658
rank: 0.5976868264777389 +- 0.09070832140888738, Min: 0.4094455398076249 Max: 0.7447218728633788
Average AUROC and standard deviation across methods: 0.661614207062804 0.10870365965700875


### Pre Datasets

In [32]:
# Datasets evaluated in the "Pre Datasets" section.
datasets = [
    'stackoverflow-pre',
    'hf_apps',
    'hf_codesearchnet-python',
    'hf_leetcode-pre',
]

# Same sampling / human filter as the all-datasets run, limited to `datasets`.
filtered = df.loc[
    (
        ((df['TopP'] == 0.95) & (df['Temperature'] == 0.97))
        | (df['Generator'] == 'human')
    )
    & (df['Dataset'].isin(datasets))
]
auroc = helper.calculateAUROCScores(
    parq,
    df=filtered,
    flipList=['binoculars', 'detectcodegpt', 'rank'],
    loadingBar=True,
)

# Sort each detector's scores from highest to lowest before summarising.
auroc = {
    detector: dict(sorted(scores.items(), key=lambda entry: entry[1], reverse=True))
    for detector, scores in auroc.items()
}
printStats(auroc)

Calculating AUROC: 100%|████████████████████████████████████████████████████████████████████████████████| 683436/683436 [00:11<00:00, 61263.41it/s]

fastdetectgpt: 0.8043746772046345 +- 0.12511037760262153, Min: 0.3996269233287254 Max: 0.9584073032485003
binoculars: 0.7660684127831034 +- 0.12671080133704962, Min: 0.36374428305376477 Max: 0.9342702062636588
loglikelihood: 0.5279828156351329 +- 0.10460846311271559, Min: 0.3210870042979034 Max: 0.7499620832253249
entropy: 0.5715814744129559 +- 0.08631452613741004, Min: 0.4107453828711969 Max: 0.7536758816737834
rank: 0.5786455438610026 +- 0.08724042313841575, Min: 0.39487181833464113 Max: 0.7215737283798115
Average AUROC and standard deviation across methods: 0.649730584779366 0.1059969182656425





### Post Datasets

In [33]:
# Datasets evaluated in the "Post Datasets" section.
# NOTE(review): 'leetcode-post' has no 'hf_' prefix, unlike 'hf_leetcode-pre'
# in the pre-datasets cell - confirm this matches the value stored in df['Dataset'].
datasets = [
    'stackoverflow-post',
    'leetcode-post',
]

# Same sampling / human filter as the all-datasets run, limited to `datasets`.
filtered = df.loc[
    (
        ((df['TopP'] == 0.95) & (df['Temperature'] == 0.97))
        | (df['Generator'] == 'human')
    )
    & (df['Dataset'].isin(datasets))
]
auroc = helper.calculateAUROCScores(
    parq,
    df=filtered,
    flipList=['binoculars', 'detectcodegpt', 'rank'],
    loadingBar=True,
)

# Sort each detector's scores from highest to lowest before summarising.
auroc = {
    detector: dict(sorted(scores.items(), key=lambda entry: entry[1], reverse=True))
    for detector, scores in auroc.items()
}
printStats(auroc)

Calculating AUROC: 100%|████████████████████████████████████████████████████████████████████████████████| 438887/438887 [00:07<00:00, 59141.59it/s]

fastdetectgpt: 0.8337808969502707 +- 0.13213920963977657, Min: 0.40508492208398794 Max: 0.9734039602795174
binoculars: 0.821329211030783 +- 0.1321039930984746, Min: 0.3661347713472889 Max: 0.9596680892058089
entropy: 0.5407346153503334 +- 0.09673919383294796, Min: 0.3427224527431996 Max: 0.7234083358501964
loglikelihood: 0.5829502907827772 +- 0.11309606678370031, Min: 0.3810415498123139 Max: 0.8306051797040169
rank: 0.6269465922519999 +- 0.0975895369952871, Min: 0.4322617935544138 Max: 0.7799232993838803
Average AUROC and standard deviation across methods: 0.6811483212732329 0.1143336000700373



