In [None]:
# imports

import os
import json
import time
from pathlib import Path
from typing import Any, Dict, List, Tuple
from dataclasses import dataclass, asdict
from typing import List
from pathlib import Path
import json
import random

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# prepare paths

# Project root, expressed relative to the directory the notebook is run from.
root_path = Path("..")
# Every component is joined with `/`. Two adjacent string literals without an
# operator are concatenated by Python ('notebook' 'data' -> 'notebookdata'),
# which silently pointed at a different directory.
# NOTE(review): confirm the dataset really lives under <root>/notebook/data/.
dataset_path = root_path / 'notebook' / 'data' / 'test_dataset.json'
results_dir = root_path / 'results'

# Output artifacts, all written below the results directory.
predictions_csv = results_dir / 'predictions.csv'
summary_csv = results_dir / 'summary.csv'
ablations_csv = results_dir / 'ablations.csv'
chart_path = results_dir / 'comparison_chart.png'

In [None]:
# run ./03_interactive.ipynb to load all methods into memory
# (the wrappers defined in this cell call detect_embedding, detect_llm,
# detect_rag and detect_hybrid_rag, none of which are defined in this
# notebook, so they are expected to come from the notebook executed here)

%run ./03_interactive.ipynb

def call_embedding(code: str, top_k = 10):
    """Run the embedding-based detector on `code`.

    `top_k` is forwarded unchanged as a keyword argument to
    `detect_embedding`; that function's return value is passed straight
    back to the caller.
    """
    detector_options = {"top_k": top_k}
    return detect_embedding(code, **detector_options)

def call_llm(code: str, top_n = 25):
    """Run the direct-LLM detector on `code`.

    Thin adapter that gives `detect_llm` the same call shape as the other
    method wrappers in this notebook.

    Args:
        code: Source snippet to check.
        top_n: Forwarded unchanged to `detect_llm` as the `top_n` keyword.

    Returns:
        Whatever `detect_llm` returns, unmodified.
    """
    return detect_llm(code, top_n = top_n)

def call_rag(code: str, top_k = 5):
    """Run the RAG detector on `code`.

    Delegates to `detect_rag`, handing over `top_k` by keyword, and returns
    the detector's result as-is.
    """
    detection = detect_rag(code, top_k = top_k)
    return detection

def call_hybrid_rag(code: str, top_k_dense = 5, top_k_bm25 = 5, top_k_fused = 5, w_dense = 0.5):
    """Run the hybrid-RAG detector on `code`.

    Thin adapter that gives `detect_hybrid_rag` the same call shape as the
    other method wrappers in this notebook.

    Args:
        code: Source snippet to check.
        top_k_dense: Forwarded unchanged as the `top_k_dense` keyword.
        top_k_bm25: Forwarded unchanged as the `top_k_bm25` keyword.
        top_k_fused: Forwarded unchanged as the `top_k_fused` keyword.
        w_dense: Forwarded unchanged as the `w_dense` keyword.

    Returns:
        Whatever `detect_hybrid_rag` returns, unmodified.
    """
    return detect_hybrid_rag(
        code,
        top_k_dense = top_k_dense,
        top_k_bm25 = top_k_bm25,
        top_k_fused = top_k_fused,
        w_dense = w_dense
    )

# Registry of detection methods: method name -> wrapper callable.
# The evaluation loop iterates over it in this insertion order.
methods = dict(
    [
        ("pure_embedding", call_embedding),
        ("direct_llm", call_llm),
        ("rag", call_rag),
        ("hybrid_rag", call_hybrid_rag),
    ]
)

# Keyword arguments handed to each wrapper during evaluation, keyed by the
# same names used in `methods`.
method_params = {}
method_params["pure_embedding"] = {"top_k": 10}
method_params["direct_llm"] = {"top_n": 25}
method_params["rag"] = {"top_k": 5}
method_params["hybrid_rag"] = {
    "top_k_dense": 5,
    "top_k_bm25": 5,
    "top_k_fused": 5,
    "w_dense": 0.5,
}

In [None]:
# load dataset

@dataclass
class CodeSample:
    """One record of the test dataset.

    Instances are built by unpacking each JSON object from the dataset file
    into this constructor, so every object must provide exactly these keys.
    """
    # Identifier of the sample; copied into each evaluation row.
    id: str
    # Code snippet handed to every detection method.
    query_code: str
    # Ground-truth label; the evaluation cell uses it to pick one positive
    # and one negative sample.
    is_positive: bool
    # Not read anywhere in this notebook — presumably describes where the
    # snippet came from; TODO confirm against the dataset generator.
    source_hint: str
    # Not read anywhere in this notebook — presumably free-form remarks;
    # TODO confirm.
    notes: str

def load_dataset(dataset_path):
    """Read the JSON dataset at `dataset_path` and return a list of `CodeSample`.

    The file is decoded as UTF-8 and must contain a JSON array of objects;
    each object is unpacked as keyword arguments into `CodeSample`.
    """
    with open(dataset_path, 'r', encoding='utf-8') as handle:
        records = json.loads(handle.read())

    samples = []
    for record in records:
        samples.append(CodeSample(**record))
    return samples

# Load all samples once; the evaluation cell reads `dataset` to pick its samples.
dataset = load_dataset(dataset_path)

In [61]:
# evaluation results

@dataclass
class EvaluationRow:
    """Outcome of running one detection method on one dataset sample.

    Rows are collected in a list and converted with `asdict` into a
    DataFrame, so each field becomes one column of the predictions table.
    """
    # Identifier of the evaluated sample.
    id: str
    # Ground-truth label of the sample.
    is_positive: bool
    # Method name as reported by the detector's result object.
    method: str
    # The detector's verdict.
    is_plagiarized: bool
    # Explanation text reported by the detector.
    reason: str
    # Evidence payloads copied from the detector result. `typing.Any` is the
    # proper "no constraint" annotation; the lowercase builtin `any` is a
    # function, not a type.
    # NOTE(review): payload shapes are not defined in this notebook.
    evidence_mine: Any
    evidence_oai: Any
    # Elapsed time of the detector call, in milliseconds.
    ms: float

# Spot-check: run every method on one positive and one negative sample.
# A dedicated, seeded generator keeps the chosen samples stable across
# "Restart Kernel -> Run All" without touching the global `random` state.
# NOTE(review): change or drop the seed to draw a different pair.
sample_rng = random.Random(42)

positive_samples = [s for s in dataset if s.is_positive]
negative_samples = [s for s in dataset if not s.is_positive]
if not positive_samples or not negative_samples:
    # Fail with an explicit message instead of random.choice's opaque IndexError.
    raise ValueError("dataset must contain at least one positive and one negative sample")

random_positive = sample_rng.choice(positive_samples)
random_negative = sample_rng.choice(negative_samples)

rows = []
for sample in [random_positive, random_negative]:
    for name, func in methods.items():
        params = method_params.get(name, {})

        # perf_counter is the clock intended for measuring short intervals;
        # time.time() follows the wall clock and can jump when it is adjusted.
        start_time = time.perf_counter()
        result = func(sample.query_code, **params)
        end_time = time.perf_counter()
        ms_elapsed = (end_time - start_time) * 1000

        row = EvaluationRow(
            id = sample.id,
            is_positive = sample.is_positive,
            method = result.method,
            is_plagiarized = result.is_plagiarized,
            reason = result.reason,
            evidence_mine = result.evidence_mine,
            evidence_oai = result.evidence_oai,
            ms = ms_elapsed
        )

        rows.append(row)

# convert and save
results = pd.DataFrame([asdict(r) for r in rows])
# Create the output directory on first run so to_csv does not fail on a fresh checkout.
predictions_csv.parent.mkdir(parents = True, exist_ok = True)
results.to_csv(predictions_csv, index = False)
results

Unnamed: 0,id,is_positive,method,is_plagiarized,reason,evidence_mine,evidence_oai,ms
0,pos_chunk_00007,True,pure_embedding,False,,[],,29.958248
1,pos_chunk_00007,True,direct_llm,True,The candidate snippet [7] contains an exact ma...,,"[Candidate snippet [7]:, ```go\nfunc (d *Downl...",3690.364122
2,pos_chunk_00007,True,rag,True,The candidate snippet [3] contains the exact s...,[],[Candidate snippet [3] contains the function: ...,1499.647856
3,pos_chunk_00007,True,hybrid_rag,True,The candidate snippet [1] contains the exact s...,"[{'index': 6, 'repo': 'got', 'path': 'got\down...",[Candidate snippet [1] has the exact same func...,2270.429134
4,neg_014,False,pure_embedding,False,,[],,40.955782
5,neg_014,False,direct_llm,False,The candidate snippets do not contain any code...,,[],1783.089161
6,neg_014,False,rag,False,The candidate snippets do not share structural...,[],[],1033.396006
7,neg_014,False,hybrid_rag,False,The candidate snippets do not share structural...,"[{'index': 159, 'repo': 'leetcode-go', 'path':...",[],1381.513596
