In [18]:
import sys
import yaml
# Make the repository root importable (this notebook lives two levels below it).
# Forward slashes avoid the invalid "\." escape sequence and resolve on every OS.
sys.path.append("../..")

import torch
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from tqdm import tqdm

from src.nn.train.OverlapsNet_train import PLOverlapsNet
from src.data.datasets.overlaps import OverlapsDataset
from src.data.loaders.overlaps import get_overlaps_dataloader

In [19]:
def bootstrap_test(
    model: PLOverlapsNet,
    dataset: OverlapsDataset,
    n_bootstraps=1000,
    num_workers=4,
    confidence_level=0.95,
    seed=None,
) -> dict[str, float]:
    """Estimate a stratified-bootstrap confidence interval for the model's test metric.

    Every sample of ``dataset`` is scored once; the cached predictions are then
    resampled with replacement ``n_bootstraps`` times. Positives (target == 1)
    and negatives (target == 0) are resampled separately, so each replicate
    keeps the original class counts. Each replicate is fed to
    ``model.test_metrics`` and the resulting scalar is collected.

    Args:
        model: Network to evaluate. It is switched to eval mode and must expose
            ``test_metrics`` with ``reset``/``update``/``compute``
            (presumably an AUROC metric -- confirm against the model definition).
        dataset: Test split, iterated one sample at a time.
        n_bootstraps: Number of bootstrap replicates (>= 1).
        num_workers: Worker count forwarded to the data loader.
        confidence_level: Two-sided confidence level, strictly between 0 and 1.
        seed: Optional seed for a private NumPy generator. When ``None`` the
            global ``np.random`` state is used, so code that seeds it with
            ``np.random.seed`` keeps getting the same resamples as before.

    Returns:
        Dict with the ``mean`` and ``std`` of the bootstrap scores and the
        ``lower_bound``/``upper_bound`` percentile limits of the interval.

    Raises:
        ValueError: If ``n_bootstraps`` or ``confidence_level`` is out of range,
            or if the dataset lacks positive or negative samples.
    """
    if n_bootstraps < 1:
        raise ValueError(f"n_bootstraps must be >= 1, got {n_bootstraps}")
    if not 0.0 < confidence_level < 1.0:
        raise ValueError(
            f"confidence_level must be strictly between 0 and 1, got {confidence_level}"
        )

    model.eval()
    device = next(model.parameters()).device

    loader = get_overlaps_dataloader(
        dataset,
        batch_size=1,
        num_workers=num_workers,
        shuffle=False
    )

    # The forward pass does not depend on the resample, so score each sample
    # once here instead of once per bootstrap draw. This assumes the model is
    # deterministic in eval mode.
    cached_probs = []
    cached_targets = []
    all_targets = []

    with torch.no_grad():
        for batch in tqdm(loader, desc="Loading test samples"):
            all_targets.append(batch[1].item())
            batch = [x.to(device) if isinstance(x, torch.Tensor) else x for x in batch]
            features, targets = batch
            cached_probs.append(model.forward(features).sigmoid())
            cached_targets.append(targets.int())

    all_targets = np.array(all_targets)
    positive_indices = np.where(all_targets == 1)[0]
    negative_indices = np.where(all_targets == 0)[0]

    # A ranking metric is undefined without both classes; fail loudly instead of
    # reporting a meaningless interval.
    if len(positive_indices) == 0 or len(negative_indices) == 0:
        raise ValueError(
            "bootstrap_test needs at least one positive and one negative sample "
            f"(got {len(positive_indices)} positive, {len(negative_indices)} negative)"
        )

    # Both the legacy module-level API and a Generator offer choice()/shuffle().
    rng = np.random if seed is None else np.random.default_rng(seed)

    aurocs = []
    try:
        with torch.no_grad():
            for _ in tqdm(range(n_bootstraps), desc="Calculating CI"):
                model.test_metrics.reset()

                pos_bootstrap = rng.choice(positive_indices, size=len(positive_indices), replace=True)
                neg_bootstrap = rng.choice(negative_indices, size=len(negative_indices), replace=True)
                bootstrap_indices = np.concatenate([pos_bootstrap, neg_bootstrap])
                # Kept so the random stream matches earlier runs that relied on
                # np.random.seed; the per-sample updates below are unchanged.
                rng.shuffle(bootstrap_indices)

                # Per-sample updates keep the tensor shapes seen by the metric
                # identical to what it received before predictions were cached.
                for idx in bootstrap_indices:
                    model.test_metrics.update(cached_probs[idx], cached_targets[idx])

                aurocs.append(model.test_metrics.compute().item())
    finally:
        # Do not leave the last replicate's state inside the model's metric.
        model.test_metrics.reset()

    alpha = 1 - confidence_level
    lower_percentile = alpha / 2 * 100
    upper_percentile = (1 - alpha / 2) * 100

    results = {
        'mean': float(np.mean(aurocs)),
        'std': float(np.std(aurocs)),
        'lower_bound': float(np.percentile(aurocs, lower_percentile)),
        'upper_bound': float(np.percentile(aurocs, upper_percentile)),
    }

    return results

In [22]:
# Fixed seed so the bootstrap resamples, and therefore the reported interval,
# are identical on every Restart & Run All.
SEED = 42

with open("../../src/nn/configs/OverlapsNet_train.yaml") as stream:
    config = yaml.safe_load(stream)

num_workers = int(config['num_workers'])

# The training config is passed through as keyword arguments when restoring the checkpoint.
model = PLOverlapsNet.load_from_checkpoint("../../src/nn/weights/OverlapsNet/best.ckpt", **config)
model.eval()

test_dataset = OverlapsDataset.from_path("../../datasets/PlagiNet/", split="test")

# Called like this, bootstrap_test resamples through NumPy's global generator,
# so seed it immediately before the call.
np.random.seed(SEED)

bootstrap_results = bootstrap_test(
    model=model,
    dataset=test_dataset,
    num_workers=num_workers,
)

Calculating distances: 100%|██████████| 4/4 [00:00<00:00, 4015.61it/s]
Calculating distances: 100%|██████████| 24/24 [00:00<00:00, 633.70it/s]
Extracting overlapping features: 100%|██████████| 4/4 [00:00<00:00, 28.07it/s]
Extracting non-overlapping features: 100%|██████████| 24/24 [00:00<00:00, 38.65it/s]
Loading test samples: 100%|██████████| 28/28 [00:03<00:00,  8.10it/s]
Calculating CI: 100%|██████████| 1000/1000 [00:13<00:00, 74.05it/s]


In [23]:
# Render floats with four decimals in every DataFrame displayed from here on.
pd.set_option('display.float_format', '{:.4f}'.format)

# One-row summary of the bootstrap statistics, keyed by metric name.
data = [
    {
        'Metric': 'auroc',
        'Mean': bootstrap_results['mean'],
        'Std': bootstrap_results['std'],
        'Lower Bound': bootstrap_results['lower_bound'],
        'Upper Bound': bootstrap_results['upper_bound'],
    },
]

pd.DataFrame(data)

Unnamed: 0,Metric,Mean,Std,Lower Bound,Upper Bound
0,auroc,1.0,0.0,1.0,1.0
