In [1]:
# load packages
import numpy as np
import torch
import optuna
import json
#from sklearn.metrics import brier_score_loss, log_loss, accuracy_score, precision_score, recall_score, f1_score
from torchcp.classification.scores import THR, APS, SAPS, RAPS
from torchcp.classification.predictors import ClassWisePredictor
import pandas as pd

from typing import Callable, Optional
from model.DeepLOB import deeplob
from utils.torch_dfs import LobDataset
from utils.constants import DEVICE

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Mini-batch size shared by the calibration, validation and test loaders.
batch_size = 64

# --- Calibration / validation split ---------------------------------------
# Both subsets are cut from the tail of the training file along axis 1:
# columns in [80%, 97.5%) go to calibration, the final 2.5% to validation.
# (The first 80% is not used here -- presumably it was the training split.)
dec_data = np.loadtxt('data/input/Train_Dst_NoAuction_DecPre_CF_7.txt')

n_cols = dec_data.shape[1]
cal_start = int(np.floor(n_cols * 0.8))
cal_end = int(np.floor(n_cols * 0.975))

dec_cal = dec_data[:, cal_start:cal_end]
dec_val = dec_data[:, cal_end:]

dataset_cal = LobDataset(data=dec_cal, k=4, num_classes=3, T=100)
cal_loader = torch.utils.data.DataLoader(dataset=dataset_cal, shuffle=False, batch_size=batch_size)
print('Calibration Data Shape:', dataset_cal.x.shape, dataset_cal.y.shape)

dataset_val = LobDataset(data=dec_val, k=4, num_classes=3, T=100)
val_loader = torch.utils.data.DataLoader(dataset=dataset_val, shuffle=False, batch_size=batch_size)
print('Validation Data Shape:', dataset_val.x.shape, dataset_val.y.shape)

# Only the loaders are needed from here on; drop the raw arrays and helpers.
del dec_cal, dec_data, dataset_cal, dec_val, dataset_val
del n_cols, cal_start, cal_end

# --- Test set ---------------------------------------------------------------
# The three test files are concatenated along axis 1, in this order.
test_paths = (
    'data/input/Test_Dst_NoAuction_DecPre_CF_7.txt',
    'data/input/Test_Dst_NoAuction_DecPre_CF_8.txt',
    'data/input/Test_Dst_NoAuction_DecPre_CF_9.txt',
)
dec_test = np.hstack([np.loadtxt(path) for path in test_paths])

dataset_test = LobDataset(data=dec_test, k=4, num_classes=3, T=100)
test_loader = torch.utils.data.DataLoader(dataset=dataset_test, shuffle=False, batch_size=batch_size)
print('Test Data Shape:', dataset_test.x.shape, dataset_test.y.shape)

del test_paths, dec_test, dataset_test

Calibration Data Shape: torch.Size([44482, 1, 100, 40]) torch.Size([44482])
Validation Data Shape: torch.Size([6270, 1, 100, 40]) torch.Size([6270])
Test Data Shape: torch.Size([139488, 1, 100, 40]) torch.Size([139488])


In [3]:
model_path = 'model/best_val_model_pytorch'

# The checkpoint holds the whole pickled module (the loaded object is used
# directly as the model), so no fresh `deeplob(...)` instance is needed first;
# the previous throwaway instantiation was overwritten immediately.
#
# `weights_only=False` is passed explicitly: full-module checkpoints cannot be
# read with weights-only loading, and relying on the implicit default triggers
# a FutureWarning / breaks once the default changes.
# SECURITY: this performs a full unpickle, which can execute arbitrary code.
# Only load checkpoint files that come from a trusted source.
model = torch.load(model_path, map_location=torch.device(DEVICE), weights_only=False)

# Switch to inference mode; as the last expression this also displays the
# module structure in the notebook output.
model.eval()

  model = torch.load(model_path,  map_location=torch.device(DEVICE))


deeplob(
  (conv1): Sequential(
    (0): Conv2d(1, 32, kernel_size=(1, 2), stride=(1, 2))
    (1): LeakyReLU(negative_slope=0.01)
    (2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): Conv2d(32, 32, kernel_size=(4, 1), stride=(1, 1))
    (4): LeakyReLU(negative_slope=0.01)
    (5): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): Conv2d(32, 32, kernel_size=(4, 1), stride=(1, 1))
    (7): LeakyReLU(negative_slope=0.01)
    (8): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (conv2): Sequential(
    (0): Conv2d(32, 32, kernel_size=(1, 2), stride=(1, 2))
    (1): Tanh()
    (2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): Conv2d(32, 32, kernel_size=(4, 1), stride=(1, 1))
    (4): Tanh()
    (5): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): Conv2d(32, 32, kernel_size=(4, 1), stride

In [12]:
# Only show Optuna messages at WARNING level or above.
optuna.logging.set_verbosity(optuna.logging.WARNING)

# Miscoverage levels to sweep; the tuning objective requires coverage >= 1 - alpha.
alphas = [0.1, 0.15, 0.2, 0.25]

# Conformal score classes to compare.
score_fun = [APS, RAPS, SAPS]

# Result store, filled by process_score_function as res[score_name][str(alpha)].
res = dict()

def evaluate_predictor(fun: Callable, alpha: float, temperature: float, x: Optional[float] = None, loader=None):
    """Calibrate a ``ClassWisePredictor`` and evaluate it on a data loader.

    Args:
        fun: Score-function class/factory. Called as ``fun(x)`` when ``x`` is
            given, otherwise as ``fun()``.
        alpha: Miscoverage level passed to ``predictor.calibrate``.
        temperature: Temperature forwarded to ``ClassWisePredictor``.
        x: Optional single hyperparameter for the score function.
        loader: Loader to evaluate on; falls back to the module-level
            ``val_loader`` when ``None``.

    Returns:
        Whatever ``predictor.evaluate(loader)`` returns.

    Note:
        Calibration always uses the module-level ``cal_loader`` and the
        module-level ``model``.
    """
    eval_loader = val_loader if loader is None else loader

    # `x is None` (not falsiness) so that an explicit 0.0 is still forwarded.
    score_function = fun() if x is None else fun(x)

    predictor = ClassWisePredictor(score_function=score_function, model=model, temperature=temperature)
    predictor.calibrate(cal_loader, alpha)
    return predictor.evaluate(eval_loader)

def objective(trial, fun: Callable, alpha: float):
    """Optuna objective: smallest average prediction-set size subject to coverage.

    Samples a temperature (log-uniform in [0.1, 10]) and, for every score
    function other than ``APS``, a ``lambda`` in [0, 1] that is passed to
    ``evaluate_predictor`` as the score function's hyperparameter. The
    predictor is evaluated on ``evaluate_predictor``'s default loader.

    Args:
        trial: Optuna trial used to sample the hyperparameters.
        fun: Score-function class (compared by identity against ``APS``).
        alpha: Target miscoverage level.

    Returns:
        ``Average_size`` from the evaluation results if ``Coverage_rate`` is at
        least ``1 - alpha``; otherwise ``float('inf')`` so the trial can never
        be selected over a feasible one when minimizing.
    """
    # Sampling order (temperature first, then lambda) is kept stable.
    temperature = trial.suggest_float("temperature", 0.1, 10.0, log=True)

    if fun is not APS:
        x = trial.suggest_float("lambda", 0, 1)
        evaluation_results = evaluate_predictor(fun, alpha, temperature, x)
    else:
        # APS takes no extra hyperparameter; only the temperature is tuned.
        evaluation_results = evaluate_predictor(fun, alpha, temperature)

    # Only coverage and set size drive the objective. Other metrics in the
    # results are deliberately not read, so the objective does not depend on
    # keys it never uses.
    if evaluation_results['Coverage_rate'] >= 1 - alpha:
        return evaluation_results['Average_size']

    # Infeasible trial: coverage requirement not met.
    return float('inf')

def process_score_function(fun: Callable):
    """Tune one score function for every alpha and record its test-set results.

    For each value in the module-level ``alphas`` list, runs a 50-trial Optuna
    study minimizing ``objective``, re-evaluates the best hyperparameters on
    ``test_loader`` via ``evaluate_predictor``, prints a summary line, and
    stores the outcome in the module-level ``res`` dict under
    ``res[fun.__name__][str(alpha)]``.

    Stored entry layout (key order preserved):
        * ``best_lambda`` -- only for score functions other than ``APS``
        * ``best_temperature``
        * ``test_results`` -- return value of ``evaluate_predictor``

    Args:
        fun: Score-function class; its ``__name__`` is used as the result key.
    """
    fun_name = fun.__name__
    print(fun_name)
    res[fun_name] = {}

    # APS is tuned over temperature only; every other score also has a lambda.
    uses_lambda = fun is not APS

    for alpha in alphas:
        print(f'Processing alpha: {alpha}')
        study = optuna.create_study(direction="minimize")
        study.optimize(lambda trial: objective(trial, fun, alpha), n_trials=50, show_progress_bar=True)

        # The objective returns inf for trials that miss the coverage target.
        # If the best value is still inf, *no* trial was feasible and the
        # "best" parameters below are arbitrary -- make that visible.
        if study.best_value == float('inf'):
            print(f'WARNING: no trial met the coverage requirement for alpha: {alpha}; '
                  f'reported hyperparameters are not validated')

        best_params = study.best_params
        best_temperature = best_params['temperature']
        best_lambda = best_params['lambda'] if uses_lambda else None

        # After finding the best hyperparameters, evaluate on the test set.
        evaluation_results = evaluate_predictor(fun, alpha, best_temperature, best_lambda, loader=test_loader)

        entry = {}
        summary = f'alpha: {alpha}, '
        if uses_lambda:
            entry["best_lambda"] = best_lambda
            summary += f'best lambda: {best_lambda}, '
        entry["best_temperature"] = best_temperature
        entry["test_results"] = evaluation_results
        summary += f'best temperature: {best_temperature}, test results: {evaluation_results}'

        res[fun_name][str(alpha)] = entry
        print(summary)

# Run the full hyperparameter sweep for each score function; every call
# adds its results to the shared `res` dict.
for fun in score_fun:
    process_score_function(fun)

# Persist the collected results as indented JSON.
with open('results_minsetsize.json', 'w') as json_file:
    json_file.write(json.dumps(res, indent=4))

print("Results saved to results_minsetsize.json")


APS
Processing alpha: 0.1


Best trial: 41. Best value: 2.08325: 100%|██████████| 50/50 [02:39<00:00,  3.20s/it]


alpha: 0.1, best temperature: 0.10314480661523805, test results: {'Coverage_rate': 0.9232693851800872, 'Average_size': 1.9881065037852719, 'Unilable_share': 0.21719431062170222, 'Multiclass_brier_score': 0.159585007991448, 'Log_loss': 2.0040067345260324}
Processing alpha: 0.15


Best trial: 33. Best value: 1.73333: 100%|██████████| 50/50 [02:43<00:00,  3.27s/it]


alpha: 0.15, best temperature: 0.1000532615330806, test results: {'Coverage_rate': 0.8864059990823583, 'Average_size': 1.6252795939435651, 'Unilable_share': 0.5100295365909612, 'Multiclass_brier_score': 0.15974838608490413, 'Log_loss': 2.06510884878605}
Processing alpha: 0.2


Best trial: 22. Best value: 1.44992: 100%|██████████| 50/50 [02:41<00:00,  3.24s/it]


alpha: 0.2, best temperature: 0.10139253489752549, test results: {'Coverage_rate': 0.8490049323239275, 'Average_size': 1.3386958017894013, 'Unilable_share': 0.772647109428768, 'Multiclass_brier_score': 0.15967801944402102, 'Log_loss': 2.038176686377976}
Processing alpha: 0.25


Best trial: 27. Best value: 1.24705: 100%|██████████| 50/50 [02:38<00:00,  3.16s/it]


alpha: 0.25, best temperature: 0.24595228876761777, test results: {'Coverage_rate': 0.8111307065840789, 'Average_size': 1.1792268869006652, 'Unilable_share': 0.8416566299610002, 'Multiclass_brier_score': 0.14588861072847134, 'Log_loss': 0.9014917152774021}
RAPS
Processing alpha: 0.1


Best trial: 18. Best value: 2.09171: 100%|██████████| 50/50 [03:13<00:00,  3.88s/it]


alpha: 0.1, best lambda: 0.008357896885019755, best temperature: 0.10380197281502251, test results: {'Coverage_rate': 0.9225238013305804, 'Average_size': 2.0092552764395504, 'Unilable_share': 0.14810593025923377, 'Multiclass_brier_score': 0.15954984097586802, 'Log_loss': 1.9914924146790354}
Processing alpha: 0.15


Best trial: 43. Best value: 1.76794: 100%|██████████| 50/50 [03:35<00:00,  4.31s/it]


alpha: 0.15, best lambda: 0.13171775971443256, best temperature: 0.10162370423079647, test results: {'Coverage_rate': 0.8859256710254646, 'Average_size': 1.6724664487267722, 'Unilable_share': 0.39444253269098417, 'Multiclass_brier_score': 0.15966581090655696, 'Log_loss': 2.033600574184409}
Processing alpha: 0.2


Best trial: 11. Best value: 1.47352: 100%|██████████| 50/50 [03:30<00:00,  4.20s/it]


alpha: 0.2, best lambda: 0.3902138725982041, best temperature: 0.10345045315413344, test results: {'Coverage_rate': 0.8516001376462491, 'Average_size': 1.4057840100940582, 'Unilable_share': 0.596983253039688, 'Multiclass_brier_score': 0.15956867113686402, 'Log_loss': 1.9981663368526796}
Processing alpha: 0.25


Best trial: 11. Best value: 1.22584: 100%|██████████| 50/50 [04:42<00:00,  5.64s/it]


alpha: 0.25, best lambda: 0.004431951570958215, best temperature: 0.10699858367086625, test results: {'Coverage_rate': 0.8139051387933012, 'Average_size': 1.1700576393668272, 'Unilable_share': 0.8401439550355586, 'Multiclass_brier_score': 0.15937644705640513, 'Log_loss': 1.9328382520089356}
SAPS
Processing alpha: 0.1


Best trial: 24. Best value: 1.94147: 100%|██████████| 50/50 [03:31<00:00,  4.23s/it]


alpha: 0.1, best lambda: 0.0009767558715134683, best temperature: 1.1350458718068384, test results: {'Coverage_rate': 0.916745526496903, 'Average_size': 1.6930847097958248, 'Unilable_share': 0.6216520417526956, 'Multiclass_brier_score': 0.1541455999388238, 'Log_loss': 0.8152895107195505}
Processing alpha: 0.15


Best trial: 37. Best value: 1.70032: 100%|██████████| 50/50 [03:40<00:00,  4.41s/it]


alpha: 0.15, best lambda: 0.0029230963279330228, best temperature: 0.2374245697739303, test results: {'Coverage_rate': 0.8842409382885983, 'Average_size': 1.5060220233998625, 'Unilable_share': 0.7180044161504933, 'Multiclass_brier_score': 0.14699863964203286, 'Log_loss': 0.9259502925630406}
Processing alpha: 0.2


Best trial: 16. Best value: 1.47352: 100%|██████████| 50/50 [03:34<00:00,  4.30s/it]


alpha: 0.2, best lambda: 0.0020929906442391406, best temperature: 9.45517426213012, test results: {'Coverage_rate': 0.8481518123422803, 'Average_size': 1.3412551617343427, 'Unilable_share': 0.7996243404450563, 'Multiclass_brier_score': 0.21254572201642, 'Log_loss': 1.0557639572640254}
Processing alpha: 0.25


Best trial: 41. Best value: 1.25837: 100%|██████████| 50/50 [03:36<00:00,  4.34s/it]


alpha: 0.25, best lambda: 0.1418194793019299, best temperature: 0.6642571908511493, test results: {'Coverage_rate': 0.8139983367744895, 'Average_size': 1.2061539343886212, 'Unilable_share': 0.8341506079376003, 'Multiclass_brier_score': 0.13133892735748606, 'Log_loss': 0.7174870526066658}
Results saved to results_minsetsize.json
