In [1]:
# Show the kernel's working directory: the import cell appends the relative path '../' to sys.path,
# so the project imports only resolve when the notebook is run from this directory.
!pwd

/ssl-jet-vol-v2/JetCLR/notebooks


In [2]:
import sys
import os
import numpy as np
import matplotlib.pyplot as plt
import random
import time
import glob
import argparse
sys.path.append('../')

# load torch modules
import torch
import torch.nn as nn
import torch.nn.functional as F

# load custom modules required for jetCLR training
from scripts.modules.jet_augs import rotate_jets, distort_jets, rescale_pts, crop_jets, translate_jets, collinear_fill_jets
from scripts.modules.transformer import Transformer
from scripts.modules.losses import contrastive_loss, align_loss, uniform_loss
from scripts.modules.perf_eval import get_perf_stats, linear_classifier_test 

In [3]:
def load_data(dataset_path, flag, n_files=-1):
    """Load the processed jet-data files of one dataset split.

    Reads the module-level ``args.full_kinematics`` switch to pick the source:
    ``7_features_raw/data/data_<k>.npy`` (loaded with ``np.load``) when it is
    truthy, otherwise ``3_features/data/data_<k>.pt`` (loaded with ``torch.load``).

    Only files whose name matches ``data_<integer>.<ext>`` are considered, and
    they are loaded in ascending numeric order. Stray directory entries
    (checkpoints, READMEs, ...) and gaps in the numbering therefore no longer
    cause a ``FileNotFoundError``.

    Args:
        dataset_path: Root directory of the dataset.
        flag: Split sub-directory name (e.g. ``"train"``).
        n_files: Maximum number of files to load. ``-1`` (the default), any
            other negative value, or ``None`` loads every file; ``0`` loads none.

    Returns:
        A list with one loaded object per file, in file-index order.
    """
    if args.full_kinematics:
        feature_dir, extension, loader = "7_features_raw", "npy", np.load
    else:
        feature_dir, extension, loader = "3_features", "pt", torch.load

    prefix = "data_"
    data_dir = f"{dataset_path}/{flag}/processed/{feature_dir}/data"

    # Collect (numeric index, path) pairs so the order is numeric, not lexicographic
    # (data_10 must come after data_2).
    indexed_paths = []
    for path in glob.glob(f"{data_dir}/{prefix}*.{extension}"):
        index_text = os.path.splitext(os.path.basename(path))[0][len(prefix):]
        if index_text.isdecimal():
            indexed_paths.append((int(index_text), path))
    indexed_paths.sort()

    # Negative / None means "no limit"; slicing also handles n_files == 0 correctly.
    if n_files is not None and n_files >= 0:
        indexed_paths = indexed_paths[:n_files]

    data = []
    for i, (_, path) in enumerate(indexed_paths):
        data.append(loader(path))
        print(f"--- loaded file {i} from `{flag}` directory")

    return data


def load_labels(dataset_path, flag, n_files=-1):
    """Load the processed label files of one dataset split.

    Reads ``<dataset_path>/<flag>/processed/3_features/labels/labels_<k>.pt``
    with ``torch.load``. Only files whose name matches ``labels_<integer>.pt``
    are considered, and they are loaded in ascending numeric order. Stray
    directory entries and gaps in the numbering therefore no longer cause a
    ``FileNotFoundError``.

    Args:
        dataset_path: Root directory of the dataset.
        flag: Split sub-directory name (e.g. ``"train"``).
        n_files: Maximum number of files to load. ``-1`` (the default), any
            other negative value, or ``None`` loads every file; ``0`` loads none.

    Returns:
        A list with one loaded object per label file, in file-index order.
    """
    prefix = "labels_"
    labels_dir = f"{dataset_path}/{flag}/processed/3_features/labels"

    # Collect (numeric index, path) pairs so the order is numeric, not lexicographic
    # (labels_10 must come after labels_2).
    indexed_paths = []
    for path in glob.glob(f"{labels_dir}/{prefix}*.pt"):
        index_text = os.path.splitext(os.path.basename(path))[0][len(prefix):]
        if index_text.isdecimal():
            indexed_paths.append((int(index_text), path))
    indexed_paths.sort()

    # Negative / None means "no limit"; slicing also handles n_files == 0 correctly.
    if n_files is not None and n_files >= 0:
        indexed_paths = indexed_paths[:n_files]

    data = []
    for i, (_, path) in enumerate(indexed_paths):
        data.append(torch.load(path))
        print(f"--- loaded label file {i} from `{flag}` directory")

    return data

In [4]:
# Create an empty argparse namespace that the next cell fills with the run configuration.
# Passing args=[] makes parse_args ignore sys.argv (which belongs to the Jupyter kernel,
# not to this notebook), so parsing cannot fail on the kernel's own command-line flags.
parser = argparse.ArgumentParser()
args = parser.parse_args(args=[])

In [5]:
# --- experiment identity and checkpoint to evaluate ---
args.label = "zz-simCLR-trial"
args.load_path = f"/ssl-jet-vol-v2/JetCLR/models/experiments/{args.label}/final_model.pt"

# --- data selection (read by load_data / load_labels and the S/B split) ---
args.num_files = 1
args.full_kinematics = False
args.sbratio = 1

# --- network hyper-parameters (passed positionally to Transformer) ---
args.model_dim = 1000
args.output_dim = 1000
args.n_heads = 4
args.dim_feedforward = 1000
args.n_layers = 4
args.n_head_layers = 2
args.learning_rate = 0.00005
args.opt = "adam"

# --- inference options (forwarded to net.forward_batchwise) ---
args.batch_size = 128
args.mask = False
args.cmask = True

# --- translation augmentation settings (only referenced by commented-out code) ---
args.trs = True
args.trsw = 0.1

In [6]:
print( "loading data")
data = load_data("/ssl-jet-vol-v2/toptagging", "train", args.num_files)
labels = load_labels("/ssl-jet-vol-v2/toptagging", "train", args.num_files)
if not data or not labels:
    raise FileNotFoundError("no data/label files were loaded from /ssl-jet-vol-v2/toptagging/train")

# load_data returns torch tensors, or numpy arrays when args.full_kinematics is set;
# np.asarray accepts both, whereas torch.concatenate would reject the numpy case.
tr_dat_in = np.concatenate( [ np.asarray( chunk ) for chunk in data ], axis=0 )  # Concatenate along the first axis
tr_lab_in = np.concatenate( [ np.asarray( chunk ) for chunk in labels ], axis=0 )
# tr_dat_in = tr_dat_in[:10000]
# tr_lab_in = tr_lab_in[:10000]

# input dim to the transformer -> (pt,eta,phi)
input_dim = tr_dat_in.shape[1]
print("input_dim: ", input_dim)

# Dedicated, seeded generator so the shuffles (and hence the LCT train/test split) are
# identical on every Restart & Run All. Set args.seed to change it; defaults to 0.
split_rng = random.Random( getattr( args, "seed", 0 ) )

# creating the training dataset
print( "shuffling data and doing the S/B split", flush=True )
# boolean-mask indexing already returns fresh arrays, so no extra copy is needed
tr_bkg_dat = tr_dat_in[ tr_lab_in==0 ]
tr_sig_dat = tr_dat_in[ tr_lab_in==1 ]
nbkg_tr = int( tr_bkg_dat.shape[0] )
nsig_tr = int( args.sbratio * nbkg_tr )
# the slice silently caps the signal count at what is available; label counts follow the data
tr_sig_sel = tr_sig_dat[ 0:nsig_tr ]
tr_dat_ordered = np.concatenate( [ tr_bkg_dat, tr_sig_sel ], axis=0 )
tr_lab_ordered = np.concatenate( [ np.zeros( tr_bkg_dat.shape[0], dtype=np.int64 ), np.ones( tr_sig_sel.shape[0], dtype=np.int64 ) ] )
# shuffle an index permutation instead of a Python list of per-jet arrays
tr_order = list( range( tr_dat_ordered.shape[0] ) )
split_rng.shuffle( tr_order )
tr_order = np.asarray( tr_order, dtype=np.intp )
tr_dat = tr_dat_ordered[ tr_order ]
tr_lab = tr_lab_ordered[ tr_order ]

# create two validation sets: 
# one for training the linear classifier test (LCT)
# and one for testing on it
# we will do this just with tr_dat_in, but shuffled and split 50/50
# this should be fine because the jetCLR training doesn't use labels
# NOTE(review): the split below keeps whatever S/B ratio tr_dat_in has; it is not rebalanced here
vl_order = list( range( tr_dat_in.shape[0] ) )
split_rng.shuffle( vl_order )
vl_order = np.asarray( vl_order, dtype=np.intp )
vl_dat = tr_dat_in[ vl_order ]
vl_lab = tr_lab_in[ vl_order ]
vl_len = vl_dat.shape[0]
vl_split_len = vl_len // 2
vl_dat_1 = vl_dat[ 0:vl_split_len ]
vl_lab_1 = vl_lab[ 0:vl_split_len ]
# start index computed explicitly: a plain [-vl_split_len:] would return everything when the length is 0
vl_dat_2 = vl_dat[ vl_len - vl_split_len: ]
vl_lab_2 = vl_lab[ vl_len - vl_split_len: ]

loading data
--- loaded file 0 from `train` directory
--- loaded label file 0 from `train` directory
input_dim:  3
shuffling data and doing the S/B split


In [7]:
# set-up parameters for the LCT
linear_input_size = args.output_dim
linear_n_epochs = 750
linear_learning_rate = 0.001
linear_batch_size = 128

# initialise the network
print( "initialising the network", flush=True )
net = Transformer( input_dim, args.model_dim, args.output_dim, args.n_heads, args.dim_feedforward, args.n_layers, args.learning_rate, args.n_head_layers, dropout=0.1, opt=args.opt )
# send network to device
device = torch.device( "cuda" if torch.cuda.is_available() else "cpu" )
net.to( device )
# print(net)
net.load_state_dict(torch.load(f"{args.load_path}"))

initialising the network


<All keys matched successfully>

In [8]:
print( "starting the final LCT run", flush=True )
print("obtaining representations")
# Evaluate the network on both validation halves without tracking gradients.
# Two optional steps are disabled in this cell: translating the jets with
# translate_jets(..., width=args.trsw) when args.trs is set, and wrapping the
# network output in F.normalize(..., dim=-1).
with torch.no_grad():
    net.eval()
    # one pass per validation half; the generator is consumed right here by the unpacking
    vl_reps_1, vl_reps_2 = (
        net.forward_batchwise(
            torch.Tensor( vl_half ).transpose(1,2),
            args.batch_size,
            use_mask=args.mask,
            use_continuous_mask=args.cmask,
        ).detach().cpu().numpy()
        for vl_half in ( vl_dat_1, vl_dat_2 )
    )
    # put the network back into training mode once the representations are extracted
    net.train()

starting the final LCT run
obtaining representations


In [9]:
print("finished obtaining representations, starting LCT")
# Final LCT on a single representation layer (axis 1 of vl_reps_*).
# Only layer 1 is evaluated; the layer is named explicitly and validated up front so that
# a network exposing fewer layers fails with a clear message instead of a NameError on
# the timing variables below.
lct_rep_layer = 1
n_rep_layers = vl_reps_1.shape[1]
if not 0 <= lct_rep_layer < n_rep_layers:
    raise IndexError( f"LCT requested rep layer {lct_rep_layer}, but only {n_rep_layers} layer(s) are available" )

i = lct_rep_layer
t3 = time.time()
# train the linear classifier on the first validation half and evaluate it on the second
out_dat_f, out_lbs_f, losses_f = linear_classifier_test( linear_input_size, linear_batch_size, linear_n_epochs, "adam", linear_learning_rate, vl_reps_1[:,i,:], vl_lab_1, vl_reps_2[:,i,:], vl_lab_2 )
auc, imtafe = get_perf_stats( out_lbs_f, out_dat_f )
# report every `step_size`-th recorded loss
# NOTE(review): the captured output shows the loss flat at ~49.87 for every epoch, i.e. the
# classifier does not appear to learn; the scale of the unnormalised inputs is one suspect - confirm.
step_size = 25
for n_shown, lss in enumerate( losses_f[::step_size] ):
    ep = n_shown * step_size
    print( f"(rep layer {i}) epoch: " + str( ep ) + ", loss: " + str( lss ), flush=True)
print( f"(rep layer {i}) auc: "+str( round(auc, 4) ), flush=True )
print( f"(rep layer {i}) imtafe: "+str( round(imtafe, 1) ), flush=True)
t4 = time.time()

# NOTE(review): the message says "output saved", but nothing is written to disk in this cell.
print( "final LCT  done and output saved, time taken: " + str( np.round( t4-t3, 2 ) ), flush=True )
print("............................", flush=True)

finished obtaining representations, starting LCT
(rep layer 1) epoch: 0, loss: 49.869724
(rep layer 1) epoch: 25, loss: 49.88531
(rep layer 1) epoch: 50, loss: 49.87332
(rep layer 1) epoch: 75, loss: 49.87692
(rep layer 1) epoch: 100, loss: 49.869724
(rep layer 1) epoch: 125, loss: 49.878117
(rep layer 1) epoch: 150, loss: 49.872124
(rep layer 1) epoch: 175, loss: 49.878117
(rep layer 1) epoch: 200, loss: 49.87692
(rep layer 1) epoch: 225, loss: 49.870922
(rep layer 1) epoch: 250, loss: 49.879314
(rep layer 1) epoch: 275, loss: 49.878117
(rep layer 1) epoch: 300, loss: 49.87572
(rep layer 1) epoch: 325, loss: 49.87692
(rep layer 1) epoch: 350, loss: 49.87572
(rep layer 1) epoch: 375, loss: 49.879314
(rep layer 1) epoch: 400, loss: 49.880516
(rep layer 1) epoch: 425, loss: 49.88291
(rep layer 1) epoch: 450, loss: 49.872124
(rep layer 1) epoch: 475, loss: 49.881714
(rep layer 1) epoch: 500, loss: 49.87572
(rep layer 1) epoch: 525, loss: 49.87452
(rep layer 1) epoch: 550, loss: 49.87572
(

In [10]:
# Shape of the extracted representations; axis 1 is what the LCT cell indexes as the "rep layer"
# and the last axis matches args.output_dim - presumably (n_jets, n_rep_layers, output_dim), TODO confirm.
vl_reps_1.shape

(50000, 3, 1000)

In [11]:
# Shape of the network input for the first validation half; axis 1 is input_dim (pt, eta, phi),
# axis 2 is presumably the per-jet constituent slots - TODO confirm.
vl_dat_1.shape

(50000, 3, 50)

In [12]:
# Size of axis 1 of the representations, i.e. the upper bound of the rep-layer index used by the LCT cell.
vl_reps_1.shape[1]

3

In [13]:
# Print the module tree of the restored Transformer to check the architecture against the args above.
print(net)

Transformer(
  (embedding): Linear(in_features=3, out_features=1000, bias=True)
  (transformer): TransformerEncoder(
    (layers): ModuleList(
      (0): TransformerEncoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=1000, out_features=1000, bias=True)
        )
        (linear1): Linear(in_features=1000, out_features=1000, bias=True)
        (dropout): Dropout(p=0.1, inplace=False)
        (linear2): Linear(in_features=1000, out_features=1000, bias=True)
        (norm1): LayerNorm((1000,), eps=1e-05, elementwise_affine=True)
        (norm2): LayerNorm((1000,), eps=1e-05, elementwise_affine=True)
        (dropout1): Dropout(p=0.1, inplace=False)
        (dropout2): Dropout(p=0.1, inplace=False)
      )
      (1): TransformerEncoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=1000, out_features=1000, bias=True)
        )
        (linear1): Linear(in_f