In [1]:
import pandas as pd
import numpy as np
# import mplhep as hep
from tqdm import tqdm
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

pd.set_option('display.max_columns', 150)

import torch
import torch.nn as nn
from sklearn.model_selection import train_test_split
# pd.set_option('display.max_columns', 150)

In [95]:
filecode = 'InfA_RD_DPrmvd'
# filecode = parquetcode


train_hp = {
    "lr":0.0002,
    "batch_size":500000,
    "N_epochs":100,
    "seed":0,
    'eplim':-1
}
nodes = [20,20]


# Pre-define

In [7]:
def set_seed(seed):
    torch.manual_seed(seed)
    np.random.seed(seed)
set_seed(train_hp['seed'])

def hess_to_tensor(H):
    hess_elements = []
    for i in range(len(H)):
        for j in range(len(H)):
            hess_elements.append(H[i][j].reshape(1))
    return torch.cat(hess_elements).reshape(len(H),len(H))

In [110]:
#Define the Net
class Net(nn.Module):
    def __init__(self, n_features=40, nodes=[100,100], output_nodes=5):
        super(Net, self).__init__()
        # Build network
        n_nodes = [n_features] + nodes + [output_nodes]
        self.layers = nn.ModuleList()
        for i in range(len(n_nodes)-1):
            self.layers.append(nn.Linear(n_nodes[i], n_nodes[i+1]))
            self.layers.append(nn.ReLU())

    def forward(self, x):
        out = self.layers[0](x)
        for layer in self.layers[1:]:
            out = layer(out)
        # Apply softmax
        return torch.softmax(out, dim=1)



class InfAwareLoss(nn.Module):
    def __init__(self,i,epoch):
        super(InfAwareLoss, self).__init__()
        self.i = i
        self.epoch =epoch
    
    def forward(self,input,target,weight):
        while self.i > self.epoch:
            # Input = torch.tensor(input)
            # Target = torch.tensor(target,dtype=torch.int8)
            
            label = torch.argmax(target,dim=1)
            pred = torch.argmax(input,dim=1)
            cm = torch.zeros(7,7)
            cm_clone = cm.clone()
            for t, p, w in zip(label.view(-1), pred.view(-1), weight.view(-1)):
                cm_clone[p,t] += w
            cm = cm_clone
            cm =cm[1:, :]
            O = cm.sum(dim=1)
            def NLL(mu):
                mu0 =torch.tensor([1.0])
                theta = torch.cat((mu0,mu))
                return -(O@(torch.log(cm@theta))-(cm@theta).sum())
            mu = torch.tensor([1.0,1.0,1.0,1.0,1.0,1.0])
            hess = torch.func.hessian(NLL)(mu)
            I = torch.inverse(hess_to_tensor(hess))
            loss = torch.trace(I)**0.5 
            return loss.clone().detach().requires_grad_(True)
        else:
            return torch.tensor([0.0],requires_grad=True)


In [114]:
#Define the trainning function
from NNfunctions import get_batches, get_total_loss
def train_network_cross_entropy(model, X_train,X_test,y_train,y_test,w_train,w_test, train_hp={}):
    optimiser = torch.optim.Adam(model.parameters(), lr=train_hp["lr"])
    X_train =X_train.to_numpy()
    X_test = X_test.to_numpy()
    y_train = y_train.to_numpy()
    y_test = y_test.to_numpy()
    w_train = w_train.to_numpy()
    w_test = w_test.to_numpy()
    
    
    ce_loss = nn.CrossEntropyLoss()
    train_loss, test_loss = [], []
    i = 0
    eplim = train_hp['eplim']
    epchs = train_hp['N_epochs']

    print(">> Training...")
    with tqdm(range(train_hp["N_epochs"])) as t:
        for i_epoch in t:
            model.train()
            # print(i)
            # "get_batches": function defined in statml_tools.py to separate the training data into batches
            batch_gen = get_batches([X_train, y_train, w_train], batch_size=train_hp['batch_size'],
                                    randomise=True, include_remainder=False
                                )
            ia_loss = InfAwareLoss(i,eplim)
            for X_tensor, y_tensor, w_tensor in batch_gen:
                optimiser.zero_grad()
                output = model(X_tensor)
                # print(output)
                ia = ia_loss(output, y_tensor, w_tensor)
                ce = ce_loss(output, y_tensor)
                
                if i <= eplim:
                    loss = ce
                else:
                    loss = ia
                loss.backward()
                optimiser.step()
                

            model.eval()
            
            if i>eplim:
                Loss = ia_loss
            else:
                Loss = ce_loss
            # "get_total_loss": function defined in statml_tools.py to evaluate the network in batches (useful for large datasets)
            train_loss.append(get_total_loss(model, Loss, X_train, y_train,w_train))
            test_loss.append(get_total_loss(model, Loss, X_test, y_test,w_test))
            t.set_postfix(train_loss=train_loss[-1], test_loss=test_loss[-1])
            i+=1

    print(">> Training finished")
    model.eval()

    return model, train_loss, test_loss



# Models

In [115]:
oc = np.load(f'/vols/cms/hw423/Data/Week14/octest_{filecode}.npy')
df = pd.DataFrame(oc)
mi_series = pd.read_csv('/vols/cms/hw423/Week6/MI_balanced.csv')

model_ia = Net(n_features=7, nodes=nodes, output_nodes=7)
dfx=df
label = pd.read_pickle('/vols/cms/hw423/Data/Week14/Label.pkl')
dfy = pd.get_dummies(label)
dfw = pd.read_pickle('/vols/cms/hw423/Data/Week14/weight.pkl')

X_train, X_test, y_train, y_test, w_train, w_test = train_test_split(dfx, dfy,dfw, test_size=0.2, random_state=42)



In [116]:
model_bce, train_loss_bce, test_loss_bce = train_network_cross_entropy(model_ia, X_train, X_test, y_train, y_test,w_train,w_test, train_hp=train_hp)


data = pd.read_parquet('/vols/cms/hw423/Data/Week14/df_InfA_RD_DPrmvd.parquet')
col = ['$\gamma\gamma$','ggH','qqH','WH','ZH','ttH','tH']
oc= model_bce(torch.tensor(data.to_numpy(),dtype=torch.float32))
octest =pd.DataFrame(oc.detach(), columns = col, index = data.index)
np.save(f'/vols/cms/hw423/Data/Week14/octest_{filecode}.npy', np.array(octest))

>> Training...


  0%|          | 0/100 [01:21<?, ?it/s]


TypeError: InfAwareLoss.forward() missing 1 required positional argument: 'weight'

In [82]:
# oc = np.load(f'/vols/cms/hw423/Data/Week14/octest_{filecode}.npy')
oc = np.load(f'/vols/cms/hw423/Data/Week14/octest_InfA_RD_DPrmvd.npy')
df = pd.DataFrame(oc)
label = pd.read_pickle('/vols/cms/hw423/Data/Week14/Label.pkl')
weight = pd.read_pickle('/vols/cms/hw423/Data/Week14/weight.pkl')
weight

0          1.026104e-05
1          2.839050e-06
2          1.184555e-06
3          2.860377e-06
4          2.077187e-06
               ...     
2914418    9.387862e-11
2914419    9.846524e-11
2914420    8.756236e-11
2914421    6.371580e-11
2914422    8.610738e-11
Name: weight, Length: 2914423, dtype: float32

In [91]:
X_train

Unnamed: 0,0,1,2,3,4,5,6
527151,0.000094,0.728396,0.000094,0.029366,0.241861,0.000094,0.000094
1440105,0.000091,0.788625,0.000118,0.023760,0.187225,0.000091,0.000091
2237332,0.001539,0.005128,0.011481,0.001539,0.005213,0.885481,0.089619
681188,0.006859,0.070842,0.192470,0.006859,0.036199,0.186599,0.500171
1148646,0.000425,0.554309,0.000649,0.061152,0.382614,0.000425,0.000425
...,...,...,...,...,...,...,...
1692743,0.004447,0.025748,0.101407,0.004447,0.011072,0.586202,0.266676
2356330,0.001893,0.007540,0.013207,0.001893,0.006946,0.815757,0.152765
2229084,0.001617,0.001617,0.001617,0.126245,0.006038,0.339457,0.523407
2768307,0.005742,0.020084,0.071224,0.005742,0.041305,0.449327,0.406576
