In [8]:
import numpy as np
import pandas as pd
import pickle
import torch
import torch.nn as nn
import os
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt 
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, TensorDataset, Sampler
import sys
sys.path.append('../FakeDatasetMaking/')
from pair_nomet_creation import KinematicDataset, EpochSampler
from copy import deepcopy
from tqdm import tqdm

In [9]:
modelsavepath='/home/ddemler/HNLclassifier/fnn_FeatureRegression/fnn_aug18all.pt'
pdsavepath='/home/ddemler/HNLclassifier/fnn_FeatureRegression/fnn_aug18all.csv'


In [10]:
train_dataset = KinematicDataset(num_events=1000000, seed=0)
input_dim, output_dim = train_dataset.usefulvariables()

print(input_dim, output_dim)

train_sampler = EpochSampler(train_dataset)

train_loader = DataLoader(train_dataset, batch_size=320, sampler=train_sampler)


48 39


In [11]:
val_dataset = KinematicDataset(num_events=500000, seed=10000)
input_dim, output_dim = val_dataset.usefulvariables()

val_sampler = EpochSampler(val_dataset)

val_loader = DataLoader(val_dataset, batch_size=320, sampler=val_sampler)

In [12]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
class KinematicNet(nn.Module):
    def __init__(self):
        super(KinematicNet, self).__init__()
        self.fc1 = nn.Linear(input_dim, int(input_dim*1.5))
        self.fc2 = nn.Linear(int(input_dim*1.5), int(input_dim//3))
        self.fc3 = nn.Linear(int(input_dim//3), output_dim)
        
    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)


class CustomKinematicNet(nn.Module):
    def __init__(self, input_size, hidden_layers, lenoutput, activation_fn=F.relu):
        """
        Args:
        - input_size (int): Size of the input layer.
        - hidden_layers (list of int): Sizes of each hidden layer.
        - lenoutput (int): Size of the output layer.
        - activation_fn (callable): Activation function to use.
        """
        super(CustomKinematicNet, self).__init__()
        
        # Create the list of layers
        layers = [nn.Linear(input_size, hidden_layers[0])]
        for i in range(len(hidden_layers) - 1):
            layers.append(nn.Linear(hidden_layers[i], hidden_layers[i + 1]))
        layers.append(nn.Linear(hidden_layers[-1], lenoutput))
        
        self.layers = nn.ModuleList(layers)
        self.activation_fn = activation_fn
        
    def forward(self, x):
        for layer in self.layers[:-1]:
            x = self.activation_fn(layer(x))
        return self.layers[-1](x)
    




def custom_train_loss(y_pred, y_true):
    se_loss = (y_pred - y_true) ** 2
    # print(se_loss.shape)
    num_features = int(output_dim / 3)
    y_pred_reshaped = y_pred.reshape(-1, 3, num_features)
    y_true_reshaped = y_true.reshape(-1, 3, num_features)

    
    # indice=[3,6,12]
    indice=[3,6,9,10,11,12]
    for i in indice:
        for j in range(3):
            y_p = y_pred_reshaped[:, j, i]
            y_t = y_true_reshaped[:, j, i]
            RMSE = ((y_p - y_t) / torch.abs(y_t))**2 #TODO: change for val too
            mask =  (y_t > 1)
            # print("mask shape", mask.shape)
            # print("RMSE shape", RMSE.shape)
            # print("se_loss shape", se_loss.shape)
            se_loss[mask, int(3*i+j)] = RMSE[mask]
    mse_loss = torch.mean(se_loss)

    return mse_loss

def custom_val_loss(y_pred, y_true):
    #TODO: for relative loss output 2 losses for relative and nonrelative
    se_loss = (y_pred - y_true) ** 2
    # print(se_loss.shape)
    num_features = int(output_dim / 3)
    y_pred_reshaped = y_pred.reshape(-1, 3, num_features)
    y_true_reshaped = y_true.reshape(-1, 3, num_features)
    loss_list=[]
    # indice=[3,6,12]
    indice=[3,6,9,10,11,12]
    for i in range(num_features):
        if i in indice:
            pairlosses=[]
            for j in range(3):
                y_p = y_pred_reshaped[:, j, i]
                y_t = y_true_reshaped[:, j, i]
                RMSE = torch.abs(y_p - y_t)**2 / torch.abs(y_t)
                mask = (y_t > 1)
                se_loss[mask, int(3*i+j)] = RMSE[mask]
                pairlosses.append(torch.mean(se_loss[:, int(3*i+j)]))
            loss_list.append(sum(pairlosses)/3)
        else:
            y_p = y_pred_reshaped[:, :, i].flatten()
            y_t = y_true_reshaped[:, :, i].flatten()
            loss = torch.mean((y_p - y_t) ** 2)
            loss_list.append(loss.item())
    mse= torch.mean(se_loss)
    return loss_list, mse
            




    
    # for i in indice:
    #     for j in range(3):
    #         y_p = y_pred_reshaped[:, j, i].flatten()
    #         y_t = y_true_reshaped[:, j, i].flatten()
    #         RMSE = torch.abs(y_p - y_t)**2 / torch.abs(y_t)
    #         mask = y_true[:] > 1
    #         se_loss[mask, int(3*i+j)] = RMSE[mask]
    # mse_loss = torch.mean(se_loss)

    # return mse_loss
    
    

# def custom_val_loss(y_pred, y_true):
#     num_features = int(output_dim / 3)
#     y_pred_reshaped = y_pred.reshape(-1, 3, num_features)
#     y_true_reshaped = y_true.reshape(-1, 3, num_features)
    
#     loss_list = []
    
#     for i in range(num_features):
#         y_p = y_pred_reshaped[:, :, i].flatten()
#         y_t = y_true_reshaped[:, :, i].flatten()
#         loss = torch.mean((y_p - y_t) ** 2)
#         loss_list.append(loss.item())

#     mse_loss = torch.mean((y_pred - y_true) ** 2)
    
#     return loss_list, mse_loss

        
        
        
def l2_regularization(model, lambda_reg):
    l2_reg = 0.0
    for W in model.parameters():
        l2_reg += torch.sum(W ** 2)
    return l2_reg * lambda_reg      
        



In [13]:
# hidden_layers=[64,72,82,92,102,112,122,132,142,132,122,112,102,92,82,72,64]
hidden_layers=[64,72,82,92,102,112,122,132,142,132,102,92,82]

model = CustomKinematicNet(input_size=input_dim, hidden_layers=hidden_layers, lenoutput=output_dim)
model.to(device)

CustomKinematicNet(
  (layers): ModuleList(
    (0): Linear(in_features=48, out_features=64, bias=True)
    (1): Linear(in_features=64, out_features=72, bias=True)
    (2): Linear(in_features=72, out_features=82, bias=True)
    (3): Linear(in_features=82, out_features=92, bias=True)
    (4): Linear(in_features=92, out_features=102, bias=True)
    (5): Linear(in_features=102, out_features=112, bias=True)
    (6): Linear(in_features=112, out_features=122, bias=True)
    (7): Linear(in_features=122, out_features=132, bias=True)
    (8): Linear(in_features=132, out_features=142, bias=True)
    (9): Linear(in_features=142, out_features=132, bias=True)
    (10): Linear(in_features=132, out_features=102, bias=True)
    (11): Linear(in_features=102, out_features=92, bias=True)
    (12): Linear(in_features=92, out_features=82, bias=True)
    (13): Linear(in_features=82, out_features=39, bias=True)
  )
)

In [14]:



optimizer=torch.optim.Adam(model.parameters(), lr=0.0001)
# loss_fn=nn.MSELoss()


out_feats=['deltaphi', 'deltaeta', 'deltaR', 'mt', 'norm_mt', 'mass', 'pt', 'eta' , 'phi',  'px', 'py', 'pz', 'energy']
losses_cols=['train_loss', 'val_loss', 'l2sum']+out_feats
losses_df=pd.DataFrame(columns=losses_cols)


numepochs=10000
best_loss=np.inf
for epoch in range(numepochs):
    model.train()
    train_loss=0
    l2sum=0
    for i, (x, y) in tqdm(enumerate(train_loader), total=len(train_loader), leave=False, position=0, disable=True):
        x=x.to(device)
        y=y.to(device)
        y_pred=model(x)
        original_loss = custom_train_loss(y_pred, y)
        l2_loss = l2_regularization(model, lambda_reg=1e-7)
        loss = original_loss + l2_loss
        l2sum+=l2_loss.item()
        train_loss += original_loss.item()
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    
    model.eval()
    # patience=20
    with torch.no_grad():
        x,y=next(iter(val_loader))
        # x=val_loader
        x=x.to(device)
        y=y.to(device)
        y_pred=model(x)
        feats_loss, valloss = custom_val_loss(y_pred, y)
        # valloss=sum(feats_loss)/len(feats_loss)
        

        if valloss<best_loss:
            best_loss=valloss
            patience=30
            modelsave=deepcopy(model.state_dict())
            torch.save(modelsave, modelsavepath)
        else:
            patience-=1
            if patience==0:
                print('early stopping')
                break
    # indice=[3,6,12]
    indice=[3,6,9,10,11,12]
    for idx in indice:
        feats_loss[idx]=feats_loss[idx].cpu().float()

    valloss=valloss.cpu().float()
    valloss2=valloss.item()
    loss_strings = [f"{out_feats[i]}: {feats_loss[i]:.4e}" for i in range(len(out_feats))]
    loss_summary = ", ".join(loss_strings)
    loss_values = [train_loss/len(train_loader), valloss2, l2sum]
    loss_values.extend(feats_loss)
    # for losses in loss_values:
        # print(type(losses))
    losses_df.loc[epoch] = loss_values
    losses_df.to_csv(pdsavepath)
    # losses_df.loc[epoch]=[train_loss/len(train_loader), valloss2, feats_loss[0], feats_loss[1]]
    print(f"epoch: {epoch}, train: {train_loss/len(train_loader):.4e}, val: {valloss2:.4e}, {loss_summary}")


epoch: 0, train: 3.7289e+03, val: 3.1891e+03, deltaphi: 1.1466e+00, deltaeta: 1.0011e+00, deltaR: 1.4138e+00, mt: 1.0354e+02, norm_mt: 1.2556e-02, mass: 6.4773e+03, pt: 4.4667e+01, eta: 2.9379e+00, phi: 3.5253e+00, px: 4.2206e+02, py: 1.2765e+03, pz: 1.0060e+03, energy: 7.0577e+01
epoch: 1, train: 3.5097e+03, val: 4.1014e+03, deltaphi: 1.1029e+00, deltaeta: 1.0853e+00, deltaR: 1.4197e+00, mt: 4.5339e+02, norm_mt: 2.0809e-01, mass: 7.2819e+03, pt: 5.5095e+01, eta: 3.1852e+00, phi: 3.6198e+00, px: 6.0008e+02, py: 1.9807e+03, pz: 1.2763e+03, energy: 7.7067e+01
epoch: 2, train: 3.3152e+03, val: 2.9808e+03, deltaphi: 1.0313e+00, deltaeta: 8.7186e-01, deltaR: 1.2344e+00, mt: 9.7894e+01, norm_mt: 9.0642e-03, mass: 5.7864e+03, pt: 5.6439e+01, eta: 8.2209e+00, phi: 3.5171e+00, px: 4.6796e+02, py: 1.4867e+03, pz: 9.8402e+02, energy: 5.6034e+01
epoch: 3, train: 3.1055e+03, val: 3.1057e+03, deltaphi: 9.3699e-01, deltaeta: 8.3124e-01, deltaR: 9.9658e-01, mt: 2.8134e+02, norm_mt: 1.1543e-03, mass: 6

In [None]:
modelsavedpath='/home/ddemler/HNLclassifier/fnn_FeatureRegression/fnn_aug17all_oldloss.pt'
modelsave=CustomKinematicNet(input_size=input_dim, hidden_layers=hidden_layers, lenoutput=output_dim)
modelsave.load_state_dict(torch.load(modelsavedpath))
modelsave.to(device)
out_feats=['deltaphi', 'deltaeta', 'deltaR', 'mt', 'norm_mt', 'mass', 'pt', 'eta' , 'phi',  'px', 'py', 'pz', 'energy']


test_dataset = KinematicDataset(num_events=500000, seed=10000)
input_dim, output_dim = test_dataset.usefulvariables()
test_sampler = EpochSampler(train_dataset)

test_loader = DataLoader(test_dataset, batch_size=320, sampler=test_sampler)

residuals=np.array([])
y_total=np.array([])
y_pred_total=np.array([])
for i, (x,y) in enumerate(test_loader):
    x = x.to(device)
    y = y.to(device)
    y_pred = model(x)
    y_pred_total=np.append(y_pred_total, y_pred.cpu().detach().numpy())
    y_total=np.append(y_total, y.cpu().detach().numpy())


    # residuals=np.append(residuals, y_pred.cpu().detach().numpy() - y.cpu().detach().numpy())

    # residuals.append(y_pred.cpu().detach().numpy() - y.cpu().detach().numpy())


numfeatures=13
y_pred_total = y_pred_total.reshape(-1,3,numfeatures)
y_total = y_total.reshape(-1,3,numfeatures)

# print(y_pred_total.shape)
# print(y_total.shape)

residuals = [[] for _ in range(numfeatures)]
label_values = [[] for _ in range(numfeatures)]

for i in range(numfeatures):
    y_curr=y_total[:,:,i]
    # print("ycurr shape before reshape", y_curr.shape)
    y_curr=y_curr.reshape(500000*3,1)
    # print("ycurr shape after reshape", y_curr.shape)
    y_pred_curr=y_pred_total[:,:,i]
    y_pred_curr=y_pred_curr.reshape(500000*3,1)
    residuals_curr = y_pred_curr - y_curr
    residuals[i]=residuals_curr
    label_values[i]=y_curr

residuals = [np.array(res_list) for res_list in residuals]  # Convert lists of arrays to arrays
# residual_medians = [np.median(res) for res in residuals]
residual_std_devs = [np.std(res) for res in residuals]
residual_means = [np.mean(res) for res in residuals]


num_rows = 4
num_cols = 5


fig, axes = plt.subplots(nrows=num_rows, ncols=num_cols, figsize=(15, 15))
flat_axes = axes.flatten()

for i, ax in enumerate(flat_axes[:numfeatures]):
    ax.hist(residuals[i], bins=100, edgecolor='k', alpha=0.65)
    ax.axvline(x=residual_means[i] + residual_std_devs[i], color='r', linestyle='--', label=f'+1 std = {residual_means[i] + residual_std_devs[i]:.2f})')
    ax.axvline(x=residual_means[i] - residual_std_devs[i], color='b', linestyle='--', label=f'-1 std = {residual_means[i] - residual_std_devs[i]:.2f})')
    ax.set_title(f'Residuals for {out_feats[i]}')
    ax.set_yscale('log')
    ax.set_xlabel('Residual Value')
    ax.set_ylabel('Frequency')
    ax.legend()
    
    # Display the mean value on the plot
    mean_text = f"Mean: {residual_means[i]:.2f}, std: {residual_std_devs[i]:.5f}"
    ax.text(0.6, 0.85, mean_text, transform=ax.transAxes)
    

for ax in flat_axes[numfeatures:]:
    ax.axis('off')

plt.tight_layout()
plt.show()
    
    


Generating data for seed: 10000


ValueError: cannot reshape array of size 3000000 into shape (1500000,1)