<a href="https://colab.research.google.com/github/AritraStark/E2E_GSOC_2022/blob/main/E2E_eval_task_1_PyTorch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Common Task 1. Electron/photon classification**

**Datasets:**

https://cernbox.cern.ch/index.php/s/AtBT8y4MiQYFcgc (photons)

https://cernbox.cern.ch/index.php/s/FbXw3V4XNyYB3oA (electrons)

**Description:** 32x32 matrices (two channels - hit energy and time) for two classes of particles (electrons and photons) impinging on a calorimeter. Please use a deep learning method of your choice to achieve the highest possible classification on this dataset (we ask that you do it both in Keras/Tensorflow and in PyTorch). Please provide a Jupyter notebook that shows your solution. The model you submit should have a ROC AUC score of at least 0.80.



Downloading the dataset: 

In [None]:
#fetch both dataset files from CERNBox and store them under fixed local names (photons first, then electrons)
!wget -O photons.hdf5 https://cernbox.cern.ch/index.php/s/AtBT8y4MiQYFcgc/download
!wget -O electrons.hdf5 https://cernbox.cern.ch/index.php/s/FbXw3V4XNyYB3oA/download

In [None]:
from sklearn.model_selection import train_test_split

#hold out 20% of the current training arrays as a validation set (fixed seed for a repeatable split)
#NOTE: the outputs overwrite the inputs, so re-running this cell splits the already-reduced training set again
(
    particles_x_train,
    particles_x_val,
    particles_y_train,
    particles_y_val,
) = train_test_split(
    particles_x_train,
    particles_y_train,
    test_size=0.2,
    random_state=48,
)

In [None]:
import torch
from torch.utils.data import DataLoader,TensorDataset
from torch import Tensor

#number of samples per mini-batch, shared by all three loaders
BATCH_SIZE=2000

#first load the data into tensor datasets (each one pairs the features with their labels)
train_dataset=TensorDataset(Tensor(particles_x_train),Tensor(particles_y_train))
val_dataset=TensorDataset(Tensor(particles_x_val),Tensor(particles_y_val))
test_dataset=TensorDataset(Tensor(particles_x_test),Tensor(particles_y_test))

#next load the tensor datasets into Dataloaders
#only the training loader is reshuffled every epoch; the validation and test loaders feed
#order-independent sums (loss, correct counts), so they keep a fixed order
trainloader=DataLoader(train_dataset,shuffle=True,batch_size=BATCH_SIZE)
validloader=DataLoader(val_dataset,shuffle=False,batch_size=BATCH_SIZE)
testloader=DataLoader(test_dataset,shuffle=False,batch_size=BATCH_SIZE)

In [None]:
import torch.nn as nn
import torch.nn.functional as F
class Network(nn.Module):
  """Fully connected binary classifier that returns one raw logit per sample.

  Four hidden ReLU layers, each followed by dropout, feed a single-unit output
  layer. No sigmoid is applied inside the network, so the output is meant to be
  used with a logit-based loss such as ``nn.BCEWithLogitsLoss``.

  Args:
    in_features: number of values per sample once the sample is flattened
      (default ``32*32``).
    hidden_features: width of each of the four hidden layers (default 256).
    dropout_p: dropout probability applied after every hidden layer
      (default 0.5).
  """

  ## constructor definition
  def __init__(self,in_features=32*32,hidden_features=256,dropout_p=0.5):
    super().__init__()
    #attribute names are kept as fc1..fc5/dropout so saved state_dict keys stay the same
    self.fc1=nn.Linear(in_features,hidden_features)
    self.fc2=nn.Linear(hidden_features,hidden_features)
    self.fc3=nn.Linear(hidden_features,hidden_features)
    self.fc4=nn.Linear(hidden_features,hidden_features)
    self.fc5=nn.Linear(hidden_features,1)
    self.dropout=nn.Dropout(dropout_p)

  ## forward method definition
  def forward(self,x):
    """Return a 1-D tensor holding one logit per sample of the batch ``x``.

    Every axis of ``x`` after the first (batch) axis is flattened, so the
    product of those axes must equal ``in_features``.
    """
    #reshape() flattens like view() but also accepts non-contiguous tensors
    x=x.reshape(x.shape[0],-1)
    #hidden stack: linear -> ReLU -> dropout, applied four times in order
    for hidden in (self.fc1,self.fc2,self.fc3,self.fc4):
      x=self.dropout(F.relu(hidden(x)))
    #collapse the trailing unit axis so the result has shape (batch,)
    return self.fc5(x).reshape(-1)

#now we can instantiate the Neural Network
#place it on the GPU when PyTorch can see one, otherwise keep it on the CPU instead of raising
device=torch.device("cuda" if torch.cuda.is_available() else "cpu")
model=Network().to(device)

In [None]:
#binary cross-entropy evaluated directly on the raw logits returned by the model
criterion=nn.BCEWithLogitsLoss()
criterion=criterion.cuda()

In [None]:
import torch.optim as optim

#Adam optimiser over every parameter of the model, with a learning rate of 1e-3
optimizer=optim.Adam(params=model.parameters(),lr=1e-3)

In [None]:
#the training loop runs for this many epochs
epochs=100

#average loss of every epoch, one list per split (filled by the training loop)
train_losses,val_losses=[],[]

#lowest validation loss seen so far (starts at +infinity) and the epoch it was reached in
min_val_loss,best_epoch=float("inf"),0

In [None]:
#batches are moved to whichever device currently holds the model's parameters
model_device=next(model.parameters()).device

#loop over number of epochs
for epoch in range(epochs):
  train_loss=0
  val_loss=0

  #set model to train mode
  model.train()
  for data,labels in trainloader:
    data,labels=data.to(model_device),labels.to(model_device)

    #zeroing the gradients saved with the optimizer
    optimizer.zero_grad()

    #forward propagation (the model returns raw logits, not probabilities)
    logits=model(data)
    #loss evaluation
    loss=criterion(logits,labels)

    #backward propagation
    loss.backward()

    #updating weights
    optimizer.step()

    #accumulating training loss over epoch; weighting by batch size keeps the
    #per-sample average exact even when the last batch is smaller
    train_loss+= loss.item()*data.shape[0]

  #set model to evaluation mode
  model.eval()
  #validation needs no gradients, so skip building the autograd graph
  with torch.no_grad():
    for data,labels in validloader:
      data,labels=data.to(model_device),labels.to(model_device)

      #forward propagation
      logits=model(data)

      #loss evaluation
      loss=criterion(logits,labels)

      #accumulating validation loss
      val_loss+= loss.item()*data.shape[0]

  #computing average loss per epoch
  train_loss=train_loss/len(train_dataset)
  val_loss=val_loss/len(val_dataset)

  #save average train loss per epoch
  train_losses.append(train_loss)
  #save average validation loss per epoch
  val_losses.append(val_loss)

  print(f"Epoch {epoch}:\t train loss:{train_loss:.6f}\t val_loss:{val_loss:.6f}")

  #keep track of the weights of the model with the lowest validation loss
  #(one checkpoint file is written for every epoch that matches or beats the best loss)
  if val_loss<= min_val_loss:
    print(f"val loss decreased from {min_val_loss:.6f} to {val_loss:.6f}\n")
    torch.save(model.state_dict(),"model"+str(epoch)+".pth")
    min_val_loss=val_loss
    best_epoch=epoch

In [None]:
import matplotlib.pyplot as plt

#one curve per split: x is the epoch index, y is the average loss recorded for that epoch
fig,ax=plt.subplots()
ax.plot(train_losses,'r',label='train')
ax.plot(val_losses,'b',label='validation')
ax.set_xlabel('epoch')
ax.set_ylabel('average loss')
ax.set_title('Training and validation loss per epoch')
ax.legend()
plt.show()

In [None]:
#restore the weights that were saved at the epoch with the lowest validation loss
best_checkpoint_path="model"+str(best_epoch)+".pth"
best_state_dict=torch.load(best_checkpoint_path)
model.load_state_dict(best_state_dict)

In [None]:
#set the model to evaluation mode
model.eval()

#batches are moved to whichever device currently holds the model's parameters
eval_device=next(model.parameters()).device

#keep track of the correct predictions (list index = class label)
class_correct=[0. for _ in range(2)]

#keep track of the total number of predictions (list index = class label)
class_total=[0. for _ in range(2)]

test_loss=0

#evaluation needs no gradients, so skip building the autograd graph
with torch.no_grad():
  for data,labels in testloader:
    data,labels=data.to(eval_device),labels.to(eval_device)

    #forward propagation (the model returns raw logits, not probabilities)
    logits=model(data)
    #evaluate loss
    loss=criterion(logits,labels)

    #accumulate loss over the test dataset
    test_loss+= loss.item()*data.shape[0]

    #calculate the predicted class: sigmoid probability >= 0.5 means class 1, otherwise class 0
    predicted=(torch.sigmoid(logits)>=0.5).to(labels.dtype)
    correct=predicted.eq(labels)

    #save correct predictions and total predictions with one vectorised count per class
    #instead of a Python loop over every sample
    label_ids=labels.long()
    for cls in range(2):
      in_class=label_ids==cls
      class_total[cls]+=in_class.sum().item()
      class_correct[cls]+=(correct&in_class).sum().item()

#calculate average test loss
test_loss=test_loss/len(test_dataset)
print(f'test loss:  {test_loss:.6f}\n')

#calculate class accuracy
for i in range(2):
  #a class with no test samples has no defined accuracy; report that instead of dividing by zero
  if class_total[i]==0:
    print(f"Test accuracy of {i}: N/A (no samples)")
    continue
  acc=100*class_correct[i]/class_total[i]
  print(f"Test accuracy of {i}: {acc:.2f}%")

#compute average test accuracy
total_seen=sum(class_total)
overall_test_accuracy=100*sum(class_correct)/total_seen if total_seen else float("nan")
print(f"\n Overall test accuracy: {overall_test_accuracy:.2f}%")
