In [1]:
from google.colab import drive
# Mount Google Drive inside the Colab VM; the HDF5 files read below live under /content/drive.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import h5py
import numpy as np
import sklearn
from sklearn.model_selection import train_test_split

## Reading data

### Electron Dataset

In [3]:
# Open the single-electron HDF5 file read-only. The handle is not closed here: the
# datasets taken from it are only copied into NumPy arrays in the later concatenation cell.
data_elec = h5py.File('/content/drive/MyDrive/GSOC/Application/ML4Sci/Task 1/SingleElectronPt50_IMGCROPS_n249k_RHv1.hdf5','r')
# Display the top-level keys stored in the file
data_elec.keys()

<KeysViewHDF5 ['X', 'y']>

In [4]:
# Handles to the electron image ('X') and label ('y') datasets
electron_X, electron_Y = data_elec['X'], data_elec['y']

In [5]:
# Sanity check: dimensions of the electron images and labels, one per line
print(electron_X.shape, electron_Y.shape, sep='\n')

(249000, 32, 32, 2)
(249000,)


### Photon Dataset

In [6]:
# Open the single-photon HDF5 file read-only; the next cell reads its 'X' and 'y' datasets.
data_pho = h5py.File('/content/drive/MyDrive/GSOC/Application/ML4Sci/Task 1/SinglePhotonPt50_IMGCROPS_n249k_RHv1.hdf5','r')

In [7]:
# Handles to the photon image ('X') and label ('y') datasets
photon_X, photon_Y = data_pho['X'], data_pho['y']

In [8]:
# Dimensions of the photon images and labels, one per line
print(photon_X.shape, photon_Y.shape, sep='\n')

(249000, 32, 32, 2)
(249000,)


In [9]:
# Spot-check the label convention on one sample of each file (electron first, then photon)
print(electron_Y[42], photon_Y[42], sep='\n')

1.0
0.0


In [10]:
# Load both files into memory and stack them along the sample axis: photons first, electrons second
X = np.concatenate([np.array(photon_X), np.array(electron_X)], axis=0)
Y = np.concatenate([np.array(photon_Y), np.array(electron_Y)], axis=0)

In [11]:
# Dimensions of the combined images and labels, one per line
print(X.shape, Y.shape, sep='\n')

(498000, 32, 32, 2)
(498000,)


## Building model

In [None]:
# Hold out 25% of the samples for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X, Y,
    test_size=0.25,
    random_state=42,
)

In [13]:
# Shape of the training split (75% of the samples, given test_size=0.25 above)
X_train.shape

(373500, 32, 32, 2)

In [14]:
# Turn both label vectors into (n_samples, 1) column arrays
y_train = y_train.reshape(-1, 1)
y_test = y_test.reshape(-1, 1)

### TensorFlow version

### CNN

In [17]:
from keras.layers import LSTM
from keras.models import Sequential, Model
from keras.layers import Input, Dense, Flatten, Dropout, Activation, Lambda, Permute, Reshape

In [21]:
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense, Conv2D, Flatten, MaxPooling2D, GlobalAveragePooling2D

In [21]:
# Convolutional classifier for the 32x32 two-channel images, declared as one layer list
model = Sequential([
    # two convolutions followed by 2x2 down-sampling
    Conv2D(128, kernel_size=(3, 3), activation='relu', input_shape=(32, 32, 2)),
    Conv2D(64, kernel_size=(3, 3), activation='relu'),
    MaxPooling2D((2, 2), padding='valid'),
    # three further 32-filter convolutions
    Conv2D(32, kernel_size=(3, 3), activation='relu'),
    Conv2D(32, kernel_size=(3, 3), activation='relu'),
    Conv2D(32, kernel_size=(3, 3), activation='relu'),
    # dense head ending in a single sigmoid unit
    Flatten(),
    Dense(128, activation='relu'),
    Dense(32, activation='relu'),
    Dense(1, activation='sigmoid'),
])

In [22]:
# Adam + binary cross-entropy; the held-out split doubles as validation data
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=10,
    batch_size=256,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f45900b37d0>

### LSTM

In [15]:
# Flatten every image into a single timestep holding all of its pixel values, giving
# (n_samples, 1, n_features) arrays for the LSTM. Using -1 lets NumPy infer the feature
# count, so re-running this cell on the already flattened arrays leaves them unchanged
# instead of raising IndexError on shape[3].
X_train = X_train.reshape(X_train.shape[0], 1, -1)
X_test = X_test.reshape(X_test.shape[0], 1, -1)

In [18]:
# Stacked-LSTM binary classifier over the flattened images
model = Sequential()

# (timesteps, features) of one sample, taken from the reshaped training array
input_shape=(X_train.shape[1],X_train.shape[2])

# Every LSTM except the last returns the full sequence so that the next LSTM
# receives 3-D input; each one is followed by 20% dropout.
model.add(LSTM(units= 64,return_sequences=True,input_shape=input_shape))
model.add(Dropout(0.2))

model.add(LSTM(units=32,return_sequences=True))
model.add(Dropout(0.2))

model.add(LSTM(units=16,return_sequences=True))
model.add(Dropout(0.2))

# Final LSTM returns only its last output. No input_shape here: it is only
# meaningful on the first layer of a Sequential model.
model.add(LSTM(units=16))
model.add(Dropout(0.2))

model.add(Dense(units=1, activation='sigmoid'))

# metrics is passed as a list, matching the CNN cell; newer Keras releases reject a bare string
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [19]:
# Train the LSTM for 10 epochs, validating on the held-out split
model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=10,
    batch_size=128,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7fdba00c0490>

### VGGNet

In [None]:
# Headless VGG16 trained from scratch (weights=None). The input shape is taken from the
# untouched image array X, because the LSTM section flattens X_train to 3-D and it then
# has no shape[3]. classes / classifier_activation are omitted: Keras only applies them
# when include_top=True.
vgg = tf.keras.applications.VGG16(include_top = False, input_shape = X.shape[1:], weights = None)

In [None]:
# Classification head for the headless VGG16 base: global average pooling over the
# convolutional output, then a single sigmoid unit for the binary label.
global_avg = GlobalAveragePooling2D()
pred_layer = Dense(1, activation='sigmoid')
# Chain base, pooling and prediction layer into one trainable model
net = Sequential([vgg,global_avg,pred_layer])

In [None]:
# metrics is passed as a list, matching the CNN cell; newer Keras releases reject a bare string
net.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
# The LSTM section flattens X_train / X_test to (n_samples, 1, n_features). Restore the
# image layout of X before feeding VGG; this is a no-op if the arrays are still 4-D.
image_shape = (-1,) + X.shape[1:]
net.fit(
    X_train.reshape(image_shape), y_train,
    validation_data=(X_test.reshape(image_shape), y_test),
    epochs=10,
    batch_size=256,
)

## PyTorch

The CNN performed best of the models above, so this section re-implements the same architecture in PyTorch.

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [None]:
from torchvision import datasets, transforms 
from torch.utils import data  
# DataLoader treats a plain list [X, Y] as a two-item dataset (all images, all labels),
# so wrap the arrays in a TensorDataset that yields one (image, label) pair per index.
# permute turns the channels-last images (N, 32, 32, 2) into the channels-first
# (N, 2, 32, 32) layout that nn.Conv2d expects. from_numpy and permute share memory with
# X, and .float() returns the same tensor when the data is already float32.
images = torch.from_numpy(X).permute(0, 3, 1, 2).float()
labels = torch.from_numpy(Y).reshape(-1, 1).float()  # (N, 1) column of 0/1 targets
dataset = data.TensorDataset(images, labels)
loader = data.DataLoader(dataset, batch_size = 8, shuffle = True)

In [None]:
class NeuralNet(nn.Module):
  """PyTorch port of the Keras CNN defined earlier in this notebook.

  Input:  float tensor of shape (N, 2, 32, 32) (channels-first).
  Output: tensor of shape (N, 1) with sigmoid activations in [0, 1].
  """

  def __init__(self):
    super(NeuralNet, self).__init__()
    self.conv1 = nn.Conv2d(2,128,3,1)
    self.conv2 = nn.Conv2d(128,64,3,1)
    self.conv3 = nn.Conv2d(64,32,3,1)
    self.conv4 = nn.Conv2d(32,32,3,1)
    # Third 32-filter convolution with its own weights (the Keras model has three
    # independent Conv2D(32) layers), rather than applying conv4 twice.
    self.conv5 = nn.Conv2d(32,32,3,1)
    # Unpadded 3x3 convolutions and one 2x2 pool shrink the side length
    # 32 -> 30 -> 28 -> 14 -> 12 -> 10 -> 8, so the flattened feature vector
    # has 32 channels * 8 * 8 = 2048 entries.
    self.fc1 = nn.Linear(32 * 8 * 8,128)
    self.fc2 = nn.Linear(128,32)
    self.fc3 = nn.Linear(32,1)

  def forward(self,x):
    """Run a batch of (N, 2, 32, 32) images through the network; returns (N, 1)."""
    x = F.relu(self.conv1(x))
    x = F.relu(self.conv2(x))
    x = F.max_pool2d(x,2)
    x = F.relu(self.conv3(x))
    x = F.relu(self.conv4(x))
    x = F.relu(self.conv5(x))
    # Keep the batch dimension, flatten channels and spatial dimensions
    x = torch.flatten(x, 1)
    x = F.relu(self.fc1(x))
    x = F.relu(self.fc2(x))
    x = self.fc3(x)
    # Element-wise sigmoid (it takes no dim argument) for use with binary cross-entropy
    output = torch.sigmoid(x)
    return output

In [None]:
def _make_loader(images, labels, shuffle, batch_size=256):
  """Wrap NumPy images/labels in a DataLoader of channels-first float tensors.

  images may be channels-last (N, 32, 32, 2) or the flattened (N, 1, 2048) form
  produced in the LSTM section; the reshape restores the image layout either way.
  Batches come out as (B, 2, 32, 32) inputs and (B, 1) targets.
  """
  inputs = torch.from_numpy(images.reshape(-1, 32, 32, 2)).permute(0, 3, 1, 2).float()
  targets = torch.from_numpy(labels.reshape(-1, 1)).float()
  return data.DataLoader(data.TensorDataset(inputs, targets),
                         batch_size=batch_size, shuffle=shuffle)

# Reuse the train/test split made for the Keras models so the results are comparable
trainloader = _make_loader(X_train, y_train, shuffle=True)
testloader = _make_loader(X_test, y_test, shuffle=False)

model = NeuralNet()
optimizer = optim.Adam(model.parameters())

# One pass over the training data
model.train()
for inputs, targets in trainloader:
  optimizer.zero_grad()
  output = model(inputs)
  # The model already applies a sigmoid, so plain binary cross-entropy is the matching loss
  loss = F.binary_cross_entropy(output, targets)
  loss.backward()
  optimizer.step()

model.eval()
test_loss = 0
correct = 0

with torch.no_grad():
  for inputs, targets in testloader:
    output = model(inputs)
    # Sum per-sample losses so the division below yields the dataset average
    test_loss += F.binary_cross_entropy(output, targets,
                                        reduction='sum').item()
    # Single sigmoid output: threshold at 0.5 (argmax over one column is always 0)
    pred = (output >= 0.5).float()
    correct += pred.eq(targets.view_as(pred)).sum().item()
test_loss /= len(testloader.dataset)
print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
    test_loss, correct, len(testloader.dataset),
    100. * correct / len(testloader.dataset)))