In [1]:
#!pip install tensorflow==2

In [2]:
import tensorflow as tf
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

from tensorflow.keras.utils import to_categorical # For y values
from tensorflow.keras.models import Sequential
from tensorflow.keras import metrics
from tensorflow.keras.layers import Dense, Dropout, Conv2D, MaxPooling2D, Flatten
from tensorflow.keras import backend as K
from tensorflow.keras.callbacks import ModelCheckpoint

import matplotlib.pyplot as plt
import seaborn as sns; sns.set()

In [3]:
np.random.seed(123)  # for reproducibility

## Some Helping Functions

In [4]:
def print_heatmap(data):
    heatmap = sns.heatmap(data)
    heatmap.set(xlabel='Pseudorapidity', ylabel='Azimuthal Angle')
    print(heatmap)
    #plt.imshow(data, cmap='jet', interpolation='nearest')
    #plt.show()

In [5]:
def get_predicted_values(output_values):
    predicted_values = []
    for probability in output_values:
        if probability[0] > probability[1]:
            predicted_values.append(0)
        else:
            predicted_values.append(1)
    return predicted_values

# Reading the data

First we read the Signal Data and produce a heatmap from the average of all lines. We do so, in order to get the feeling of what our data looks like.

In [6]:
full_data = pd.read_csv('full_data.csv', )
full_data.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,616,617,618,619,620,621,622,623,624,class
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0


In [7]:
full_data.shape

(22596, 626)

In [8]:
full_data['class'].sum()

11298

Now let's examine how our data looks like.

# Defining and Training the model

Split the full dataset into _test_ and _train_ data in a 70-30% rate.

In [9]:
train_data, test_data = train_test_split(full_data, test_size = 0.30)
print(train_data.shape)
print(test_data.shape)

(15817, 626)
(6779, 626)


The _Keras_ framework, in order to train its network must receive the dependent and independent variables in  separated tables.

In [10]:
X_train = train_data.iloc[:, :-1]
img_rows = X_train.shape[0]
img_cols = X_train.shape[1]
input_shape = (img_rows, img_cols,1)
Y_train = train_data.iloc[:,-1:]
print(img_rows, img_cols)
X_test = test_data.iloc[:, :-1]
print(X_test.shape)
Y_test = test_data.iloc[:,-1:]
print(Y_test.shape)
input_shape

15817 625
(6779, 625)
(6779, 1)


(15817, 625, 1)

In [11]:
from sklearn import preprocessing
X_train_scaled = preprocessing.scale(X_train)
X_test_scaled = preprocessing.scale(X_test)

In [12]:
X_train = np.asarray(X_train_scaled).reshape(X_train_scaled.shape[0], 25, 25, 1)
X_train.shape

(15817, 25, 25, 1)

In [13]:
X_test= np.asarray(X_test_scaled).reshape(X_test_scaled.shape[0], 25, 25, 1)
X_test.shape

(6779, 25, 25, 1)

In [14]:
filepath = "./melhor_modelo.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='val_accuracy', verbose=2, save_best_only=True, mode='max', save_weights_only=False)

model = Sequential()
model.add(Conv2D(16, (3, 3), activation='relu', input_shape=(25, 25, 1)))
model.add(Conv2D(32, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(Conv2D(128, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.5))

model.add(Flatten())
model.add(Dense(128, activation='sigmoid'))
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.5))

model.add(Dense(2, activation='softmax'))

#sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

print(model.summary())

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 23, 23, 16)        160       
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 21, 21, 32)        4640      
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 10, 10, 32)        0         
_________________________________________________________________
dropout (Dropout)            (None, 10, 10, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 8, 8, 64)          18496     
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 6, 6, 128)         73856     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 3, 3, 128)         0

We must convert the dependent variable to be a probability distribution of the possible outcomes, thus, a value of output `1` must become the probabilities `(0, 1)`. Conversely, a `0` outcome value must become the pair `(1, 0)`.

In [15]:
Y_train = to_categorical(Y_train.values, num_classes=2)

In [16]:
Y_train

array([[0., 1.],
       [0., 1.],
       [1., 0.],
       ...,
       [1., 0.],
       [1., 0.],
       [1., 0.]], dtype=float32)

## Neural Network Architecture Definition

We defined a simple NN, with only two hidden layer.

After defining the NN architecture we train it using the `fit` method. We trained it for 50 epochs (backpropagation cycles). 

In [17]:
BATCH_SIZE = 500
EPOCHS = 10

history = model.fit(X_train,
                      Y_train,
                      #batch_size=BATCH_SIZE,
                      epochs=EPOCHS,
                      callbacks=[checkpoint],
                      validation_split=0.2,
                      verbose=1)

Train on 12653 samples, validate on 3164 samples
Epoch 1/10
Epoch 00001: val_accuracy improved from -inf to 0.72345, saving model to ./melhor_modelo.hdf5
Epoch 2/10
Epoch 00002: val_accuracy improved from 0.72345 to 0.74716, saving model to ./melhor_modelo.hdf5
Epoch 3/10
Epoch 00003: val_accuracy improved from 0.74716 to 0.75253, saving model to ./melhor_modelo.hdf5
Epoch 4/10
Epoch 00004: val_accuracy improved from 0.75253 to 0.75316, saving model to ./melhor_modelo.hdf5
Epoch 5/10
Epoch 00005: val_accuracy improved from 0.75316 to 0.76169, saving model to ./melhor_modelo.hdf5
Epoch 6/10
Epoch 00006: val_accuracy did not improve from 0.76169
Epoch 7/10
Epoch 00007: val_accuracy did not improve from 0.76169
Epoch 8/10
Epoch 00008: val_accuracy did not improve from 0.76169
Epoch 9/10
Epoch 00009: val_accuracy improved from 0.76169 to 0.76485, saving model to ./melhor_modelo.hdf5
Epoch 10/10
Epoch 00010: val_accuracy improved from 0.76485 to 0.76549, saving model to ./melhor_modelo.hdf5

After training the model we need to evaluate it.

In [18]:
Y_test = to_categorical(Y_test.values, num_classes=2)

In [19]:
from tensorflow.keras.models import load_model
best_model = load_model(filepath)

In [20]:
score = best_model.evaluate(X_test, Y_test, verbose=0)
print(model.metrics_names)
print(score)

['loss', 'accuracy']
[0.4745970973576007, 0.7858091]


Now let's take a look at the missclassified observations.

In [21]:
Y_test_predicted = model.predict(X_test, verbose=0)

In [22]:
Y_test_predicted

array([[0.7040813 , 0.29591876],
       [0.9477712 , 0.05222877],
       [0.02068555, 0.9793145 ],
       ...,
       [0.9596773 , 0.04032274],
       [0.04797025, 0.9520297 ],
       [0.3260052 , 0.6739947 ]], dtype=float32)

Now we calculate the false negatives and also the false positives by comparing the true value with the predicted one.

In [23]:
Y_test_predicted_values = get_predicted_values(Y_test_predicted)
Y_test_values = get_predicted_values(Y_test)
print(Y_test_predicted_values[1:10])
print(Y_test_values[1:10])

false_positives = []
false_negatives = []
for i in range(len(Y_test_values)):
    if Y_test_values[i] == 0 and Y_test_predicted_values[i] == 1:
        false_positives.append(i)
    elif Y_test_values[i] == 1 and Y_test_predicted_values[i] == 0:
        false_negatives.append(i)
        
print("False Positive Rate: {:.2f}".format(len(false_positives)/len(Y_test_values)))
print("False Negative Rate: {:.2f}".format(len(false_negatives)/len(Y_test_values)))

[0, 1, 1, 0, 0, 1, 1, 0, 0]
[0, 1, 1, 0, 0, 1, 1, 0, 0]
False Positive Rate: 0.12
False Negative Rate: 0.09


And try to visualize the heatmaps for false positives and negatives. Firstly the false negatives, where the network was supposed to answer _Signal_, but instead, it answered _Background_.

In [24]:
print(false_negatives[1:10])
false_negatives_values = X_test.iloc[false_negatives,]
print(false_negatives_values.shape)

mean_false_negatives = false_negatives_values.mean().values
print(mean_false_negatives.shape)
mean_false_negatives = mean_false_negatives.reshape((25,25))

print_heatmap(mean_false_negatives)

[33, 45, 57, 61, 65, 81, 94, 106, 113]


AttributeError: 'numpy.ndarray' object has no attribute 'iloc'

Then we examine the cases in which the network should have responded _Background_, but it answered _Signal_.

In [None]:
print(false_positives[1:10])
false_positives_values = X_test.iloc[false_positives,]
print(false_positives_values.shape)

mean_false_positives = false_positives_values.mean().values
print(mean_false_positives.shape)
mean_false_positives = mean_false_positives.reshape((25,25))

print_heatmap(mean_false_positives)