# ML in XENON
Build a feedforward neural network with Keras and train it to 
reconstruct event positions in the XENON dark matter experiment.

## Imports

In [1]:
import os
import matplotlib.pyplot as plt
from matplotlib.ticker import AutoMinorLocator

import pandas as pd
print('pandas version = '+str(pd.__version__))
import numpy as np
from math import *

import keras
print('keras version = '+str(keras.__version__))
from keras.models import Sequential,Model,load_model
from keras.layers import Input,Dense,Dropout,Activation
from keras.optimizers import SGD,Adam
from keras.utils import to_categorical, plot_model

from ann_visualizer.visualize import ann_viz

pandas version = 0.25.0


Using TensorFlow backend.


keras version = 2.2.4


In [2]:
## Optional reproducibility switch, currently disabled: the seeding code below
## is wrapped in a string literal, so none of it is executed. The trailing ';'
## keeps the notebook from echoing the string as cell output.
## Remove the triple quotes to actually seed numpy and TensorFlow.
'''
from numpy.random import seed
seed(1)
from tensorflow import set_random_seed
set_random_seed(2)
''';

## Preparing the Data

In [3]:
def load_data(data_file, features=['PMT_Area_000','PMT_Area_126'], labels=['x_observed_nn','y_observed_nn']):
    """Load row-normalized features and (optional) labels from an HDF5 file.

    Parameters
    ----------
    data_file : str
        Path of the HDF5 file. The frame stored under the key 'table' is read.
    features : sequence of two column names
        First and last column of the label-based (inclusive) column slice that
        forms the feature block. Only the first two entries are used.
    labels : list of column names
        Columns returned as labels.

    Returns
    -------
    tuple
        ``(feature_frame, label_frame)``. Every row of ``feature_frame`` is
        divided by its own sum. ``label_frame`` is ``None`` when selecting the
        label columns raises ``KeyError``.

    Raises
    ------
    FileNotFoundError
        If ``data_file`` does not exist. (Previously the function printed a
        message and returned -1, which made the tuple-unpacking callers fail
        with an unrelated TypeError.)
    """
    if not os.path.exists(data_file):
        raise FileNotFoundError("Error: File {} does not exist.".format(data_file))

    frame = pd.read_hdf(data_file, 'table')

    # Label columns are optional: fall back to None if they cannot be selected.
    try:
        data_label = frame.loc[:, labels]
    except KeyError:
        data_label = None

    # The default lists above are only read, never mutated.
    first_col, last_col = features[0], features[1]
    data_feature = frame.loc[:, first_col:last_col]

    # Normalize each row by its own sum.
    # NOTE(review): rows whose sum is zero are not special-cased here.
    row_sum = data_feature.sum(axis=1)
    return (data_feature.divide(row_sum, axis=0), data_label)

In [4]:
## Use the function above to load the training and unlabeled datasets
(train_feature, train_label)     = load_data('app2019_ex10_training.h5')
(predict_feature, predict_label) = load_data('app2019_ex10_unlabeled.h5')

## Convert to numpy arrays.
## load_data returns None for the labels when the label columns cannot be
## selected, so the prediction labels are only converted when they exist.
train_feature   = train_feature.to_numpy()
train_label     = train_label.to_numpy()
predict_feature = predict_feature.to_numpy()
if predict_label is not None:
    predict_label = predict_label.to_numpy()

## Scale the labels down to make the network easier to train, as smaller,
## centered values are often better. With positions within roughly +-50
## (see the plot limits below) this gives values in about [-0.5, 0.5].
## Predictions have to be multiplied by scaling_factor again later.
scaling_factor = 100
train_label = train_label/scaling_factor
if predict_label is not None:
    predict_label = predict_label/scaling_factor

In [5]:
## Scatter plot of the (x, y) positions used as training labels.

## Text options (usetex=True renders text through an external LaTeX installation)
plt.rc('text', usetex=True)
plt.rc('font', family='serif')

## Labels
label_size = 32.5
label_pad = 13
title_size = 39
title_pad = 26

## Figure and axes are created after the rc settings so they pick them up
fig, axes = plt.subplots(figsize=(16, 10), dpi=150)

## Axes ---> Linewidth
for side in ['top','bottom','left','right']:
    axes.spines[side].set_linewidth(2.2)

## Axes ---> Ticks (minor and major ticks point inwards on all four sides)
axes.xaxis.set_minor_locator(AutoMinorLocator(5))
axes.yaxis.set_minor_locator(AutoMinorLocator(5))
axes.tick_params(which='minor', direction='in', labelsize=label_size, top=True, right=True, width=1.2, length=6)
axes.tick_params(which='major', direction='in', labelsize=label_size, top=True, right=True, width=1.2, length=12)

## Data plot: multiply by scaling_factor to undo the label scaling
axes.scatter(scaling_factor*train_label[:,0],
             scaling_factor*train_label[:,1],
             marker='.',
             s=1,
             c='k',
            )

axes.set_xlim(-55,55)
axes.set_ylim(-55,55)

axes.set_xlabel('x [cm]', fontsize = label_size, labelpad = label_pad)
axes.set_ylabel('y [cm]', fontsize = label_size, labelpad = label_pad)
## The plotted quantity is train_label (positions), not the PMT features
axes.set_title('Train Labels', fontsize = title_size, pad = title_pad)

axes.set_aspect('equal', adjustable='box')

plt.show()

TimeoutError: LOCKERROR: matplotlib is trying to acquire the lock
    '/home/ab602/.cache/matplotlib/tex.cache/.matplotlib_lock-*'
and has failed.  This maybe due to any other process holding this
lock.  If you are sure no other matplotlib process is running try
removing these folders and trying again.


<Figure size 2400x1500 with 1 Axes>

## Building the Model

In [6]:
## Width of the input layer: one value per feature column
## (one per PMT, 127 for this dataset).
pmts = int(train_feature.shape[1])

## Two hidden ReLU layers of 64 units followed by a linear 2-unit output (x and y).
## Dropout(0.1) layers could be inserted after each hidden layer to regularize.
model = Sequential()
model.add(Dense(64, activation='relu', input_shape=(pmts,)))
model.add(Dense(64, activation='relu'))
model.add(Dense(2,  activation='linear'))

W0731 14:36:12.761116 140668243101504 deprecation_wrapper.py:119] From /home/ab602/anaconda3/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:74: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.

W0731 14:36:12.770524 140668243101504 deprecation_wrapper.py:119] From /home/ab602/anaconda3/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:517: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

W0731 14:36:12.772465 140668243101504 deprecation_wrapper.py:119] From /home/ab602/anaconda3/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:4138: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.



## Compiling the Model

In [7]:
## Optimizer in use: Adam with a learning rate of 0.005.
## Alternatives left here for experimentation:
##   opt = 'adam'
##   opt = 'sgd'
##   opt = 'rmsprop'
##   opt = SGD(lr=0.01, momentum=0.9)
##   opt = SGD(lr=0.001, momentum=0.9)
##   opt = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
opt = Adam(lr=0.005)

## Regression loss: mean squared error on the (scaled) positions.
## Additional metrics could be tracked by passing e.g.
##   metrics=['mean_squared_error', 'mean_squared_logarithmic_error', 'mean_absolute_error']
model.compile(loss='mean_squared_error', optimizer=opt)

W0731 14:36:12.859599 140668243101504 deprecation_wrapper.py:119] From /home/ab602/anaconda3/lib/python3.7/site-packages/keras/optimizers.py:790: The name tf.train.Optimizer is deprecated. Please use tf.compat.v1.train.Optimizer instead.



In [8]:
## Print the layer-by-layer overview (output shapes and parameter counts).
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 64)                8192      
_________________________________________________________________
dense_2 (Dense)              (None, 64)                4160      
_________________________________________________________________
dense_3 (Dense)              (None, 2)                 130       
Total params: 12,482
Trainable params: 12,482
Non-trainable params: 0
_________________________________________________________________


In [9]:
## Write a diagram of the network (with layer names and shapes) to model.png.
## rankdir: 'TB' = vertical plot, 'LR' = horizontal plot.
diagram_options = dict(show_shapes=True, show_layer_names=True, rankdir='TB')
plot_model(model, to_file='model.png', **diagram_options)

In [18]:
## Draw the network with ann_visualizer, using "model_graph" as the file name.
## NOTE(review): view=True presumably opens the rendered graph in an external
## viewer, which may not work on a headless machine - confirm.
ann_viz(model, view=True, filename="model_graph", title="Model Visualization")

## Training the Model

In [11]:
## Number of samples per gradient update (Keras would default to 32;
## 32, 64 and 128 are popular mini-batch sizes).
batchsize = 128

## Fit the network.
##   x / y            : training features and their target labels.
##   batch_size       : samples per gradient update, see above.
##   epochs           : number of passes over the entire training data.
##   validation_split : fraction of the data set apart for validation. Keras
##                      takes it from the last samples of x and y (before
##                      shuffling), does not train on it, and evaluates the
##                      loss on it at the end of each epoch.
##   verbose          : 0 = silent, 1 = progress bar, 2 = one line per epoch.
history_fit = model.fit(
    x=train_feature,
    y=train_label,
    batch_size=batchsize,
    epochs=100,
    validation_split=0.2,
    verbose=1,
)

W0731 14:36:17.114848 140668243101504 deprecation_wrapper.py:119] From /home/ab602/anaconda3/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:986: The name tf.assign_add is deprecated. Please use tf.compat.v1.assign_add instead.

W0731 14:36:17.217519 140668243101504 deprecation_wrapper.py:119] From /home/ab602/anaconda3/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:973: The name tf.assign is deprecated. Please use tf.compat.v1.assign instead.



Train on 197353 samples, validate on 49339 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100


Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


In [12]:
## List which quantities were recorded per epoch during training.
print(history_fit.history.keys())

dict_keys(['val_loss', 'loss'])


In [13]:
## Training and validation loss per epoch.
fig = plt.figure(figsize=(16, 10), dpi=150)

## Text options
plt.rc('text', usetex=True)
plt.rc('font', family='serif')

## Labels
label_size, label_pad = 32.5, 13
title_size, title_pad = 39, 26

## Axes Parameters
axes = plt.gca()
axes.tick_params(axis='both')

## Axes ---> Linewidth
for side in ('top', 'bottom', 'left', 'right'):
    axes.spines[side].set_linewidth(2.2)

## Axes ---> Ticks (settings shared by minor and major ticks, differing only in length)
tick_style = dict(direction='in', labelsize=label_size, top=True, right=True, width=1.2)
for tick_axis in (axes.xaxis, axes.yaxis):
    tick_axis.set_minor_locator(AutoMinorLocator(5))
axes.tick_params(which='minor', length=6, **tick_style)
axes.tick_params(which='major', length=12, **tick_style)

## Data plot
for history_key, curve_name in (("loss", 'Train'), ("val_loss", 'Validation')):
    axes.plot(history_fit.history[history_key], label=curve_name, linewidth=2.5)
#axes.set_yscale('log')

axes.set_xlabel('Epoch', fontsize=label_size, labelpad=label_pad)
axes.set_ylabel('Mean Squared Error', fontsize=label_size, labelpad=label_pad)
axes.set_title('Loss', fontsize=title_size, pad=title_pad)

## Legend (loc=1 is the upper right corner)
leg = axes.legend(loc=1, bbox_to_anchor=(0.99,0.99), fontsize=0.9*label_size,
                  frameon=True, shadow=False, edgecolor='black', fancybox=False)
for legend_line in leg.get_lines():
    legend_line.set_linewidth(4)
leg.get_frame().set_linewidth(2.5)

plt.show()

TimeoutError: LOCKERROR: matplotlib is trying to acquire the lock
    '/home/ab602/.cache/matplotlib/tex.cache/.matplotlib_lock-*'
and has failed.  This maybe due to any other process holding this
lock.  If you are sure no other matplotlib process is running try
removing these folders and trying again.


<Figure size 2400x1500 with 1 Axes>

## Testing the Model

In [14]:
## Disabled alternative (kept as an unused string): evaluate on the
## prediction dataset instead of the training dataset.
'''
model.evaluate(
  predict_feature,
  predict_label
)
'''

## Loss on the complete training dataset, i.e. including the samples the
## network was fitted on. As the last expression of the cell, the returned
## loss is displayed by the notebook.
model.evaluate(x=train_feature, y=train_label, batch_size=batchsize)



5.680351515150852e-06

## Using the Model

In [15]:
## Save the complete model to disk as 'model.h5'.
## (Alternative: store only the weights with save_weights.)
#model.save_weights('model_weights.h5')
model.save('model.h5')

## To restore later, load either the complete model or only the saved weights:
#model = load_model('model.h5')
#model.load_weights('model_weights.h5')

In [16]:
## The network outputs positions in the scaled label units; multiplying by
## scaling_factor undoes the division applied to the training labels.
predictions = scaling_factor * model.predict(predict_feature)

In [17]:
## Scatter plot of the positions reconstructed by the network.

## Text options (usetex=True renders text through an external LaTeX installation)
plt.rc('text', usetex=True)
plt.rc('font', family='serif')

## Labels
label_size = 32.5
label_pad = 13
title_size = 39
title_pad = 26

## Figure and axes are created after the rc settings so they pick them up
fig, axes = plt.subplots(figsize=(16, 10), dpi=150)

## Axes ---> Linewidth
for side in ['top','bottom','left','right']:
    axes.spines[side].set_linewidth(2.2)

## Axes ---> Ticks (minor and major ticks point inwards on all four sides)
axes.xaxis.set_minor_locator(AutoMinorLocator(5))
axes.yaxis.set_minor_locator(AutoMinorLocator(5))
axes.tick_params(which='minor', direction='in', labelsize=label_size, top=True, right=True, width=1.2, length=6)
axes.tick_params(which='major', direction='in', labelsize=label_size, top=True, right=True, width=1.2, length=12)

## Data plot
axes.scatter(predictions[:,0],
             predictions[:,1],
             marker='.',
             s=1,
             c='k',
            )

axes.set_xlim(-55,55)
axes.set_ylim(-55,55)

## Raw strings: '\m' is not a valid Python escape sequence, so the backslash
## of the LaTeX command must not be left to string-escape processing.
axes.set_xlabel(r'x$_\mathrm{NN}$ [cm]', fontsize = label_size, labelpad = label_pad)
axes.set_ylabel(r'y$_\mathrm{NN}$ [cm]', fontsize = label_size, labelpad = label_pad)
axes.set_title('Reconstructed Positions', fontsize = title_size, pad = title_pad)

axes.set_aspect('equal', adjustable='box')

plt.show()

TimeoutError: LOCKERROR: matplotlib is trying to acquire the lock
    '/home/ab602/.cache/matplotlib/tex.cache/.matplotlib_lock-*'
and has failed.  This maybe due to any other process holding this
lock.  If you are sure no other matplotlib process is running try
removing these folders and trying again.


<Figure size 2400x1500 with 1 Axes>