This is a program that will train a model to identify and assign hits to tracks.
Written by Daniel Zurawski & Keshav Kapoor for Fermilab Summer 2017 internship.

In [None]:
import keras
import random
import winsound
import tracker3d
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D as ax

# Render floats in displayed DataFrames with thousands separators and two decimals.
pd.options.display.float_format = '{:,.2f}'.format

In [None]:
def row_max(modelPredictions):
    """Return a flat one-hot vector marking the largest entry of the input.

    The result has ``modelPredictions.size`` float entries, all zero except
    for a 1 at the (flattened) index of the maximum value. On ties the first
    maximum wins, following ``np.argmax``.
    """
    winner = np.argmax(modelPredictions)
    one_hot = np.zeros(modelPredictions.size)
    one_hot[winner] = 1
    return one_hot

In [None]:
# Read the CSV dataset and let tracker3d split it into training inputs and targets.
# NOTE(review): the meaning of nev/tpe/ts/npe is defined by tracker3d.loader.dataLoad
# and is not visible here -- confirm against that module before changing them.
filename = "datasets/standard_100MeV.csv"
train, target = tracker3d.loader.dataLoad(
    pd.read_csv(filename),
    nev=5,
    tpe=5,
    ts=4,
    npe=0,
)

Let's take a look at the input and output training data.

from IPython.display import display,HTML

def multi_column_df_display(list_dfs, cols=2):
    """Display dataframes in IPython as an HTML table with ``cols`` columns.

    Adapted from code by David Medenjak responding to the StackOverflow
    question found here:
    https://stackoverflow.com/questions/38783027/jupyter-notebook-display-two-pandas-tables-side-by-side

    Args:
        list_dfs: Iterable of objects exposing ``to_html()`` (e.g. pandas
            DataFrames). They fill the table left to right, top to bottom.
        cols: Number of table columns; must be at least 1.

    Raises:
        ValueError: If ``cols`` is less than 1.
    """
    if cols < 1:
        raise ValueError("cols must be at least 1, got {}".format(cols))

    html_table = "<table style='width:100%; border:0px'>{content}</table>"
    html_row = "<tr style='border:0px'>{content}</tr>"
    html_cell = "<td style='width:{width}%;vertical-align:top;border:0px'>{{content}}</td>"
    html_cell = html_cell.format(width=100/cols)

    cells = [html_cell.format(content=df.to_html()) for df in list_dfs]
    # Pad only a partially filled last row. (-n) % cols is 0 when the row is
    # already full, so no spurious all-empty row is appended.
    cells += ((-len(cells)) % cols) * [html_cell.format(content="")]
    rows = [html_row.format(content="".join(cells[i:i+cols])) for i in range(0, len(cells), cols)]
    display(HTML(html_table.format(content="".join(rows))))
# END FUNCTION multi_column_df_display

# Build one DataFrame per event for the inputs and the outputs, then show them
# side by side (input, output, input, output, ...).
# NOTE(review): `tracker` is not defined in any cell visible in this notebook --
# confirm where it is created before running this cell.
input_cols = ["phi", "r", "act_z"]
# One "T<i>" column per output column except the last, which is labelled "N".
output_cols = ["T{}".format(i) for i in range(tracker.output.shape[2] - 1)] + ["N"]
show_max = 2

limit_applies = show_max is not None and 0 < show_max < len(tracker.input)
if limit_applies:
    print("Displaying the first {} inputs and outputs.".format(show_max))
    shown_inputs = tracker.input[:show_max]
    shown_outputs = tracker.output[:show_max]
else:
    print("Displaying all of input and output.")
    shown_inputs = tracker.input
    shown_outputs = tracker.output

input_frames = [pd.DataFrame(data=matrix, columns=input_cols) for matrix in shown_inputs]
output_frames = [pd.DataFrame(data=matrix.astype(int), columns=output_cols) for matrix in shown_outputs]

# Interleave so each input frame is immediately followed by its output frame.
df_list = []
for in_frame, out_frame in zip(input_frames, output_frames):
    df_list.extend((in_frame, out_frame))

print("Input shape:  {}".format(tracker.input.shape))
print("Output shape: {}".format(tracker.output.shape))
multi_column_df_display(df_list)

Let's now build a model and attach it to our tracker.

In [None]:
from keras.layers import Dense, LSTM, Dropout
from keras.models import Sequential

In [None]:
# GRU is used below but is missing from this notebook's `keras.layers` import.
from keras.layers import GRU

# Keras' `input_shape` describes a single sample and excludes the batch axis,
# so drop the leading axis of `train`.
# NOTE(review): assumes `train` is laid out as (events, hits, features) -- confirm.
input_shape = train.shape[1:] # Shape of an event.
output_shape = len(target[0][0]) # Width of one target row; one softmax class per column.

batch_size = 32
epochs     = 256
valsplit   = 0.25
opt        = 'rmsprop' # optimizer

# NOTE(review): `tracker` is not defined in any cell visible in this notebook --
# confirm where it is created before running this cell.
tracker.model = Sequential()
# return_sequences=True keeps one output per step of the sequence, so the Dense
# softmax below yields one class distribution per step.
tracker.model.add(GRU(32, return_sequences=True, input_shape=input_shape, dropout=.2, recurrent_dropout=.2))
tracker.model.add(Dense(output_shape, activation='softmax'))

tracker.model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])
#tracker.model.summary()

In [None]:
# Fit the model, checkpointing it to `modelpath`, then announce completion
# with a message and three one-second beeps.
modelpath = 'simple.h5'
checkpoint = keras.callbacks.ModelCheckpoint(filepath=modelpath, verbose=0)
hist = tracker.model.fit(
    train,
    target,
    epochs=epochs,
    batch_size=batch_size,
    verbose=0,
    validation_split=valsplit,
    callbacks=[checkpoint],
)
print("Ding! All done.")
# winsound.Beep takes (frequency in Hz, duration in ms).
for frequency_hz in (1000, 1800, 2200):
    winsound.Beep(frequency_hz, 1000)

It's time to evaluate the model and graph the training history of the neural network.

In [None]:
# Evaluate the model on tracker.input / tracker.output and plot the fit history.
# NOTE(review): neither `tracker` nor `show_losses` is defined in any cell
# visible in this notebook -- confirm where they come from.
score, acc = tracker.model.evaluate(
    tracker.input,
    tracker.output,
    batch_size=batch_size,
)
print(
    "\nTest Score:    {}".format(score),
    "Test Accuracy: {}".format(acc),
    sep="\n",
)
show_losses([("Categorical Cross Entropy", hist)])

In [None]:
# Predict for the displayed events, report every row whose one-hot prediction
# differs from the expected output, and print a per-event accuracy.
# NOTE(review): `tracker` is not defined in any cell visible in this notebook --
# confirm where it is created before running this cell.
predictions = tracker.model.predict(tracker.input[:len(input_frames)], batch_size=batch_size)

for i, outMax in enumerate(predictions):
    # Collapse each row of probabilities to a one-hot vector at its maximum.
    discreteOut = np.apply_along_axis(row_max, axis=1, arr=outMax)
    truth = tracker.output[i].astype(int)

    mismatch = discreteOut != truth
    wrong_rows = np.flatnonzero(mismatch.any(axis=1))
    for j in wrong_rows:
        print("The event where the wrong hit took place is:", i)
        print("The wrong hit is in row:", j)
    if wrong_rows.size > 0:
        # Show prediction, expected output and input side by side for this event.
        df = [pd.DataFrame(data=discreteOut), pd.DataFrame(data=tracker.output[i]), pd.DataFrame(input_frames[i])]
        multi_column_df_display(df)

    # Accuracy is the fraction of matching one-hot entries over the whole
    # event. It is computed for every event, not only for events that contain
    # a wrong row, and over every row, not only the last one.
    acc = mismatch.size - np.count_nonzero(mismatch)
    percentAcc = acc / truth.size
    print("Accuracy: ", percentAcc)

# Finally show each input next to the raw (non-discretised) prediction.
df = []
for frame, prediction in zip(input_frames, predictions):
    df.append(frame)
    df.append(pd.DataFrame(data=prediction, columns=output_cols))
multi_column_df_display(df)