A place to test models

In [None]:
# Author: Daniel Zurawski
# Author: Keshav Kapoor
# Organization: Fermilab
# Grammar: Python 3.6.1

### Choose either (1) or (2).
### (1) If you prefer a separate window for plots, keep the two lines below uncommented (currently active).
import matplotlib
matplotlib.use('qt5agg')

### (2) If you prefer plots to display within the notebook, uncomment the `%matplotlib notebook` line below and comment out the two lines under (1).
### WARNING: Plots suffer performance issues and will lag a bit.
# %matplotlib notebook

import keras # Neural network models
import pandas as pd # Data frames
import numpy as np  # numerical python
from tracker3d import loader, utils, metrics

In [None]:
%%time

# Column ordering of each hit, the noise setting, and a short code made of the
# first letter of every ordered column (it is embedded in the .npz file names).
order   = ("phi", "r", "z")
n_noise = 10
code    = "".join(column[0] for column in order)
load_from_file = False  # True: read the cached .npz files. False: build the data from the .csv files.

# .npz files that the train/test arrays are saved to and loaded from.
test_file  = "datasets/npz/ramp_{0}_n{1}.npz".format(code, n_noise)  # Data to test models with.
train_file = "datasets/npz/unif_{0}_n{1}.npz".format(code, n_noise)  # Data to train models with.

# Raw .csv inputs, used only when the data is not loaded from the .npz files.
test_csv  = "datasets/raw/ramp_z.csv"
train_csv = "datasets/raw/unif_25t_4000e.csv"

# Retrieve the data.
if not load_from_file:
    # Build both data sets from the .csv files, then save them to .npz files.
    # Keyword arguments shared by the two loader.from_frame calls.
    shared_options = dict(
        nev=99999,
        tpe=9999,
        ts=9999,
        variable_data=True,
        verbose=True,
        order=order,
        n_noise=n_noise,
    )
    train_data, train_target = loader.from_frame(
        frame=pd.read_csv(train_csv),
        **shared_options
    )
    # The test set is asked to match the second and third dimensions of the
    # training target.
    test_data, test_target = loader.from_frame(
        frame=pd.read_csv(test_csv),
        preferred_rows=train_target.shape[1],
        preferred_tracks=train_target.shape[2],
        **shared_options
    )
    loader.to_file(train_data, train_target, train_file)
    loader.to_file(test_data, test_target, test_file)
else:
    # Much faster than creating your own!
    test_data,  test_target  = loader.from_file(test_file)
    train_data, train_target = loader.from_file(train_file)

print("Successfully loaded!")
print("train_data shape:   {0},\ntrain_target shape: {1}".format(train_data.shape, train_target.shape))
print("test_data shape:    {0},\ntest_target shape   {1}".format(test_data.shape, test_target.shape))

In [None]:
# Get a taste for how an event looks by plotting a random training event.
train  = train_data
target = train_target
event_number = np.random.randint(0, len(train))

print("Event {}".format(event_number))

# Note: This will open up in a different window if using 'qt5agg'.
# The index above is drawn from the training set, so the event is read from
# the training arrays as well. Indexing the test arrays with it would show an
# unrelated event (or fail if the test set is smaller) under a "Train" title.
plot = utils.plot3d(
    train[event_number],
    target[event_number],
    order=order,
    title="Train Uniform {}".format(event_number),
    flat_ax="z"
)

In [None]:
# Show the hits of one random training event next to its category
# probability matrix. Blank entries in that matrix are 0's.
train  = train_data
target = train_target
event_number = np.random.randint(len(train))

event_hits  = train[event_number]
event_truth = target[event_number]

# No prediction exists yet, so the target matrix is passed for both matrix
# arguments.
utils.display_side_by_side(
    event_hits,
    event_truth,
    event_truth,
    order=order,
    display="bold"
)

In [None]:
# To be used when we define our model.
from keras.layers import TimeDistributed, Dense, LSTM, Activation
from keras.layers import Dropout, GRU, Bidirectional, Conv2D, Conv1D
from keras.layers import MaxPooling2D, Flatten
from keras.layers.normalization import BatchNormalization
from keras.models import Sequential

In [None]:
%%time

# Parameters for the model and its training run.
input_shape  = train_data.shape[1:]   # Shape of an event.
num_classes  = train_target.shape[2]  # Number of tracks per event
epochs       = 8
batch_size   = 20
valsplit     = 0
opt          = 'rmsprop'

# Construct the model: dropout on the input, followed by a softmax dense
# layer wrapped in TimeDistributed.
model = Sequential([
    Dropout(0.3, input_shape=input_shape),
    TimeDistributed(
        Dense(num_classes, kernel_initializer='uniform', activation='softmax')
    ),
])

# Compile the model.
model.compile(
    loss='categorical_crossentropy',
    optimizer=opt,
    metrics=["accuracy"]
)

# Print the run settings, then a summary of the model.
settings_line = "Epochs: {0}, Batch Size: {1}, Validation Split {2}%".format(
    epochs,
    batch_size,
    valsplit * 100
)
print(settings_line)
model.summary()

In [None]:
%%time

# Train the model.
# NOTE(review): the seed is set in this cell, after the model was constructed
# in an earlier cell -- confirm this gives the intended reproducibility.
np.random.seed(7)  # For reproducibility
# Not used in the visible cells; presumably where the model is meant to be
# saved -- TODO confirm.
modelpath = "my_model.h5"

# The test set is passed as validation data (validation_split is not used),
# and shuffling is disabled.
hist = model.fit(
    x=train_data,
    y=train_target,
    batch_size=batch_size,
    epochs=epochs,
    verbose=1,
    validation_data=(test_data, test_target),
    shuffle=False
)

In [None]:
%%time

# Ask the model to predict the test data, which it was not trained on.
predictions = model.predict(test_data, batch_size=batch_size)
print("Predicted {} events.".format(predictions.shape[0]))

# Score the predictions against the test targets.
overall_accuracy = metrics.discrete_accuracy_all(
    test_data,
    predictions,
    test_target,
    padding=True  # If we want to not count padding rows, then True.
)
print("Discrete Accuracy: {}".format(overall_accuracy))

In [None]:
# Plot a prediction with event number being *event_number*.
# Within the ramp data set:
# Event with most tracks: 3435 (21 tracks)
# Events with >19 tracks: 130, 282, 1080, 3023, 3178, 3435, 3445, 3527
# Events with 2 tracks: 11, 761, 1238
# Events with 1 track: 292, 2415, 3313, 3390
# No events have 0 tracks.
train  = test_data
target = test_target
event_number = 130
print("Event {}".format(event_number))

# Accuracy of this single event.
# NOTE(review): the target is passed before the prediction here, whereas
# discrete_accuracy_all, plot3d and display_side_by_side are called elsewhere
# in this notebook with the prediction first -- confirm the expected order
# against metrics.discrete_accuracy.
print(metrics.discrete_accuracy(
    train[event_number],
    target[event_number],
    predictions[event_number],
    padding=True
))

# Plot the event's hits with the predicted and target matrices, using the
# notebook's `order` setting for the column ordering.
plot = utils.plot3d(
    train[event_number],
    predictions[event_number],
    target[event_number],
    order=order,
    title="Event {}".format(event_number),
    flat_ax=None
)

In [None]:
# Show the selected event's hits alongside the predicted matrix and the
# target matrix.
event_hits  = train[event_number]
event_guess = predictions[event_number]
event_truth = target[event_number]

utils.display_side_by_side(
    event_hits,
    event_guess,
    event_truth,
    order=order,
    display="bold"
)

In [None]:
# Print the model's scores on the currently selected data (`train`/`target`
# as bound by an earlier cell), then graph the losses recorded in the
# history returned by the fitting process.
utils.print_scores(model, train, target, batch_size)

loss_histories = [("Categorical Cross Entropy", hist)]
utils.graph_losses(loss_histories)