A notebook for building, training, and testing models.

In [None]:
# Author: Daniel Zurawski
# Author: Keshav Kapoor
# Organization: Fermilab
# Grammar: Python 3.6.1

%matplotlib notebook
import keras # Neural network models
import pandas as pd # Data frames
import numpy as np  # numerical python
from tracker3d import loader, utils, metrics

In [None]:
%%time

# Column order used for each hit.
ORDERING = ("r", "z", "phi")

# Archive that the train and target arrays are read from / written to.
filename = "nevall_tpeall_tsall_rzp.npz"

# True: read the arrays back from *filename*.
# False: rebuild them from the CSV and save them to *filename*.
load_from_file = True

# Populate *train* and *target*.
if not load_from_file:
    # Build the arrays from the raw CSV, then store them for next time.
    frame = pd.read_csv("datasets/standard_curves100MeV.csv")
    frame_options = dict(
        nev=99999,
        tpe=9999,
        ts=9999,
        variable_data=True,
        verbose=True,
        order=ORDERING,
    )
    train, target = loader.from_frame(frame, **frame_options)
    loader.to_file(train, target, filename)
else:
    # Much faster than rebuilding the data from the CSV.
    train, target = loader.from_file(filename)

In [None]:
# Pick one event at random and plot it to see what an event looks like.
event_number = np.random.randint(len(train))
print(f"Event {event_number}")

# No predictions exist yet, so the target matrix fills both matrix arguments.
event_hits, event_target = train[event_number], target[event_number]
utils.plot3d(event_hits, event_target, event_target, order=ORDERING)

In [None]:
# Show the hits of the event chosen above next to its category
# probability (target) matrix.
event_views = (train[event_number], target[event_number])
utils.display_side_by_side(*event_views, order=ORDERING)

In [None]:
# To be used when we define our model.
from keras.layers import TimeDistributed, Dense, LSTM
from keras.layers import Dropout, GRU, Bidirectional
from keras.models import Sequential

In [None]:
%%time

# It is time to define a model.
trn_prcnt    = 0.25  # Fraction (0-1) of the events that are used for training.
n_events     = len(train)
n_trn        = int(trn_prcnt * n_events)  # Number of training events.
in_idx       = (n_events - n_trn, n_events)  # [start, stop) of the training events (tail of the data).
test_idx     = (0, n_events - n_trn)         # [start, stop) of the held-out events (head of the data).
input_shape  = train[0].shape # Shape of an event.
output_shape = target.shape[2] # Size of target's last axis (number of tracks per event).
epochs       = 128
batch_size   = 90
opt          = 'rmsprop'

# Create the model.
# The recurrent layer is stateful, so it is given a fixed batch size through
# batch_input_shape. NOTE(review): Keras stateful RNNs expect every array passed
# to fit/predict to hold a multiple of batch_size samples -- confirm for this data.
model = Sequential()
model.add(
    Bidirectional(
        GRU(
            128,
            return_sequences=True,
            implementation=2,
            stateful=True),
        batch_input_shape=(batch_size, input_shape[0], input_shape[1])
    )
)
model.add(Dropout(0.3))
model.add(TimeDistributed(Dense(output_shape, activation='softmax')))
model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=["accuracy"])

# Print a summary of the model.
# The previous summary referenced an undefined name (valsplit) and raised a
# NameError. No validation split is used: the fitting cell validates on the
# held-out events through validation_data, so report those ranges instead.
print("Training on events {0} to {1}.".format(in_idx[0], in_idx[1]))
print("Validating on events {0} to {1}.".format(test_idx[0], test_idx[1]))
print("Epochs: {0}, Batch Size: {1}".format(epochs, batch_size))
print("Total training events: {0} ({1}%).".format(n_trn, trn_prcnt * 100))
model.summary()

In [None]:
%%time

# Train the model.
# NOTE(review): this seeds NumPy's global RNG only, and only after the model
# was built and a random event was drawn in earlier cells.
np.random.seed(7)

# Held-out events; they are passed to fit() as validation data.
held_out = slice(*test_idx)
test_train, test_target = train[held_out], target[held_out]

# NOTE(review): modelpath is assigned here but not used by any visible cell.
modelpath = "my_model.h5"

fit_rows = slice(*in_idx)
hist = model.fit(
    train[fit_rows],
    target[fit_rows],
    batch_size=batch_size,
    epochs=epochs,
    validation_data=(test_train, test_target),
    verbose=True,
)

In [None]:
%%time

# Predict every event, then keep the predictions for the events that were
# not used for training and score them.
predictions = model.predict(train, batch_size=batch_size)
test_pred = predictions[slice(*test_idx)]

discrete_acc = metrics.discrete_accuracy_all(
    test_train, test_pred, test_target, padding=True
)
print(f"Discrete Accuracy: {discrete_acc}")

In [None]:
# Plot the model's prediction for the event numbered *event_number*.
# Event with most tracks: 3435 (21 tracks)
# Events with >19 tracks: 130, 282, 1080, 3023, 3178, 3435, 3445, 3527
# Events with 2 tracks: 11, 761, 1238
# Event with 1 track: 292, 2415, 3313, 3390
# No events have 0 tracks.
event_number = 3178

print("Event {}".format(event_number))

# Accuracy of the prediction for this single event.
print(metrics.discrete_accuracy(
    train[event_number],
    predictions[event_number],
    target[event_number],
    padding=True
))

# Previously the target matrix was passed twice, so the prediction was never
# drawn. NOTE(review): assumes plot3d takes (hits, prediction, target) in the
# same order as metrics.discrete_accuracy above -- confirm against tracker3d.utils.
utils.plot3d(
    train[event_number],
    predictions[event_number],
    target[event_number],
    order=ORDERING
)

In [None]:
# Show the chosen event's target matrix next to the model's prediction.
comparison = (
    train[event_number],
    target[event_number],
    predictions[event_number],
)
utils.display_side_by_side(*comparison, order=ORDERING)

In [None]:
# Report the model's scores, then graph the losses and accuracy recorded
# in *hist* during the fitting process.
utils.print_scores(model, train, target, batch_size)

loss_histories = [("Categorical Cross Entropy", hist)]
utils.graph_losses(loss_histories)