### 1.) Import the necessary modules.

In [2]:
import os
import sys
os.environ["CUDA_VISIBLE_DEVICES"] = "6"
%matplotlib notebook
import numpy as np
import pandas as pd
import keras
from keras.layers import Dense
from keras.models import Sequential
from tracker import extractor, utils, metrics, visuals

### 2.) Load in and prepare a frame of data.

In [None]:
# Read a raw frame from CSV, run it through extractor.prepare_frame(), and
# write the prepared frame back out as a gzip-compressed CSV.
your_frame_filepath = "this_is_where_my_data_is.csv"
save_filepath = "i_will_save_my_new_frame_here.gz"

your_frame = pd.read_csv(your_frame_filepath)

print("Preparing the frame. This may take a little while.")
prepare_options = dict(
    n_track=25,   # Maximum number of regular tracks per event.
    n_rows=260,   # Maximum number of regular hits plus number of noisy hits per event.
    n_noise=10,   # Number of noisy hits to add to each event.
)
prepared_frame = extractor.prepare_frame(frame=your_frame, **prepare_options)

print("Saving the frame. This may take a little while.")
prepared_frame.to_csv(save_filepath, compression="gzip")

print("All done.")

### 3.) Load in the prepared training and testing data and get a list of events.

In [None]:
# Load the prepared training and testing frames from disk and split each one
# into a list of per-event groups, grouping on the "event_id" column.
filepath_to_prepared_train_frame = "here_is_my_prepared_training_frame.gz"
filepath_to_prepared_test_frame  = "here_is_my_prepared_testing_frame.gz"

print("Loading in the frame. This may take a little while.")
train_frame = pd.read_csv(filepath_to_prepared_train_frame)
test_frame  = pd.read_csv(filepath_to_prepared_test_frame)
train = utils.list_of_groups(train_frame, group="event_id")
test  = utils.list_of_groups(test_frame,  group="event_id")

# NOTE: sys.getsizeof() is shallow -- it measures only the list object itself,
# not the per-event groups the list refers to, so the byte counts printed here
# understate the real memory footprint.
print("Train is list of {0} events ({1} bytes).".format(len(train), sys.getsizeof(train)))
print("Test  is list of {0} events ({1} bytes).".format(len(test),  sys.getsizeof(test)))

if utils.is_prepared(train_frame) and utils.is_prepared(test_frame):
    # Only the grouped lists are needed from here on; drop the flat frames.
    del train_frame # To save on memory space.
    del test_frame  # To save on memory space.
else:
    # The frames are kept in this case so they can be inspected.
    print("Warning: frame is not prepared.")
    print("Look at the prepare_frame() function in tracker/extractor.py")

### 4.) Specify how input is sorted before being put into the model.

In [None]:
# Keys used to sort the input before it is put into the model, listed in
# priority order: sort by order[0] first, then by order[1], and finally by
# order[2].
order = ["phi", "r", "z"]

### 5.) Show an example of the data that was loaded in.

In [None]:
# Pick one training event at random, print its hit and track counts, and plot
# it in the "zr" and "xy" modes.
n = np.random.randint(len(train))
sample_event = train[n]
sample_title = "Train {}".format(n)

print(sample_title)
print("Number of Hits: {}".format(metrics.number_of_hits(sample_event)))
print("Number of Tracks: {}".format(metrics.number_of_tracks(sample_event)))
visuals.Plot2D(sample_event, order).plot(mode="zr", title=sample_title)
visuals.Plot2D(sample_event, order).plot(mode="xy", title=sample_title)

In [None]:
# Pass the training event at index n (set in an earlier cell) to
# visuals.display(), together with the sort order.
visuals.display(train[n], order)

### 6.) Create a model.

In [None]:
# Settings shared by the model-loading, model-building and training cells.
# modelpath is where ModelCheckpoint writes the model and where load_model
# reads it back from.
modelpath    = "where_my_model_will_be_stored.h5"
# NOTE(review): 260 and 25 presumably have to match the n_rows / n_track
# values passed to extractor.prepare_frame() -- confirm when changing either.
input_shape  = (260, 3)  # Shape is (Max number of rows, 3).
n_categories = 25 + 2  # Number of categories. Includes noise & padding categories.
# NOTE(review): newer Keras releases spell this argument `learning_rate`;
# `lr` is kept for the Keras version this notebook was written against.
optimizer    = keras.optimizers.RMSprop(lr=0.001)
histories    = []  # Where training history is stored.

In [None]:
# Optional cell: load a previously saved model from modelpath.
# If you want to load a model, run this cell.
# Otherwise, do not run this cell; build a new model in the next cell instead.
model = keras.models.load_model(modelpath)

In [None]:
# Baseline network: one 256-unit Dense layer followed by a softmax Dense layer
# with one output per category. It is deliberately very simple and is not
# complex enough to assign hits to tracks well.
model = Sequential([
    Dense(units=256, input_shape=input_shape),
    Dense(units=n_categories, kernel_initializer="uniform", activation="softmax"),
])

model.compile(
    loss='categorical_crossentropy',
    optimizer=optimizer,
    metrics=['accuracy'],
)
model.summary()

### 7.) Fit the model with the training data.

In [None]:
# Train the model on batches drawn from the training events, validating on the
# test events, and record the returned history object in `histories`.
# NOTE(review): fit_generator is deprecated in recent Keras/TensorFlow
# releases in favour of fit -- confirm against the installed version.
epochs     = 30
batch_size = 100

train_batches = extractor.input_output_generator(train, batch_size, order)
test_batches  = extractor.input_output_generator(test, batch_size, order)

training_callbacks = [
    # Overwrite the file at modelpath only when the monitored value improves.
    keras.callbacks.ModelCheckpoint(
        filepath=modelpath,
        save_best_only=True,
        verbose=0,
    ),
    # Stop after 10 epochs without improvement.
    keras.callbacks.EarlyStopping(patience=10, verbose=1),
    # Lower the learning rate after 5 epochs without improvement.
    keras.callbacks.ReduceLROnPlateau(patience=5, verbose=1),
]

history = model.fit_generator(
    train_batches,
    steps_per_epoch=len(train) // batch_size,
    epochs=epochs,
    verbose=1,
    validation_data=test_batches,
    validation_steps=len(test) // batch_size,
    callbacks=training_callbacks,
)
histories.append(history)

### 8.) Use some metrics to get general information about the trained model.

In [None]:
# Sort the test events in place by the event_id found in each event's first
# row, then run the model on the extracted inputs. The later cells index
# `test` and `guesses` with the same position, so both must stay in this order.
test.sort(key=lambda x: x.iloc[0]["event_id"])
# The module is imported as `extractor`; the previous `ext` name was undefined.
# NOTE(review): assumes extract_input() lives in tracker.extractor -- confirm.
guesses = model.predict(extractor.extract_input(test, order))

In [None]:
# Compute three summary metrics over the whole test set and print each one
# scaled by 100.
shared_kwargs = dict(guesses=guesses, order=order)

hits_correct = metrics.percent_of_hits_assigned_correctly(test, **shared_kwargs)
tracks_correct = metrics.percent_of_tracks_assigned_correctly(
    test, **shared_kwargs, percent=1.0)
n_tracks_correct = metrics.percent_of_events_with_correct_number_of_tracks(
    test, **shared_kwargs)

print("Percent of hits assigned correctly: {}%".format(hits_correct * 100))
print("Percent of tracks assigned correctly: {}%".format(tracks_correct * 100))
print("Percent of events with the correct number of tracks: {}%".format(n_tracks_correct * 100))

### 9.) Graph a model's prediction.

In [None]:
# Report the per-event metrics for one test event and plot the model's
# prediction next to the answer, first in "zr" mode and then in "xy" mode.
# NOTE(review): n is used as a position in the sorted `test` list and must be
# smaller than len(test).
n = 3138  # The event number.
event, guess = test[n], guesses[n]

hit_score = metrics.percent_of_hits_assigned_correctly(event, guess, order)
print("Percent of hits assigned correctly: {}%".format(hit_score * 100))
track_score = metrics.percent_of_tracks_assigned_correctly(event, guess, order)
print("Percent of tracks assigned correctly: {}%".format(track_score * 100))

for mode in ("zr", "xy"):
    _ = visuals.Plot2D(event, order, guess).plot(mode=mode, title="Prediction {}".format(n))
    _ = visuals.Plot2D(event, order).plot(mode=mode, title="Answer {}".format(n))

### 10.) Display Number of Tracks vs. Discrete Accuracy.

In [None]:
%%time
# Bucket the per-event accuracies by track count and draw one box per count.
n_tracks, accuracy = metrics.accuracy_vs_tracks(test, guesses, order)

# Compute the smallest track count once instead of on every loop iteration.
fewest_tracks = n_tracks.min()
r = range(fewest_tracks, n_tracks.max() + 1)

# boxes[k] collects the accuracies of events with (fewest_tracks + k) tracks.
boxes = [[] for _ in r]
for track_count, event_accuracy in zip(n_tracks, accuracy):
    boxes[track_count - fewest_tracks].append(event_accuracy)

visuals.boxplot(boxes, "UNIF-10N-25T-25000E", "Number of Tracks", "Discrete Accuracy", xticks=list(r))