# Train on 5,000 events. Test on the RAMP set.

In [1]:
# Shell escape: print the GPU status table so the run records which devices
# were present and how busy they were when the notebook was executed.
!nvidia-smi

Fri Aug 25 16:50:27 2017       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 375.66                 Driver Version: 375.66                    |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  GeForce GTX 1080    Off  | 0000:04:00.0     Off |                  N/A |
| 27%   27C    P8     9W / 180W |    633MiB /  8114MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
|   1  GeForce GTX 1080    Off  | 0000:05:00.0     Off |                  N/A |
| 27%   29C    P8     9W / 180W |      2MiB /  8114MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
|   2  GeForce GTX 1080    Off  | 0000:06:00.0     Off |                  N/A |
| 27%   

In [2]:
import os
# Restrict CUDA to one physical device. This is done before keras is imported;
# presumably so the backend only ever sees this GPU — confirm the index matches
# the machine the notebook runs on.
os.environ["CUDA_VISIBLE_DEVICES"] = "6"
# Interactive matplotlib backend for the notebook front end.
%matplotlib notebook
import importlib
import numpy as np
import pandas as pd
import pickle as pk
import keras
from keras.preprocessing.sequence import pad_sequences
from keras.layers import TimeDistributed, Dense, Dropout, GRU, Bidirectional
from keras.models import Sequential
# Project-local helpers: data extraction, utilities, scoring and plotting.
from tracker import extractor as ext, utils, metrics, visuals

Using Theano backend.
Using cuDNN version 5110 on context None
Mapped name None to device cuda: GeForce GTX 1080 (0000:0D:00.0)


### Load in the data.

In [3]:
%%time
modelpath   = "data/models/UNIF-10N-25T-200E-235R.h5"
trainpath   = "data/sets/UNIF-10N-25T-200E-235R.gz"
testpath    = "data/sets/RAMP-10N-25T-235R.gz"
train_frame = pd.read_csv(trainpath)
test_frame  = pd.read_csv(testpath)
train = utils.list_of_groups(train_frame, group="event_id")
test  = utils.list_of_groups(test_frame,  group="event_id")
train = [event for event in train if len(pd.unique(event["cluster_id"])) ]
print("Train is list of {} events.".format(len(train)))
print("Test is list of {} events.".format(len(test)))
if (not utils.is_prepared(train_frame)) or (not utils.is_prepared(test_frame)):
    print("Warning: frame is not prepared.")
    print("Look at the prepare_frame() function in tracker/extractor.py")   

Train is list of 4800 events.
Test is list of 3600 events.
CPU times: user 3.61 s, sys: 68 ms, total: 3.68 s
Wall time: 3.68 s


In [4]:
# Column order handed to every extractor / metrics / visuals call below.
order = ["phi", "r", "z"]

### Show an example of the data.

In [5]:
# Pick one random event from the test set and summarise it.
# The index is drawn from range(len(test)) and every lookup below reads
# `test[n]`, so the printed label says "Test" to agree with the plot title
# (it previously said "Train" while describing a test event).
# No random seed is set, so a different event is shown on every run.
n = np.random.randint(len(test))
print("Test {}".format(n))
print("Number of Hits: {}".format(metrics.number_of_hits(test[n])))
print("Number of Tracks: {}".format(metrics.number_of_tracks(test[n])))
visuals.Plot2D(test[n], order).plot(mode="xy", title="Test {}".format(n))

Train 1375
Number of Hits: 105
Number of Tracks: 11


<IPython.core.display.Javascript object>

In [6]:
# Tabular view of the *training* event at index n.
# NOTE(review): n was drawn from range(len(test)) in the previous cell, so this
# lookup is only safe while len(train) >= len(test), and it shows a different
# event from the one plotted above — confirm this is intended.
visuals.display(train[n], order)

Unnamed: 0,phi,r,z,A,B,C,D,E,F,G,H,I,J,K,L,M,N,O,P,Q,R,S,T,U,V,W,X,Y,noise,pad
0,-3.079459,405.0,-72.933755,,,,,,,1.0,,,,,,,,,,,,,,,,,,,,
1,-2.922858,85.0,-12.718549,,,,,,,,,,,,,,,,,,,,,,,,,,1.0,
2,-2.908075,155.0,92.888403,,,,,,,,,,,,,,,,,,,,,,,,,,1.0,
3,-2.868357,562.0,-101.20684,,,,,,,1.0,,,,,,,,,,,,,,,,,,,,
4,-2.635338,39.0,0.99954,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,
5,-2.625954,85.0,2.178484,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,
6,-2.605468,155.0,3.97253,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,
7,-2.58595,213.0,5.459025,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,
8,-2.567406,271.0,6.94552,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,
9,-2.537619,762.0,-137.223509,,,,,,,1.0,,,,,,,,,,,,,,,,,,,,


### Create the model.

In [7]:
# Model hyper-parameters.
input_shape  = (235, 3)   # per-event input: 235 rows x 3 values (presumably the `order` columns — confirm)
n_categories = 25 + 2     # 25 track classes plus the two extra "noise" and "pad" classes
optimizer    = keras.optimizers.RMSprop(lr=0.001)
histories    = []         # one History object is appended per fit call

# Written as a float literal on purpose: the original `1/2` evaluates to 0
# under Python 2 integer division, which would silently disable both the
# recurrent dropout and the Dropout layers.
dropout_rate = 0.5
n_recurrent_blocks = 3

# Three identical blocks of [bidirectional GRU -> dropout]; only the first
# block declares the input shape.
model = Sequential()
for block_index in range(n_recurrent_blocks):
    wrapper_kwargs = {"merge_mode": "mul"}
    if block_index == 0:
        wrapper_kwargs["input_shape"] = input_shape
    model.add(Bidirectional(
        GRU(units=256,
            return_sequences=True,
            recurrent_dropout=dropout_rate,
            implementation=2),
        **wrapper_kwargs))
    model.add(Dropout(rate=dropout_rate))

# Per-row softmax over the categories.
model.add(TimeDistributed(Dense(units=n_categories, kernel_initializer="uniform", activation="softmax")))
model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
bidirectional_1 (Bidirection (None, 235, 256)          399360    
_________________________________________________________________
dropout_1 (Dropout)          (None, 235, 256)          0         
_________________________________________________________________
bidirectional_2 (Bidirection (None, 235, 256)          787968    
_________________________________________________________________
dropout_2 (Dropout)          (None, 235, 256)          0         
_________________________________________________________________
bidirectional_3 (Bidirection (None, 235, 256)          787968    
_________________________________________________________________
dropout_3 (Dropout)          (None, 235, 256)          0         
_________________________________________________________________
time_distributed_1 (TimeDist (None, 235, 27)           6939      
Total para

### Fit the model.

In [13]:
%%time
epochs     = 64
batch_size = 100
histories.append(model.fit_generator(
    ext.input_output_generator(train, batch_size, order),
    steps_per_epoch=len(train) // batch_size,
    epochs=epochs,
    verbose=1,
    validation_data=ext.input_output_generator(test, batch_size, order),
    validation_steps=len(test) // batch_size,
    callbacks=[
        keras.callbacks.ModelCheckpoint(
            filepath=modelpath,
            save_best_only=True,
            verbose=0,)]))

Epoch 1/64
Epoch 2/64
Epoch 3/64
Epoch 4/64
Epoch 5/64
Epoch 6/64
Epoch 7/64
Epoch 8/64
Epoch 9/64
Epoch 10/64
Epoch 11/64
Epoch 12/64
Epoch 13/64
Epoch 14/64
Epoch 15/64
Epoch 16/64
Epoch 17/64
Epoch 18/64
Epoch 19/64
Epoch 20/64
Epoch 21/64
Epoch 22/64
Epoch 23/64
Epoch 24/64
Epoch 25/64
Epoch 26/64
Epoch 27/64
Epoch 28/64
Epoch 29/64
Epoch 30/64
Epoch 31/64
Epoch 32/64
Epoch 33/64
Epoch 34/64
Epoch 35/64
Epoch 36/64
Epoch 37/64
Epoch 38/64
Epoch 39/64
Epoch 40/64
Epoch 41/64
Epoch 42/64
Epoch 43/64
Epoch 44/64
Epoch 45/64
Epoch 46/64
Epoch 47/64
Epoch 48/64
Epoch 49/64
Epoch 50/64
Epoch 51/64
Epoch 52/64
Epoch 53/64
Epoch 54/64
Epoch 55/64
Epoch 56/64
Epoch 57/64
Epoch 58/64
Epoch 59/64
Epoch 60/64
Epoch 61/64
Epoch 62/64
Epoch 63/64
Epoch 64/64
CPU times: user 1h 32min 6s, sys: 2min 20s, total: 1h 34min 26s
Wall time: 1h 31min 18s


### Display information about the model after fitting it.

In [14]:
%%time
guesses          = model.predict(ext.extract_input(test, order))
hits_correct     = metrics.percent_of_hits_assigned_correctly(test, guesses=guesses, order=order)
tracks_correct   = metrics.percent_of_tracks_assigned_correctly(test, guesses=guesses, order=order, percent=1.0)
n_tracks_correct = metrics.percent_of_events_with_correct_number_of_tracks(test, guesses=guesses, order=order)
print("Percent of hits assigned correctly: {}%".format(hits_correct * 100))
print("Percent of tracks assigned correctly: {}%".format(tracks_correct * 100))
print("Percent of events with the correct number of tracks: {}%".format(n_tracks_correct * 100))

Percent of hits assigned correctly: 80.74596864786817%
Percent of tracks assigned correctly: 0.0%
Percent of events with the correct number of tracks: 78.86111111111111%
CPU times: user 1min 32s, sys: 60 ms, total: 1min 32s
Wall time: 1min 32s


### Display how one of the model's predictions looks.

In [15]:
# Plot test event 10 twice in the xy view: first with the model's guesses
# passed to the plotter, then without them, so the two figures can be compared.
n = 10
event = test[n]

plot_with_guesses = visuals.Plot2D(event, order, guesses[n])
d = plot_with_guesses.plot(mode="xy")

plot_without_guesses = visuals.Plot2D(event, order)
d = plot_without_guesses.plot(mode="xy")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [16]:
# Tabular view of the same test event with the model's guesses for it passed in,
# rendered in the "discrete pairs" display mode.
visuals.display(test[n], order, guesses[n], mode="discrete pairs")

Unnamed: 0,phi,r,z,A,B,C,D,E,F,G,H,I,J,K,L,M,N,O,P,Q,R,S,T,U,V,W,X,Y,noise,pad,A.1,B.1,C.1,D.1,E.1,F.1,G.1,H.1,I.1,J.1,K.1,L.1,M.1,N.1,O.1,P.1,Q.1,R.1,S.1,T.1,U.1,V.1,W.1,X.1,Y.1,noise.1,pad.1
0,-3.073799,85.0,9.620115,,`1`[0.0],,,,,,,,,,,,,,,,,,,,,,,,`0`[1.0],,,1.0,,,,,,,,,,,,,,,,,,,,,,,,,
1,-3.049729,213.0,97.675637,,,,,,,,,,,,,,,,,,,,,,,,,,`1`[1.0],,,,,,,,,,,,,,,,,,,,,,,,,,,1.0,
2,-3.042236,39.0,-2.634788,`1`[1.0],,,,,,,,,,,,,,,,,,,,,,,,,,,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,
3,-3.041446,85.0,-5.742487,`1`[1.0],,,,,,,,,,,,,,,,,,,,,,,,,,,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,
4,-3.030948,155.0,-10.471594,`1`[1.0],,,,,,,,,,,,,,,,,,,,,,,,,,,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,
5,-3.019117,213.0,-14.389998,`1`[1.0],,,,,,,,,,,,,,,,,,,,,,,,,,,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,
6,-3.01275,39.0,4.413935,,`1`[1.0],,,,,,,,,,,,,,,,,,,,,,,,,,,1.0,,,,,,,,,,,,,,,,,,,,,,,,,
7,-3.006722,271.0,-18.308401,`1`[0.0],`0`[1.0],,,,,,,,,,,,,,,,,,,,,,,,,,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,
8,-3.000443,1000.0,140.924702,,,`1`[1.0],,,,,,,,,,,,,,,,,,,,,,,,,,,1.0,,,,,,,,,,,,,,,,,,,,,,,,
9,-2.977025,405.0,-27.361263,`1`[0.0],`0`[1.0],,,,,,,,,,,,,,,,,,,,,,,,,,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,
