# 03 - Sequence Model Approach

* The more 'classical' approach to solving this problem
* Train a model that can take any number of 'steps'
* Makes a prediction on next step based on previous steps
* Learn from full tracks
* For test tracks, predict what the next step's values will be

In [6]:
import numpy as np
import pandas as pd
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import (
    GRU,
    LSTM,
    Conv1D,
    Conv2D,
    Dense,
    Dropout,
    Flatten,
    LeakyReLU,
    MaxPooling2D,
    ReLU,
    TimeDistributed,
)
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.sequence import pad_sequences

from jlab import get_test_detector_plane, load_test_data

## Load up and prep the datasets

In [7]:
# Truth values for the test tracks; the file has no header row, so the
# column names are supplied explicitly.
y_true = pd.read_csv(
    'test_prediction.csv',
    header=None,
    names=['x', 'y', 'px', 'py', 'pz'],
)

# Training tracks come straight from CSV; test tracks go through the
# project loader.
X_train = pd.read_csv('MLchallenge2_training.csv')
X_test = load_test_data('test_in.csv')
# Result of the project helper applied to the whole test frame.
eval_planes = get_test_detector_plane(X_test)

In [8]:
# Preview the first rows of the test tracks.
X_test.head()

Unnamed: 0,x,y,z,px,py,pz,x1,y1,z1,px1,...,z23,px23,py23,pz23,x24,y24,z24,px24,py24,pz24
0,0.877,1.322,65.0,-0.244,-0.053,2.414,-10.669,0.33,176.944,-0.254,...,,,,,,,,,,
1,0.786,-2.483,65.0,0.103,0.432,2.593,7.366,15.502,176.944,0.206,...,,,,,,,,,,
2,-13.134,-26.531,65.0,0.064,-0.021,0.953,-7.586,-30.687,176.944,0.027,...,,,,,,,,,,
3,18.454,2.805,65.0,-0.019,0.069,1.833,18.043,6.797,176.944,0.013,...,,,,,,,,,,
4,15.552,-19.196,65.0,-0.01,-0.011,2.366,15.068,-19.75,176.944,-0.014,...,341.28,-0.014,-0.002,2.351,,,343.405,,,


In [9]:
# Preview the first rows of the truth table.
y_true.head()

Unnamed: 0,x,y,px,py,pz
0,-23.123945,3.142886,-0.235592,0.091612,2.413377
1,19.633486,32.319292,0.314376,0.316425,2.592952
2,-8.308506,-39.299613,-0.020097,-0.051232,0.948906
3,19.918838,10.664617,0.038102,0.04774,1.864014
4,13.649239,-20.616935,-0.015548,0.001471,2.323953


## Construct the training data and targets

* For each track
  * Choose a number N between 8 and 24
  * That track will have 6 kinematics for N blocks
  * The target variable will be 5 of the 6 kinematic variables (x, y, px, py, pz; z is excluded) for the N+1th detector block
* This will cause variable length sequences
* Apply `pad_sequences` to prepend with zeros appropriately

### Training Dataset

In [10]:
# Dataset dimensions used throughout the notebook.
N_DETECTORS, N_KINEMATICS = 25, 6   # blocks per track, values per block
N_SAMPLES = len(X_train)            # one training track per row
# Shape of the padded model input: at most N_DETECTORS-1 blocks per track.
SHAPE = (N_SAMPLES, N_DETECTORS - 1, N_KINEMATICS)

In [11]:
# Build variable-length training inputs and their next-block targets.
#
# For every track a prefix length between 8 and N_DETECTORS-1 (inclusive) is
# drawn; the prefix becomes the model input and the block right after it is
# the regression target. The generator is seeded so that Restart & Run All
# reproduces the same training set.
SEED = 42
rng = np.random.default_rng(SEED)

# Columns kept in the target: every kinematic except index 2 (z).
TARGET_COLUMNS = [0, 1, 3, 4, 5]

# All tracks at once, shaped (N_SAMPLES, N_DETECTORS, N_KINEMATICS).
tracks = X_train.to_numpy(dtype=float).reshape(N_SAMPLES, N_DETECTORS, N_KINEMATICS)

# `high` is exclusive, so the drawn lengths are 8 .. N_DETECTORS-1.
seq_lens = rng.integers(8, N_DETECTORS, size=N_SAMPLES)

# Input: the first `n` blocks of each track.
X_train_list = [track[:n] for track, n in zip(tracks, seq_lens)]

# Target: the block at index `n` (the one following the prefix), minus z.
y_train_array = tracks[np.arange(N_SAMPLES), seq_lens][:, TARGET_COLUMNS]

In [12]:
# Show the length of the first ten (still unpadded) training sequences.
for n_steps in map(len, X_train_list[:10]):
    print(n_steps)

12
17
16
22
9
11
13
22
24
17


In [13]:
# Zero-pad every sequence at the front to exactly N_DETECTORS-1 steps.
# `maxlen` is given explicitly so the padded length always matches the
# models' input_shape instead of depending on the longest sequence that
# happened to be drawn.
X_train_list = pad_sequences(X_train_list, maxlen=N_DETECTORS - 1, dtype=float)

In [14]:
# After padding, the first ten sequences should all report the same length.
print("\n".join(str(len(padded)) for padded in X_train_list[:10]))

24
24
24
24
24
24
24
24
24
24


In [15]:
# Materialise the padded sequences as a NumPy array and show its shape.
X_train_array = np.array(X_train_list)
X_train_array.shape

(194601, 24, 6)

In [16]:
# Shape of the training targets, for comparison with the inputs.
y_train_array.shape

(194601, 5)

### Validation Dataset

In [17]:
# Number of rows (tracks) in the test frame.
N_TEST_SAMPLES = len(X_test)

In [18]:
# Truth table as a plain NumPy array, used as validation targets.
y_test_array = y_true.values

In [19]:
def _truncate_test_track(row):
    """Return the leading blocks of one test row as a 2-D array.

    The number of blocks kept is whatever `get_test_detector_plane` returns
    for that row.
    """
    n_planes = get_test_detector_plane(row)
    return row.values.reshape(N_DETECTORS, N_KINEMATICS)[0:n_planes]


# One truncated track per test row, in row order.
X_test_list = [_truncate_test_track(X_test.iloc[ix]) for ix in range(N_TEST_SAMPLES)]

In [20]:
# Zero-pad the test sequences at the front to exactly N_DETECTORS-1 steps.
# `maxlen` is given explicitly so the array always matches the models'
# input_shape, whatever the longest test sequence turns out to be.
X_test_list = pad_sequences(X_test_list, maxlen=N_DETECTORS - 1, dtype=float)
X_test_array = np.array(X_test_list)

In [21]:
# Shape of the padded test inputs.
X_test_array.shape

(10000, 24, 6)

In [22]:
# Shape of the validation targets.
y_test_array.shape

(10000, 5)

In [23]:
# Same array that y_test_array was taken from, so the shape is identical.
y_true.values.shape

(10000, 5)

## Define sequence model

In [161]:
def lstm_model():
    """Build and compile the baseline LSTM regressor.

    Input is a padded track of shape (N_DETECTORS-1, N_KINEMATICS). The stack
    is LSTM(200) -> Dense(100) -> Dropout(0.25) -> Dense(N_KINEMATICS-1) with
    LeakyReLU activations and a linear output, compiled with MSE loss and the
    Adam optimizer.

    Returns:
        The compiled `Sequential` model.
    """
    layer_stack = [
        LSTM(200, activation=LeakyReLU(), input_shape=(N_DETECTORS-1, N_KINEMATICS)),
        Dense(100, activation=LeakyReLU()),
        Dropout(0.25),
        Dense(N_KINEMATICS-1, activation='linear'),
    ]
    model = Sequential(layer_stack)
    model.compile(loss='mse', optimizer='adam')
    return model

In [150]:
# Instantiate the baseline model and print its layer/parameter table.
model = lstm_model()
model.summary()

Model: "sequential_11"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_9 (LSTM)                (None, 200)               165600    
_________________________________________________________________
dense_17 (Dense)             (None, 100)               20100     
_________________________________________________________________
dropout (Dropout)            (None, 100)               0         
_________________________________________________________________
dense_18 (Dense)             (None, 5)                 505       
Total params: 186,205
Trainable params: 186,205
Non-trainable params: 0
_________________________________________________________________


In [143]:
# Short 5-epoch run on the full training set; the test set with its truth
# values doubles as validation data.
history = model.fit(x=X_train_array, y=y_train_array, validation_data=(X_test_array, y_test_array), epochs=5)

Train on 194601 samples, validate on 10000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x1a3b85b1d0>

In [151]:
# Long 50-epoch run on the full training set, validated against the test set.
# `use_multiprocessing` is dropped: Keras only honours it for generator /
# Sequence inputs, so it had no effect on these in-memory NumPy arrays.
history = model.fit(
    x=X_train_array, y=y_train_array,
    validation_data=(X_test_array, y_test_array),
    epochs=50,
)

Train on 194601 samples, validate on 10000 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [156]:
# Fresh baseline model trained with early stopping on the validation loss.
model = lstm_model()
# Allow two epochs without improvement before stopping, then roll back to the
# best epoch. With the default patience of 0 and no weight restore, the model
# kept (and saved in the next cell) would be the one from the last, worse epoch.
es = EarlyStopping(monitor='val_loss', mode='min', patience=2,
                   restore_best_weights=True)
# `use_multiprocessing` is dropped: it only applies to generator / Sequence
# inputs, not to in-memory NumPy arrays.
history = model.fit(
    x=X_train_array, y=y_train_array,
    validation_data=(X_test_array, y_test_array),
    callbacks=[es], epochs=20,
)

Train on 194601 samples, validate on 10000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20


In [157]:
# Persist the trained model to an .h5 file.
# NOTE(review): the filename says "lstm100", but `lstm_model` builds LSTM(200) — confirm the intended name.
model.save("lstm100-dense100-dropout025-epochs20-early-stopping.h5")

In [162]:
def lstm_model_lin():
    """Build the LSTM regressor with an explicitly linear output layer.

    The layer stack is the same as `lstm_model` in this notebook:
    LSTM(200) -> Dense(100) -> Dropout(0.25) -> Dense(N_KINEMATICS-1), with
    LeakyReLU activations, MSE loss and the Adam optimizer.

    Returns:
        The compiled `Sequential` model.
    """
    model = Sequential([
        LSTM(200, activation=LeakyReLU(), input_shape=(N_DETECTORS-1, N_KINEMATICS)),
        Dense(100, activation=LeakyReLU()),
        Dropout(0.25),
        Dense(N_KINEMATICS-1, activation='linear'),
    ])
    model.compile(loss='mse', optimizer='adam')
    return model


# Quick comparison run on the first 10,000 training tracks.
lin_act_model = lstm_model_lin()
# Default patience (0): stop at the first epoch whose val_loss does not improve.
es = EarlyStopping(monitor='val_loss', mode='min')
# `use_multiprocessing` is dropped: it only applies to generator / Sequence
# inputs, not to in-memory NumPy arrays.
history = lin_act_model.fit(
    x=X_train_array[:10000], y=y_train_array[:10000],
    validation_data=(X_test_array, y_test_array),
    callbacks=[es], epochs=20,
)

Train on 10000 samples, validate on 10000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20


In [164]:
def lstm_model_adam():
    """Build the LSTM regressor compiled with the Adam optimizer.

    LSTM(200) -> Dense(100) -> Dropout(0.25) -> Dense(N_KINEMATICS-1). The
    output layer relies on Dense's default activation (none, i.e. linear).

    Returns:
        The compiled `Sequential` model.
    """
    model = Sequential([
        LSTM(200, activation=LeakyReLU(), input_shape=(N_DETECTORS-1, N_KINEMATICS)),
        Dense(100, activation=LeakyReLU()),
        Dropout(0.25),
        Dense(N_KINEMATICS-1),
    ])
    model.compile(loss='mse', optimizer='adam')
    return model


# Quick comparison run on the first 10,000 training tracks.
adam_model = lstm_model_adam()
# Default patience (0): stop at the first epoch whose val_loss does not improve.
es = EarlyStopping(monitor='val_loss', mode='min')
# `use_multiprocessing` is dropped: it only applies to generator / Sequence
# inputs, not to in-memory NumPy arrays.
history = adam_model.fit(
    x=X_train_array[:10000], y=y_train_array[:10000],
    validation_data=(X_test_array, y_test_array),
    callbacks=[es], epochs=20,
)

Train on 10000 samples, validate on 10000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20


In [166]:
def lstm_model_dropout50():
    """Build the LSTM regressor with a dropout rate of 0.50.

    LSTM(200) -> Dense(100) -> Dropout(0.50) -> Dense(N_KINEMATICS-1), with
    LeakyReLU activations, MSE loss and the Adam optimizer.

    Returns:
        The compiled `Sequential` model.
    """
    model = Sequential([
        LSTM(200, activation=LeakyReLU(), input_shape=(N_DETECTORS-1, N_KINEMATICS)),
        Dense(100, activation=LeakyReLU()),
        Dropout(0.50),
        Dense(N_KINEMATICS-1),
    ])
    model.compile(loss='mse', optimizer='adam')
    return model


# Quick comparison run on the first 10,000 training tracks.
dropout50_model = lstm_model_dropout50()
# Default patience (0): stop at the first epoch whose val_loss does not improve.
es = EarlyStopping(monitor='val_loss', mode='min')
# `use_multiprocessing` is dropped: it only applies to generator / Sequence
# inputs, not to in-memory NumPy arrays.
history = dropout50_model.fit(
    x=X_train_array[:10000], y=y_train_array[:10000],
    validation_data=(X_test_array, y_test_array),
    callbacks=[es], epochs=20,
)

Train on 10000 samples, validate on 10000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20


In [172]:
def lstm_model_nodropout():
    """Build the LSTM regressor without any dropout layer.

    LSTM(200) -> Dense(100) -> Dense(N_KINEMATICS-1), with LeakyReLU
    activations, MSE loss and the Adam optimizer.

    Returns:
        The compiled `Sequential` model.
    """
    model = Sequential([
        LSTM(200, activation=LeakyReLU(), input_shape=(N_DETECTORS-1, N_KINEMATICS)),
        Dense(100, activation=LeakyReLU()),
        Dense(N_KINEMATICS-1),
    ])
    model.compile(loss='mse', optimizer='adam')
    return model


# Quick comparison run on the first 10,000 training tracks.
nodropout_model = lstm_model_nodropout()
# Default patience (0): stop at the first epoch whose val_loss does not improve.
es = EarlyStopping(monitor='val_loss', mode='min')
# `use_multiprocessing` is dropped: it only applies to generator / Sequence
# inputs, not to in-memory NumPy arrays.
history = nodropout_model.fit(
    x=X_train_array[:10000], y=y_train_array[:10000],
    validation_data=(X_test_array, y_test_array),
    callbacks=[es], epochs=20,
)

Train on 10000 samples, validate on 10000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20


In [167]:
def lstm_model_relu():
    """Build the LSTM regressor with plain ReLU activations.

    LSTM(200) -> Dense(100) -> Dropout(0.25) -> Dense(N_KINEMATICS-1), using
    'relu' in the recurrent and hidden dense layers, MSE loss and Adam.

    Returns:
        The compiled `Sequential` model.
    """
    model = Sequential([
        LSTM(200, activation='relu', input_shape=(N_DETECTORS-1, N_KINEMATICS)),
        Dense(100, activation='relu'),
        Dropout(0.25),
        Dense(N_KINEMATICS-1),
    ])
    model.compile(loss='mse', optimizer='adam')
    return model


# Quick comparison run on the first 10,000 training tracks.
relu_model = lstm_model_relu()
# Default patience (0): stop at the first epoch whose val_loss does not improve.
es = EarlyStopping(monitor='val_loss', mode='min')
# `use_multiprocessing` is dropped: it only applies to generator / Sequence
# inputs, not to in-memory NumPy arrays.
history = relu_model.fit(
    x=X_train_array[:10000], y=y_train_array[:10000],
    validation_data=(X_test_array, y_test_array),
    callbacks=[es], epochs=20,
)

Train on 10000 samples, validate on 10000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20


In [170]:
def model_gru():
    """Build the regressor with a GRU in place of the LSTM.

    GRU(200) -> Dense(100) -> Dropout(0.25) -> Dense(N_KINEMATICS-1), with
    LeakyReLU activations, MSE loss and the Adam optimizer.

    Returns:
        The compiled `Sequential` model.
    """
    model = Sequential([
        GRU(200, activation=LeakyReLU(), input_shape=(N_DETECTORS-1, N_KINEMATICS)),
        Dense(100, activation=LeakyReLU()),
        Dropout(0.25),
        Dense(N_KINEMATICS-1),
    ])
    model.compile(loss='mse', optimizer='adam')
    return model


# Quick comparison run on the first 10,000 training tracks.
gru_model = model_gru()
# Default patience (0): stop at the first epoch whose val_loss does not improve.
es = EarlyStopping(monitor='val_loss', mode='min')
# `use_multiprocessing` is dropped: it only applies to generator / Sequence
# inputs, not to in-memory NumPy arrays.
history = gru_model.fit(
    x=X_train_array[:10000], y=y_train_array[:10000],
    validation_data=(X_test_array, y_test_array),
    callbacks=[es], epochs=20,
)

Train on 10000 samples, validate on 10000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20


### Early Conclusions

* GRU > LSTM
* LeakyReLU > ReLU
* adam > rmsprop (the rmsprop run is not shown in this notebook)
* dropout 0.25 > dropout 0.5 > no dropout

In [243]:
def model_v2():
    """Build the second-iteration model: the GRU variant chosen above.

    GRU(200) -> Dense(100) -> Dropout(0.25) -> Dense(N_KINEMATICS-1), with
    LeakyReLU activations, MSE loss and the Adam optimizer.

    Returns:
        The compiled `Sequential` model.
    """
    model = Sequential([
        GRU(200, activation=LeakyReLU(), input_shape=(N_DETECTORS-1, N_KINEMATICS)),
        Dense(100, activation=LeakyReLU()),
        Dropout(0.25),
        Dense(N_KINEMATICS-1),
    ])
    model.compile(loss='mse', optimizer='adam')
    return model


# Train on the full training set for at most 8 epochs.
v2_model = model_v2()
# Default patience (0): stop at the first epoch whose val_loss does not improve.
es = EarlyStopping(monitor='val_loss', mode='min')
# `use_multiprocessing` is dropped: it only applies to generator / Sequence
# inputs, not to in-memory NumPy arrays.
history = v2_model.fit(
    x=X_train_array, y=y_train_array,
    validation_data=(X_test_array, y_test_array),
    callbacks=[es], epochs=8,
)

Train on 194601 samples, validate on 10000 samples
Epoch 1/8
Epoch 2/8
Epoch 3/8


In [None]:
# Completed from a truncated statement (`from tensorflow.keras.back`), which
# was a SyntaxError and stopped Run All. `K` is not used elsewhere in this
# notebook.
from tensorflow.keras import backend as K

In [26]:
def model_v2_deep():
    """Build a deeper, narrower GRU regressor.

    Three stacked GRU(30) layers (the first two return full sequences so the
    next recurrent layer receives one vector per step), followed by
    Dense(100) -> Dropout(0.25) -> Dense(N_KINEMATICS-1). LeakyReLU
    activations, MSE loss, Adam optimizer.

    Returns:
        The compiled `Sequential` model.
    """
    recurrent_stack = [
        GRU(30, activation=LeakyReLU(), input_shape=(N_DETECTORS-1, N_KINEMATICS),
            return_sequences=True),
        GRU(30, activation=LeakyReLU(), return_sequences=True),
        GRU(30, activation=LeakyReLU()),
    ]
    head = [
        Dense(100, activation=LeakyReLU()),
        Dropout(0.25),
        Dense(N_KINEMATICS-1),
    ]
    model = Sequential(recurrent_stack + head)
    model.compile(loss='mse', optimizer='adam')
    return model


# Instantiate the deep variant and print its layer/parameter table.
v2_model_deep = model_v2_deep()
v2_model_deep.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
gru_7 (GRU)                  (None, 24, 30)            3420      
_________________________________________________________________
gru_8 (GRU)                  (None, 24, 30)            5580      
_________________________________________________________________
gru_9 (GRU)                  (None, 30)                5580      
_________________________________________________________________
dense_3 (Dense)              (None, 100)               3100      
_________________________________________________________________
dropout_1 (Dropout)          (None, 100)               0         
_________________________________________________________________
dense_4 (Dense)              (None, 5)                 505       
Total params: 18,185
Trainable params: 18,185
Non-trainable params: 0
__________________________________________________

In [None]:
# Train the deep GRU on the full training set. Early stopping waits two
# epochs for an improvement in val_loss and restores the best weights.
es = EarlyStopping(monitor='val_loss', mode='min', patience=2, restore_best_weights=True)
# `use_multiprocessing` is dropped: it only applies to generator / Sequence
# inputs, not to in-memory NumPy arrays.
history = v2_model_deep.fit(
    x=X_train_array, y=y_train_array,
    validation_data=(X_test_array, y_test_array),
    callbacks=[es],
    epochs=8,
)

Train on 194601 samples, validate on 10000 samples
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8

In [242]:
def model_v2_dbl_gru():
    """Build a regressor with two stacked GRU(200) layers.

    The first GRU returns the full sequence so the second one receives one
    vector per step. Only the first layer declares `input_shape`; later
    layers in a `Sequential` model take their input shape from the previous
    layer, so the duplicate declaration on the second GRU was removed.

    Returns:
        The compiled `Sequential` model.
    """
    model = Sequential([
        GRU(200, activation=LeakyReLU(), input_shape=(N_DETECTORS-1, N_KINEMATICS),
            return_sequences=True),
        GRU(200, activation=LeakyReLU()),
        Dense(100, activation=LeakyReLU()),
        Dropout(0.25),
        Dense(N_KINEMATICS-1),
    ])
    model.compile(loss='mse', optimizer='adam')
    return model


# Fixed 10-epoch run (no early stopping) on the first 20,000 training tracks.
v2_model_dbl_gru = model_v2_dbl_gru()
# `use_multiprocessing` is dropped: it only applies to generator / Sequence
# inputs, not to in-memory NumPy arrays.
history = v2_model_dbl_gru.fit(
    x=X_train_array[:20000], y=y_train_array[:20000],
    validation_data=(X_test_array, y_test_array),
    epochs=10,
)

Train on 20000 samples, validate on 10000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [177]:
def model_v2_2x_dropout():
    """Build the GRU regressor with dropout after both hidden layers.

    GRU(200) -> Dropout(0.25) -> Dense(100) -> Dropout(0.25) ->
    Dense(N_KINEMATICS-1), with LeakyReLU activations, MSE loss and Adam.

    Returns:
        The compiled `Sequential` model.
    """
    model = Sequential([
        GRU(200, activation=LeakyReLU(), input_shape=(N_DETECTORS-1, N_KINEMATICS)),
        Dropout(0.25),
        Dense(100, activation=LeakyReLU()),
        Dropout(0.25),
        Dense(N_KINEMATICS-1),
    ])
    model.compile(loss='mse', optimizer='adam')
    return model


# Comparison run on the first 20,000 training tracks.
v2_model_dbl_dropout = model_v2_2x_dropout()
# Default patience (0): stop at the first epoch whose val_loss does not improve.
es = EarlyStopping(monitor='val_loss', mode='min')
# `use_multiprocessing` is dropped: it only applies to generator / Sequence
# inputs, not to in-memory NumPy arrays.
history = v2_model_dbl_dropout.fit(
    x=X_train_array[:20000], y=y_train_array[:20000],
    validation_data=(X_test_array, y_test_array),
    callbacks=[es], epochs=20,
)

Train on 20000 samples, validate on 10000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20


In [200]:
def model_v2_big_gru():
    """Build the GRU regressor with a wider recurrent layer.

    GRU(400) -> Dense(100) -> Dropout(0.25) -> Dense(N_KINEMATICS-1), with
    LeakyReLU activations, MSE loss and the Adam optimizer.

    Returns:
        The compiled `Sequential` model.
    """
    model = Sequential([
        GRU(400, activation=LeakyReLU(), input_shape=(N_DETECTORS-1, N_KINEMATICS)),
        Dense(100, activation=LeakyReLU()),
        Dropout(0.25),
        Dense(N_KINEMATICS-1),
    ])
    model.compile(loss='mse', optimizer='adam')
    return model


# Fixed 10-epoch run (no early stopping) on the first 20,000 training tracks.
v2_model_big_gru = model_v2_big_gru()
# `use_multiprocessing` is dropped: it only applies to generator / Sequence
# inputs, not to in-memory NumPy arrays.
history = v2_model_big_gru.fit(
    x=X_train_array[:20000], y=y_train_array[:20000],
    validation_data=(X_test_array, y_test_array),
    epochs=10,
)

ValueError: Input 0 of layer gru_14 is incompatible with the layer: expected ndim=3, found ndim=5. Full shape received: [None, 1, None, 24, 6]

In [181]:
# Resume training the wide-GRU model: initial_epoch=10 with epochs=15 runs
# epochs 11 through 15. `use_multiprocessing` is dropped because it only
# applies to generator / Sequence inputs, not to in-memory NumPy arrays.
v2_model_big_gru.fit(
    x=X_train_array[:20000], y=y_train_array[:20000],
    validation_data=(X_test_array, y_test_array),
    epochs=15, initial_epoch=10,
)

Train on 20000 samples, validate on 10000 samples
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<tensorflow.python.keras.callbacks.History at 0x1a8e946748>

## Try CNN + GRU

In [221]:
# Re-check the input shape before adding a convolutional front end.
X_train_array.shape

(194601, 24, 6)

In [240]:
def cnn_gru():
    """Build a regressor with a 1-D convolution in front of the GRU.

    Conv1D(5 filters, kernel 2, stride 1) -> GRU(200) -> Dense(100) ->
    Dropout(0.25) -> Dense(N_KINEMATICS-1). The convolution has no activation
    argument; the GRU and hidden dense layer use LeakyReLU. Compiled with MSE
    loss and the Adam optimizer.

    Returns:
        The compiled `Sequential` model.
    """
    conv_front = Conv1D(filters=5, kernel_size=2, strides=1,
                        input_shape=(N_DETECTORS-1, N_KINEMATICS))
    # A MaxPooling1D layer after the convolution is left disabled.
    layer_stack = [
        conv_front,
        GRU(200, activation=LeakyReLU()),
        Dense(100, activation=LeakyReLU()),
        Dropout(0.25),
        Dense(N_KINEMATICS-1),
    ]
    model = Sequential(layer_stack)
    model.compile(loss='mse', optimizer='adam')
    return model


# Instantiate the convolutional variant and print its layer/parameter table.
cnn_model = cnn_gru()
cnn_model.summary()

Model: "sequential_73"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_20 (Conv1D)           (None, 23, 5)             65        
_________________________________________________________________
gru_25 (GRU)                 (None, 200)               124200    
_________________________________________________________________
dense_69 (Dense)             (None, 100)               20100     
_________________________________________________________________
dropout_25 (Dropout)         (None, 100)               0         
_________________________________________________________________
dense_70 (Dense)             (None, 5)                 505       
Total params: 144,870
Trainable params: 144,870
Non-trainable params: 0
_________________________________________________________________


In [241]:
# Fixed 10-epoch run (no early stopping) on the first 20,000 training tracks.
# `use_multiprocessing` is dropped: it only applies to generator / Sequence
# inputs, not to in-memory NumPy arrays.
history = cnn_model.fit(
    x=X_train_array[:20000], y=y_train_array[:20000],
    validation_data=(X_test_array, y_test_array),
    epochs=10,
)

Train on 20000 samples, validate on 10000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [251]:
# Per-epoch training and validation losses of whichever fit last assigned `history`.
# NOTE(review): the stored output lists 8 epochs while the fit just above runs 10 —
# the output looks stale from out-of-order execution; re-run to confirm.
history.history

{'loss': [3.3281039200901317,
  1.6039592637484563,
  1.3489888134789536,
  1.2624885631565317,
  1.2353142021468715,
  1.211998767219029,
  1.1837373140878185,
  1.1759768705626037],
 'val_loss': [0.778679012966156,
  0.5407980192184448,
  0.5594191231250762,
  0.4179811120986939,
  0.27897539434432983,
  0.18599163811206817,
  0.1257927789211273,
  0.10037544323205948]}

## Enough tinkering around

* Formalize this into some scripts
* Make predictions on competition test data

In [1]:
from train import train
from predict import predict

In [2]:
# Train through the project's `train` script and keep the returned model.
# NOTE(review): presumably `frac` is the fraction of training data used and the
# model is saved under `filename` — confirm against train.py.
model = train(frac=1.00, filename="dannowitz_jlab2_model", epochs=100, ret_model=True)

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
gru (GRU)                    (None, 24, 30)            3420      
_________________________________________________________________
leaky_re_lu (LeakyReLU)      (None, 24, 30)            0         
_________________________________________________________________
gru_1 (GRU)                  (None, 24, 30)            5580      
_________________________________________________________________
leaky_re_lu_1 (LeakyReLU)    (None, 24, 30)            0         
_________________________________________________________________
gru_2 (GRU)                  (None, 30)                5580      
_________________________________________________________________
leaky_re_lu_2 (LeakyReLU)    (None, 30)                0         
_________________________________________________________________
dense (Dense)                (None, 100)               3

KeyboardInterrupt: 

In [None]:
# Produce predictions from the saved model via the project's `predict` script.
# NOTE(review): the output name is spelled "danowitz" while the model file uses
# "dannowitz" — confirm which spelling is intended.
# NOTE(review): "test_in (1).csv" differs from the 'test_in.csv' loaded at the
# top of the notebook — confirm it is the intended input file.
preds = predict(model_filename="dannowitz_jlab2_model.h5",
                data_filename="test_in (1).csv",
                output_filename="danowitz_jlab2_submission.csv")