# LSTM boxcox lambda = 1.4

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Conv1D, Flatten, Dense, Input, concatenate, Masking, LSTM, TimeDistributed, Bidirectional, RepeatVector
from tensorflow.keras import regularizers 
from tensorflow.keras import initializers
import h5py 
import os 
os.environ["CUDA_VISIBLE_DEVICES"]="1"


%matplotlib inline 

# Open the HDF5 file that holds the event variables (read-only).
# NOTE(review): the handle stays open for the rest of the session — confirm
# whether anything besides the key listing below still needs it.
dataset = h5py.File('./../../../../../data/hongtao/variables_tt_re2.h5','r')

# Names of the network inputs (X) and of the regression targets (Y).
X_keys = ['j1_pt', 'j1_eta', 'j1_phi', 'j1_m', 'j1_DL1r', 'j2_pt', 'j2_eta', 'j2_phi', 'j2_m', 'j2_DL1r', 'j3_pt', 'j3_eta', 'j3_phi', 'j3_m', 'j3_DL1r', 'j4_pt', 'j4_eta', 'j4_phi', 'j4_m', 'j4_DL1r', 'j5_pt', 'j5_eta', 'j5_phi', 'j5_m', 'j5_DL1r', 'lep_pt', 'lep_eta', 'lep_phi', 'met_met', 'met_phi']
Y_keys = ['th_pt', 'th_eta','th_phi', 'tl_pt', 'tl_eta', 'tl_phi', 'wh_pt', 'wh_eta', 'wh_phi', 'wl_pt', 'wl_eta', 'wl_phi', 'bh_pt','bh_eta', 'bh_phi','bl_pt','bl_eta', 'bl_phi']

# Group the dataset keys by variable type.
phi_keys = [key for key in dataset.keys() if 'phi' in key]
eta_keys = [key for key in dataset.keys() if 'eta' in key]
pt_keys = [key for key in dataset.keys() if 'pt' in key]
# Mass keys are matched on the '_m' suffix: a plain substring test for 'm'
# would also select non-mass keys such as 'met_met' or 'met_phi' if present.
m_keys = [key for key in dataset.keys() if key.endswith('_m')]
DL1r_keys = [key for key in dataset.keys() if 'DL1r' in key]


Y_length = len(Y_keys)
X_length = len(X_keys)
# Event count from which the 80/20 train/test split index is computed.
crop0 =  200000 

In [2]:
import scale_variables
import shape_timesteps

# How to scale each input/output
def get_methods(keys):
    """Return the scaling-method name for every variable key, in input order.

    The variable type is taken as the second underscore-separated token of a
    key (e.g. 'j1_pt' -> 'pt', 'met_met' -> 'met') and looked up in a fixed
    type-to-method table.

    Raises:
        IndexError: if a key contains no underscore.
        KeyError: if a key's variable type is not in the table.
    """
    method_for_type = {
        'pt': 'cartbox',
        'eta': 'meanmax',
        'phi': 'phi_pi',
        'DL1r': 'meanmax',
        'm': 'meanmax',
        'met': 'meanmax',
    }
    # Extract every type first, then translate, so a malformed key is reported
    # before any unknown type.
    var_types = [tokens[1] for tokens in (key.split('_') for key in keys)]
    return [method_for_type[var_type] for var_type in var_types]

# One scaling-method name per input / target key, in key order.
X_methods = get_methods(X_keys)
Y_methods = get_methods(Y_keys)

# Scale the inputs and the targets. Each call returns a (scaled array, scaling
# parameters) pair together with the names of the produced columns.
# NOTE(review): the meaning of the trailing `True` flag is defined in
# scale_variables, which is not visible here — confirm.
Scaler = scale_variables.Scale_variables()
(X_total, X_maxmean), X_names = Scaler.scale_arrays(X_keys, X_methods, True)
(Y_total, Y_maxmean), Y_names = Scaler.scale_arrays(Y_keys, Y_methods, True)

# Sanity check on the target scaling; the value returned by test_inverse is
# reported as the maximum scaling error.
# NOTE(review): presumably a scale -> inverse-scale round trip — confirm in scale_variables.
error = Scaler.test_inverse(Y_keys, Y_methods, True)
print('Max scaling error: {}'.format(error))

Max scaling error: 3.410605131648481e-13


In [3]:
# Build the train/test input and target arrays.
split = int(np.floor(0.8*crop0)) # 80/20 split: the first `split` rows are used for training

# Targets: rows before `split` are for training, the remaining rows are held out.
trainY, testY = Y_total[0:split,:], Y_total[split:,:]

# Rearrange the flat scaled inputs into a jets block and an "other" block.
# NOTE(review): the meaning of the two boolean flags is defined in
# shape_timesteps, which is not visible here — confirm.
timestep_builder = shape_timesteps.Shape_timesteps()
totalX_jets, totalX_other = timestep_builder.reshape_X(X_total, X_names, True,True)

# Where the mask is 1 the jet entries are kept; where it is 0 they become -2,
# the value handed to the Masking layer in build_model.
# NOTE(review): assumes create_mask() returns 0/1 values broadcastable to
# totalX_jets — confirm.
mask = timestep_builder.create_mask()
totalX_jets = mask*totalX_jets - 2*(1-mask)
trainX_jets, testX_jets = totalX_jets[0:split,:,:], totalX_jets[split:,:,:]
trainX_other, testX_other = totalX_other[0:split,:], totalX_other[split:,:]

# Turn the "other" block into one extra timestep and place it in front of the
# jet timesteps (concatenation along axis 1).
trainX_other = np.reshape(trainX_other, (trainX_other.shape[0], 1, -1))
testX_other = np.reshape(testX_other, (testX_other.shape[0], 1, -1))
trainX = np.concatenate([trainX_other, trainX_jets], axis=1)
testX = np.concatenate([testX_other, testX_jets], axis=1)
# Cut the target columns into 6 equal consecutive groups and stack the groups
# along a new axis 1 (the recorded output below shows shape (N, 6, 3)).
trainY = np.split(trainY, 6, axis=1)
trainY = np.stack(trainY, axis=1)
testY = np.split(testY, 6, axis=1)
testY = np.stack(testY, axis=1)
# Free the intermediate arrays; only the train/test slices are used from here on.
del totalX_jets
del totalX_other 

print(trainX.shape, trainY.shape)
print(testX.shape, testY.shape)

(160000, 6, 5) (160000, 6, 3)
(40000, 6, 5) (40000, 6, 3)


In [4]:
# Histograms of every scaled target variable; drawn only when `show` is True.
show = False

# Bin edges per variable type: narrow range for phi, asymmetric range for pt,
# [-1, 1] for everything else.
phi_bins = np.linspace(-0.1, 0.1, 40)
pt_bins = np.linspace(-0.3, 1, 40)
other_bins = np.linspace(-1, 1, 40)
Y_bins = [
    phi_bins if 'phi' in name else (pt_bins if 'pt' in name else other_bins)
    for name in Y_names
]

if show:
    n_targets = Y_total.shape[1]
    # One subplot row per target column.
    plt.figure(figsize=(6, 6*n_targets))
    for i in range(n_targets):
        plt.subplot(n_targets, 1, i+1)
        bins = Y_bins[i]
        plt.hist(Y_total[:,i], bins, histtype='step')
        plt.xlabel(Y_names[i])
        plt.ylabel('Frequency')

In [5]:
# Histograms of every scaled input variable (taken from X_total, i.e. before
# the mask is applied); drawn only when `show` is True.
show = False

if show:
    n_inputs = X_total.shape[1]
    # One subplot row per input column, 40 bins each.
    plt.figure(figsize=(6, 6*n_inputs))
    for i in range(n_inputs):
        plt.subplot(n_inputs, 1, i+1)
        plt.hist(X_total[:,i], 40, histtype='step')
        plt.xlabel(X_names[i])
        plt.ylabel('Frequency')

 

In [6]:
# Build model

def build_model():
    """Build and compile the sequence-to-sequence regression network.

    The input shape is read from the global `trainX` (timesteps, features) and
    the per-timestep output width from the global `trainY`. Layers, in order:
    Masking with mask value -2, a per-timestep Dense(440, relu), three stacked
    bidirectional LSTMs (88 units each, returning full sequences), per-timestep
    Dense layers of width 320, 90 and 30 (relu), and a linear per-timestep
    output layer.

    Returns:
        A Keras model compiled with MSE loss, the Adam optimizer
        (learning rate 5e-5) and MSE as a metric.
    """
    n_steps, n_features = trainX.shape[1], trainX.shape[2]
    inputs = Input(shape=(n_steps, n_features))

    hidden = Masking(-2)(inputs)
    hidden = TimeDistributed(Dense(440, activation='relu'))(hidden)

    # Three identical bidirectional LSTM layers stacked on top of each other.
    for _ in range(3):
        hidden = Bidirectional(LSTM(88, return_sequences=True))(hidden)

    # Per-timestep dense layers of decreasing width.
    for width in (320, 90, 30):
        hidden = TimeDistributed(Dense(width, activation='relu'))(hidden)

    outputs = TimeDistributed(Dense(trainY.shape[2], activation='linear'))(hidden)

    model = keras.models.Model(inputs=[inputs], outputs=outputs)
    model.compile(
        loss='mse',
        optimizer=keras.optimizers.Adam(learning_rate=5e-5),
        metrics=['mse'],
    )
    return model

# Instantiate the network and print its layer-by-layer summary.
model = build_model()
model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 6, 5)]            0         
_________________________________________________________________
masking (Masking)            (None, 6, 5)              0         
_________________________________________________________________
time_distributed (TimeDistri (None, 6, 440)            2640      
_________________________________________________________________
bidirectional (Bidirectional (None, 6, 176)            372416    
_________________________________________________________________
bidirectional_1 (Bidirection (None, 6, 176)            186560    
_________________________________________________________________
bidirectional_2 (Bidirection (None, 6, 176)            186560    
_________________________________________________________________
time_distributed_1 (TimeDist (None, 6, 320)            56640 

In [None]:
# Fit model

# Stop training once the training MSE has not improved for 10 consecutive epochs.
# NOTE(review): monitoring 'val_loss' instead would stop on the held-out
# validation split rather than on the training metric — confirm which is intended.
callback = keras.callbacks.EarlyStopping(monitor='mse', patience=10)

# Upper bound on the number of epochs; early stopping may end training sooner.
Epochs= 256

# The last 20% of the training rows are held out for validation. The callback
# must be passed to fit() explicitly, otherwise early stopping never runs.
history = model.fit([trainX], trainY, verbose=1, epochs=Epochs,
                   validation_split=0.2, shuffle=False,
                   batch_size=512, callbacks=[callback])

In [None]:
# Training and validation loss per epoch, taken from the fit history.
plt.figure(figsize=(12, 8))
for history_key, curve_label in (('loss', 'training'), ('val_loss', 'validation')):
    plt.plot(history.history[history_key], label=curve_label)
plt.xlabel('epoch')
plt.ylabel('mse loss')
plt.legend()
plt.title('MSE loss')

In [None]:
class Analysis:
    """Error summaries and diagnostic plots comparing predictions with true values.

    Every public method takes `compare` (predictions) and `true` as 2-D NumPy
    arrays indexed [sample, variable], plus `names` with one label per variable
    column. When `wrap_phi` is true, each column whose name contains "phi" is
    treated as an angle in radians and wrapped into (-pi, pi] before it is
    plotted or differenced.
    """

    def __init__(self):
        # Maximum number of leading samples drawn by predictions_vs_sample.
        self.crop = 100
        # Base figure size in inches: figures are 2*fsize wide and fsize tall per variable.
        self.fsize = 6
        # Number of bins used when a histogram is not given explicit bin edges.
        self.histogram_bins = 40

    def wrap_phi(self, var):
        """Wrap angles given in radians into the interval (-pi, pi]."""
        var = var%(2*np.pi)
        var = var - 2*np.pi*(var > np.pi)
        return var

    def _wraps(self, name, wrap_phi):
        """Return True when the column called `name` must be phi-wrapped."""
        return bool(wrap_phi) and "phi" in name

    def _columns(self, compare, true, i, name, wrap_phi):
        """Return the (predicted, true) values of column i, wrapped if requested."""
        predicted_col, true_col = compare[:, i], true[:, i]
        if self._wraps(name, wrap_phi):
            predicted_col = self.wrap_phi(predicted_col)
            true_col = self.wrap_phi(true_col)
        return predicted_col, true_col

    def _difference(self, minuend, subtrahend, name, wrap_phi):
        """Return minuend - subtrahend, wrapped as an angle if requested."""
        diff = minuend - subtrahend
        if self._wraps(name, wrap_phi):
            diff = self.wrap_phi(diff)
        return diff

    def _bin_edges(self, values, bins, i):
        """Return bins[i], or edges computed from `values` when bins[i] is None."""
        if bins[i] is None:
            return np.histogram_bin_edges(values, bins=self.histogram_bins)
        return bins[i]

    def predictions_vs_sample(self, compare, true, names, wrap_phi):
        """Plot predicted and true values of the first `self.crop` samples.

        One subplot per variable: predictions in blue, true values in red, a
        green segment joining each pair, and a horizontal line at the mean of
        the true values. The title carries the MSE over all samples.
        """
        plt.figure(figsize=(self.fsize*2, self.fsize*len(names)))
        # Never index past the available samples when fewer than `crop` exist.
        n_shown = min(self.crop, len(compare), len(true))
        samples = np.arange(n_shown)
        for i in range(0, len(names)):
            predicted_col, true_col = self._columns(compare, true, i, names[i], wrap_phi)
            compare_small = predicted_col[:n_shown]
            true_small = true_col[:n_shown]
            plt.subplot(len(names), 1, i+1)
            plt.plot(samples, compare_small, 'bo', markersize=3, label = 'Predictions')
            plt.plot(samples, true_small, 'ro', markersize=3, label = 'True Value')
            if n_shown:
                # A single vectorised call draws every prediction-truth segment.
                plt.vlines(samples, color='g', linestyle='-', alpha=0.2,
                           ymin=np.minimum(compare_small, true_small),
                           ymax=np.maximum(compare_small, true_small))
            plt.hlines(np.mean(true_col), xmin=-20, xmax=self.crop+20, alpha=0.5)
            diff = self._difference(compare[:, i], true[:, i], names[i], wrap_phi)
            MSE = 1/diff.size*np.sum(diff**2)
            plt.xlabel('Sample')
            plt.xlim(0, self.crop)
            plt.ylabel(names[i])
            plt.title(names[i] + " MSE: " + str(MSE))
            plt.legend()

    def display_errors(self, compare, true, names, wrap_phi):
        """Print the total MSE followed by the MSE of each named variable.

        With `wrap_phi`, the differences of phi columns are wrapped before
        squaring, in both the per-variable and the total figure.
        """
        diff = compare - true
        wrapped_columns = [i for i in range(len(names)) if self._wraps(names[i], wrap_phi)]
        if wrapped_columns:
            # Work on a float64 copy so the wrapped values are stored without truncation.
            diff = diff.astype(np.float64)
            for i in wrapped_columns:
                diff[:, i] = self.wrap_phi(diff[:, i])
        MSE = 1/diff.size*np.sum(diff**2)
        print("total MSE: " + str(MSE))
        print(" ")
        for i in range(len(names)):
            MSE = 1/diff[:, i].size*np.sum(diff[:, i]**2)
            print("{0} MSE : ".format(names[i]), '%.10f'%MSE)

    def difference_histogram(self, compare, true, names, wrap_phi, bins):
        """Plot a normalised histogram of (true - predicted) for each variable.

        `bins[i]` gives the bin edges of variable i; when it is None the edges
        are derived from the differences themselves.
        """
        plt.figure(figsize=(self.fsize*2,self.fsize*len(names)))
        for i in range(len(names)):
            plt.subplot(len(names), 1, i+1)
            diff = self._difference(true[:, i], compare[:, i], names[i], wrap_phi)
            hbins = self._bin_edges(diff, bins, i)
            plt.hist(diff, hbins, histtype='step', color='purple', label='true - predicted', density=True)
            plt.xlabel("Difference (Mean: {0}, Std: {1})".format(np.mean(diff), np.std(diff)))
            plt.title(names[i])
            plt.legend()
            plt.ylabel('Frequency')

    def variable_histogram(self, compare, true, names, wrap_phi, bins):
        """Overlay histograms of the true and the predicted values per variable.

        `bins[i]` gives the bin edges of variable i; when it is None the edges
        are derived from the (possibly wrapped) true values.
        """
        plt.figure(figsize=(self.fsize*2,self.fsize*len(names)))
        for i in range(len(names)):
            plt.subplot(len(names), 1, i+1)
            compare_small, true_small = self._columns(compare, true, i, names[i], wrap_phi)
            hbins = self._bin_edges(true_small, bins, i)
            plt.hist(true_small, hbins, histtype='step', color='b', label='true values', density=False)
            plt.hist(compare_small, hbins, histtype='step', color='r', label='predictions', density=False)
            plt.xlabel(names[i])
            plt.title(names[i])
            plt.legend()
            plt.ylabel('Frequency')

    def difference_vs_variable(self, compare, true, names, wrap_phi):
        """Scatter (true - predicted) against the true value for each variable."""
        plt.figure(figsize=(self.fsize*2,self.fsize*len(names)))
        for i in range(len(names)):
            plt.subplot(len(names), 1, i+1)
            _, true_col = self._columns(compare, true, i, names[i], wrap_phi)
            diff = self._difference(true[:, i], compare[:, i], names[i], wrap_phi)
            plt.plot(true_col, diff, 'o', color='purple', label='True - Predicted', markersize=2)
            plt.xlabel('True ' + names[i])
            plt.legend()
            plt.ylabel('Difference')

    def predicted_vs_true(self, compare, true, names, wrap_phi):
        """Scatter predicted against true values, with the identity line drawn."""
        plt.figure(figsize=(self.fsize*2,self.fsize*len(names)))
        for i in range(len(names)):
            plt.subplot(len(names), 1, i+1)
            predicted_col, true_col = self._columns(compare, true, i, names[i], wrap_phi)
            plt.plot(true_col, predicted_col, 'o', color='g', markersize=2)
            # Reference line y = x across the range of the true values.
            line = np.linspace(np.min(true_col), np.max(true_col), 100)
            plt.plot(line, line, color='b')
            plt.xlabel('True')
            plt.title(names[i])
            plt.ylabel('Predicted')

In [None]:
# NOTE(review): the name `display` shadows IPython's built-in display() helper
# for the rest of the session.
display = Analysis()

# Test-set predictions and targets, flattened from (N, 6, 3) to one row per
# event. Despite the "_unscaled" suffix nothing is inverse-scaled here: these
# arrays are the direct model output and the scaled targets.
predictions_unscaled = model.predict([testX])
true_unscaled = testY.reshape((testY.shape[0], -1))
predictions_unscaled = predictions_unscaled.reshape((predictions_unscaled.shape[0], -1))

# Predict on train and test rows together, then re-run the target scaling to
# obtain the scaling parameters needed for the inverse transform.
# NOTE(review): presumably invscale_arrays needs arrays covering the full
# dataset, which would explain predicting on the training rows too — confirm
# in scale_variables.
total_predictions = model.predict([np.append(trainX,testX,axis=0)])
total_predictions = total_predictions.reshape((total_predictions.shape[0], -1))
(Y_total, TO_maxmean0), _ = Scaler.scale_arrays(Y_keys, Y_methods, True)

# Map predictions and targets back to the original scale and keep only the
# test rows (index `split` onwards).
predictions_origscale = Scaler.invscale_arrays(Y_keys, total_predictions, _, Y_methods, TO_maxmean0)[split:,:]
true_origscale = Scaler.invscale_arrays(Y_keys, Y_total, _, Y_methods, TO_maxmean0)[split:,:]

# Free the large intermediates.
# NOTE(review): after this cell Y_total no longer exists, so earlier cells that
# read it cannot be re-run without re-running the scaling cell first.
del Y_total
del TO_maxmean0
del _
del total_predictions

# Training scale plots 

In [None]:
# Print the total and per-variable MSE on the test rows, in the scale used for training (no phi wrapping).
display.display_errors(predictions_unscaled, true_unscaled, Y_names, False)

In [None]:
# Per-variable plot of predicted and true values for the leading test samples, in the training scale.
display.predictions_vs_sample(predictions_unscaled, true_unscaled, Y_names, False)

In [None]:
# Overlaid histograms of true and predicted values, binned with the per-variable edges in Y_bins.
display.variable_histogram(predictions_unscaled, true_unscaled, Y_names, False, Y_bins)

In [None]:
# Histograms of (true - predicted) per variable, binned with the per-variable edges in Y_bins.
display.difference_histogram(predictions_unscaled, true_unscaled, Y_names, False, Y_bins)

In [None]:
# Scatter of predicted against true values per variable, in the training scale.
display.predicted_vs_true(predictions_unscaled, true_unscaled, Y_names, False)

 # Original scale plots

In [None]:
# Print the total and per-variable MSE on the test rows in the original scale, called with wrap_phi=True.
display.display_errors(predictions_origscale, true_origscale, Y_keys, True)

In [None]:
# Per-variable plot of predicted and true values for the leading test samples in the original scale, called with wrap_phi=True.
display.predictions_vs_sample(predictions_origscale, true_origscale, Y_keys, True)

In [None]:
# Overlaid histograms of true and predicted values in the original scale; passing None for every variable lets the method choose the bin edges.
display.variable_histogram(predictions_origscale, true_origscale, Y_keys, True, [None for name in Y_names])

In [None]:
# Scatter of predicted against true values per variable in the original scale, called with wrap_phi=True.
display.predicted_vs_true(predictions_origscale, true_origscale, Y_keys, True)

In [None]:
# Disabled experiment kept for reference:
# weights, biases = model.layers[0].get_weights()


# List the layer objects of the model in order.
print(model.layers)

In [None]:
# Fetch and print every weight array of the layer at index 3 of model.layers.
# NOTE(review): per the recorded model summary this appears to be the first
# Bidirectional LSTM (372,416 parameters), so printing the arrays in full
# produces very large output — consider printing only their shapes.
weights = model.layers[3].get_weights()
print(weights)