In [1]:
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Layer, Conv1D, Flatten, Dense, Input, Concatenate, Masking, LSTM, TimeDistributed, Lambda, Reshape, Multiply, BatchNormalization, Bidirectional, Dot, RepeatVector, Add
from tensorflow.keras import regularizers 
from tensorflow.keras import initializers
import h5py 
import os 
from clr_callback import *
from tensorflow.keras.callbacks import TensorBoard
import tensorflow.keras.backend as K  
from tensorflow.keras.optimizers import * 
# import keras_one_cycle_clr as ktool

os.environ["CUDA_VISIBLE_DEVICES"]="1"

%matplotlib inline 

dataset = h5py.File('./../../../../../data/hongtao/both-5-8-2020.h5','r')
# ['j1_pt', 'j1_eta', 'j1_phi', 'j1_x', 'j1_y', 'j1_z', 'j1_m', 'j1_e', 'j1_DL1r', 'j1_isbtag', 'j2_pt', 'j2_eta', 'j2_phi', 'j2_x', 'j2_y', 'j2_z', 'j2_m', 'j2_e', 'j2_DL1r', 'j2_isbtag', 'j3_pt', 'j3_eta', 'j3_phi', 'j3_x', 'j3_y', 'j3_z', 'j3_m', 'j3_e', 'j3_DL1r', 'j3_isbtag', 'j4_pt', 'j4_eta', 'j4_phi', 'j4_x', 'j4_y', 'j4_z', 'j4_m', 'j4_e', 'j4_DL1r', 'j4_isbtag', 'j5_pt', 'j5_eta', 'j5_phi', 'j5_x', 'j5_y', 'j5_z', 'j5_m', 'j5_e', 'j5_DL1r', 'j5_isbtag', 'j6_pt', 'j6_eta', 'j6_phi', 'j6_x', 'j6_y', 'j6_z', 'j6_m', 'j6_e', 'j6_DL1r', 'j6_isbtag', 'lep_pt', 'lep_eta', 'lep_phi', 'lep_x', 'lep_y', 'lep_z', 'lep_e', 'met_met', 'met_phi']

X_keys = ['j1_pt', 'j1_eta', 'j1_phi', 'j1_m', 'j1_DL1r', 'j2_pt', 'j2_eta', 'j2_phi', 'j2_m', 'j2_DL1r', 'j3_pt', 'j3_eta', 'j3_phi', 'j3_m', 'j3_DL1r', 'j4_pt', 'j4_eta', 'j4_phi', 'j4_m', 'j4_DL1r', 'j5_pt', 'j5_eta', 'j5_phi', 'j5_m', 'j5_DL1r', 'j6_pt', 'j6_eta', 'j6_phi', 'j6_m', 'j6_DL1r', 'lep_pt', 'lep_eta', 'lep_phi', 'met_met', 'met_phi']

Y_keys = ['th_pt', 'th_eta','th_phi','th_m', 'wh_pt', 'wh_eta', 'wh_phi', 'wh_m', 'tl_pt', 'tl_eta', 'tl_phi', 'tl_m', 'wl_pt', 'wl_eta', 'wl_phi', 'wl_m']
phi_keys = list(filter(lambda a: 'phi' in a, dataset.keys()))
eta_keys = list(filter(lambda a: 'eta' in a, dataset.keys()))
pt_keys =  list(filter(lambda a: 'pt' in a, dataset.keys()))
m_keys = list(filter(lambda a: 'm' in a, dataset.keys()))
DL1r_keys = list(filter(lambda a: 'DL1r' in a, dataset.keys()))

Y_length = len(Y_keys)
X_length = len(X_keys)
crop0 =  1000000

In [2]:
import scale_variables
import shape_timesteps

# How to scale each input/output
def get_methods(keys):
    """Return the scaling-method name for every key in ``keys``.

    The variable type of a key is its second '_'-separated field
    (e.g. 'j1_pt' -> 'pt', 'met_met' -> 'met'); it is looked up in a fixed
    type -> method table.

    Parameters
    ----------
    keys : iterable of str
        Variable names of the form '<object>_<type>'.

    Returns
    -------
    list of str
        One method name per key, in the same order.

    Raises
    ------
    IndexError
        If a key contains no '_'.
    KeyError
        If a variable type is not in the table.
    """
    method_by_type = {
        'pt': 'cart_pt',
        'eta': 'meanmax',
        'phi': 'sincos',
        'DL1r': 'DL1r',
        'm': 'meanmax',
        'x': 'null',
        'y': 'null',
        'z': 'null',
        'e': 'null',
        'isbtag': 'null',
        'met': 'meanmax',
    }
    # Extract all type suffixes first, then translate them in a second pass.
    suffixes = [name.split('_')[1] for name in keys]
    return list(map(method_by_type.__getitem__, suffixes))

# Look up the scaling method for every input and target variable.
X_methods = get_methods(X_keys)
Y_methods = get_methods(Y_keys)

# Scale inputs and targets with the project-local scale_variables helper.
# NOTE(review): the meaning of the trailing True flag and the exact content of
# the returned (array, maxmean) pair and names list are defined in
# scale_variables — confirm there.
Scaler = scale_variables.Scale_variables()
(X_total, X_maxmean), X_names = Scaler.scale_arrays(X_keys, X_methods, True)
(Y_total, Y_maxmean), Y_names = Scaler.scale_arrays(Y_keys, Y_methods, True)

# Print the value returned by the helper's inverse-scaling test for the targets.
error = Scaler.test_inverse(Y_keys, Y_methods, True)
print('Max scaling error: {}'.format(error))

Max scaling error: 4.547473508864641e-13


In [3]:
# Create X and test array
# NOTE(review): the split index is derived from crop0, not from the actual
# number of rows in Y_total — presumably the two agree; confirm.
split = int(np.floor(0.9*crop0)) # 90/10 split: rows [0, split) train, rows [split, end) test

trainY, testY = Y_total[0:split,:], Y_total[split:,:]

# Reshape the scaled inputs into a jets array and an array of the other inputs.
# NOTE(review): the meaning of the two boolean flags is defined in
# shape_timesteps.reshape_X — confirm there.
timestep_builder = shape_timesteps.Shape_timesteps()
totalX_jets, totalX_other = timestep_builder.reshape_X(X_total, X_names, False,True)


In [42]:

    
def random_choice_noreplace(m, n, axis=-1):
    """Return an (m, n) integer array of argsort indices of uniform noise.

    An (m, n) array of uniform random numbers is drawn from NumPy's global
    random state and argsorted along ``axis``. With the default ``axis=-1``
    (or ``axis=1``) every row is a random permutation of ``0 .. n-1``.

    Parameters
    ----------
    m, n : int
        Number of rows and columns of the output.
    axis : int, optional
        Axis along which the noise is argsorted.
    """
    noise = np.random.rand(m, n)
    order = np.argsort(noise, axis=axis)
    return np.array(order)

def random_shuffle_jets(jets):
    """Independently permute the entries along axis 1 of every sample.

    Parameters
    ----------
    jets : numpy.ndarray, 3-D
        Indexed as (sample, object, feature).

    Returns
    -------
    numpy.ndarray
        New array of the same shape in which, for each sample, the rows along
        axis 1 are reordered by a random permutation; the feature vector of
        each object is kept intact.
    """
    n_samples, n_objects = jets.shape[0], jets.shape[1]
    order = random_choice_noreplace(n_samples, n_objects, axis=1)
    # A trailing singleton axis lets the same permutation broadcast over all features.
    order = order[:, :, np.newaxis]
    return np.take_along_axis(jets, order, axis=1)

def random_permutation_matrix(jets):
    """Draw one random n x n permutation matrix per sample.

    Parameters
    ----------
    jets : array with at least two dimensions
        Only ``jets.shape[0]`` (m, number of samples) and ``jets.shape[1]``
        (n, objects per sample) are read.

    Returns
    -------
    numpy.ndarray, shape (m, n, n), dtype float
        ``out[i, j, k] == 1`` where ``k`` is the random index drawn for object
        ``j`` of sample ``i`` and 0 elsewhere, so every row and every column of
        ``out[i]`` sums to 1.
    """
    m, n = jets.shape[0], jets.shape[1]
    # perm[i] is a random permutation of 0..n-1 for sample i.
    perm = random_choice_noreplace(m, n, axis=1)

    matrices = np.zeros((m, n, n))
    # Index arrays that broadcast to (m, n): one (sample, row, column) triple
    # per entry of perm. Passing them as separate indices (not as one stacked
    # array) is what makes NumPy address individual elements.
    sample_idx = np.arange(m)[:, np.newaxis]
    row_idx = np.arange(n)[np.newaxis, :]
    matrices[sample_idx, row_idx, perm] = 1
    return matrices
    
    
# idx = random_choice_noreplace(3,4,axis=1) #.reshape((3,4,1))
# print(idx.flatten())
# id1 = np.array([[0,1,2,3],[0,1,2,3],[0,1,2,3]])
# print(id1)

# x = np.array([[[1,2,3],[4,5,6],[7,8,9]]])
# idx = [[0,0,0],[0,1,2], [0,2,1]]
# print(x.shape, y.shape)
# print(x[tuple(idx)])


# a = np.array([[[1,8], [9,5], [4,5], [3,3]],
#        [[4,0], [7,8], [3,1], [5,2]],
#        [[7,5], [4,9], [4,2], [9,4]]])

# # print(a.shape)
# 
# print(np.take_along_axis(a,idx,axis=1))
# print(np.take(a,idx,axis=1))


[0 3 2 1 0 1 3 2 1 0 2 3]


In [None]:



# DL1r = totalX_jets[:,:,-1]
# argDL1r = np.argsort(DL1r,axis=-1)
# argDL1r = argDL1r.reshape((argDL1r.shape[0], -1, 1))
# jets_byDL1r = np.take_along_axis(totalX_jets, argDL1r, axis=1)
# # totalX_jets = np.concatenate([totalX_jets, jets_byDL1r], axis=1)

# totalX_jets == jets_byDL1r

# Keep the current jet array as the training target, then build the network
# input by randomly permuting the jets (axis 1) of every sample.
# NOTE(review): this cell is not idempotent — running it a second time makes
# totalY_jets refer to the already shuffled array. No random seed is set in
# the visible notebook, so the permutation differs between runs.
totalY_jets = totalX_jets
totalX_jets = random_shuffle_jets(totalX_jets)

In [None]:
# Rows before `split` form the training set, the remaining rows the test set.
trainX_jets, testX_jets = totalX_jets[:split], totalX_jets[split:]
trainX_other, testX_other = totalX_other[:split], totalX_other[split:]
trainY_jets, testY_jets = totalY_jets[:split], totalY_jets[split:]

# Drop the names of the unsplit input arrays; they are not used again below.
del totalX_jets, totalX_other

# Shapes of the training arrays on the first line, test arrays on the second.
print(*(arr.shape for arr in (trainX_jets, trainX_other, trainY_jets)))
print(*(arr.shape for arr in (testX_jets, testX_other, testY_jets)))

In [None]:
# Optional histograms of every scaled target column (disabled by default).
show = False

# Identical binning for all variable kinds: 40 edges on [-1, 1].
other_bins = np.linspace(-1, 1, 40)
phi_bins = np.linspace(-1, 1, 40)
pt_bins = np.linspace(-1, 1, 40)

# One bin array per target name, chosen by substring ('phi' is tested before 'pt').
Y_bins = []
for name in Y_names:
    if 'phi' in name:
        Y_bins.append(phi_bins)
    elif 'pt' in name:
        Y_bins.append(pt_bins)
    else:
        Y_bins.append(other_bins)

if show:
    n_targets = trainY.shape[1]
    plt.figure(figsize=(6, 6*n_targets))
    # One stacked panel per target column.
    for col in range(n_targets):
        plt.subplot(n_targets, 1, col + 1)
        plt.hist(Y_total[:, col], Y_bins[col], histtype='step')
        plt.xlabel(Y_names[col])
        plt.ylabel('Frequency')

In [None]:
# Optional histograms of every scaled input column (disabled by default).
show = False

if show:
    n_inputs = X_total.shape[1]
    plt.figure(figsize=(6, 6*n_inputs))
    # One stacked panel per input column, 40 bins each.
    for col in range(n_inputs):
        plt.subplot(n_inputs, 1, col + 1)
        plt.hist(X_total[:, col], 40, histtype='step')
        plt.xlabel(X_names[col])
        plt.ylabel('Frequency')

 

In [None]:
class SinkhornLayer(Layer):
    """Sinkhorn normalisation of a batch of square score matrices.

    The input is reshaped to (batch, n, n) with n = input.shape[1], divided by
    ``temperature`` and then alternately row- and column-normalised in log
    space ``n_iters`` times. The element-wise exponential of the result is
    returned with the same shape as the input; after enough iterations its
    rows and columns each sum to approximately 1.

    Parameters
    ----------
    n_iters : int
        Number of (row, column) normalisation rounds.
    temperature : float
        Divisor applied to the input before normalisation.
    **kwargs
        Forwarded to ``keras.layers.Layer``.
    """

    def __init__(self, n_iters=21, temperature=0.01, **kwargs):
        # Initialise the base layer before setting attributes.
        super(SinkhornLayer, self).__init__(**kwargs)
        self.supports_masking = False
        self.n_iters = n_iters
        # Kept as a plain Python float so it can be serialised by get_config
        # and adopts the dtype of the tensor it divides.
        self.temperature = float(temperature)

    def call(self, input_tensor, mask=None):
        """Return the Sinkhorn-normalised matrices; ``mask`` is ignored."""
        input_shape = tf.shape(input_tensor)
        n = input_shape[1]
        log_alpha = tf.reshape(input_tensor, [-1, n, n]) / self.temperature

        for _ in range(self.n_iters):
            # reduce_logsumexp subtracts the maximum internally, so large
            # values of input / temperature do not overflow in exp().
            log_alpha -= tf.reduce_logsumexp(log_alpha, axis=2, keepdims=True)  # rows
            log_alpha -= tf.reduce_logsumexp(log_alpha, axis=1, keepdims=True)  # columns
        return tf.reshape(tf.exp(log_alpha), input_shape)

    def compute_mask(self, x, mask=None):
        """This layer never propagates a mask."""
        return None

    def compute_output_shape(self, input_shape):
        """The output has the same shape as the input."""
        return input_shape

    def get_config(self):
        """Return the constructor arguments so the layer can be saved and reloaded."""
        config = super(SinkhornLayer, self).get_config()
        config.update({'n_iters': self.n_iters, 'temperature': self.temperature})
        return config





In [None]:
# Build model


max_len = trainX_jets.shape[1]
def build_classifier():
    """Build and compile the jet re-ordering network.

    The network reads a (n_jets, n_features) array per sample (shape taken
    from the global ``trainX_jets``), predicts an (n_jets, n_jets) matrix
    through a SinkhornLayer and applies that matrix to the input jets. The
    model output therefore has the same (n_jets, n_features) shape as the
    input and can be compared with jet targets such as ``trainY_jets``.

    Returns
    -------
    keras.models.Model
        Model compiled with MSE loss and Adam using a polynomial-decay
        learning-rate schedule (1e-3 -> 1e-4 over 10000 steps).
    """
    n_jets, n_features = trainX_jets.shape[1], trainX_jets.shape[2]
    jet_input = Input(shape=(n_jets, n_features))

    # Per-jet embedding: (batch, n_jets, 256).
    embedding = TimeDistributed(Dense(units=256))(jet_input)
    # pairwise scalar products to take object 'interactions' into account
    # -> (batch, n_jets, n_jets)
    dot = Dot([-1, -1])([embedding, embedding])

    # reshaping into a single vector; the Dot output holds n_jets * n_jets
    # values per sample (not n_jets * n_features).
    interactions = Reshape(target_shape=(n_jets * n_jets,))(dot)

    # two independent fully-connected layers with different activations
    interactions1 = Dense(units=n_jets * n_jets, activation="sigmoid")(interactions)
    interactions2 = Dense(units=n_jets * n_jets, activation="tanh")(interactions)

    # (this trick seems to be an important one)
    added_interactions = Add()([interactions1, interactions2])

    # appending 'interactions' to embeddings: every jet sees the same
    # interaction vector next to its own embedding.
    interactions_replicated = RepeatVector(n_jets)(added_interactions)
    joined = Concatenate(axis=-1)([embedding, interactions_replicated])

    # per-jet dense layer producing one row of n_jets scores: (batch, n_jets, n_jets)
    layer_for_combining = TimeDistributed(Dense(units=n_jets, activation="tanh"))(joined)

    # permutation approximation layer
    sinkhorn = SinkhornLayer(n_iters=20, temperature=0.03, name="sinkhorn")(layer_for_combining)
    # Apply the soft permutation to the input jets:
    # out[b, j, k] = sum_i sinkhorn[b, i, j] * jet_input[b, i, k]
    permute_apply = Dot(axes=[-2, -2])([sinkhorn, jet_input])

    # The re-ordered jets are the model output, because the training targets
    # are jet features rather than permutation matrices.
    model = keras.models.Model(inputs=[jet_input], outputs=permute_apply)
    lr_schedule = tf.keras.optimizers.schedules.PolynomialDecay(initial_learning_rate=1e-3, decay_steps=10000,end_learning_rate=1e-4,power=0.25)
    optimizer = keras.optimizers.Adam(learning_rate=lr_schedule)
    model.compile(loss='mse', optimizer= optimizer, metrics=['mse'])
    return model 

# model = keras.models.load_model('Jet_Reweight_cartpt.keras')
# Build a freshly initialised, compiled model and print its layer summary.
model = build_classifier()
model.summary()

In [None]:
# Fit model

# Stop training once val_loss has not improved for 4 consecutive epochs.
early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=4)
# reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=2, min_lr=1e-6, verbose=1)

# NOTE(review): clr_triangular is created here but is not included in the
# callbacks list passed to model.fit below, so it has no effect on training.
clr_triangular = CyclicLR(base_lr=1e-5, max_lr=1e-4, step_size=5000, mode='exp_range',gamma=0.9999)
# class_weight = {0:2, 1:1, 2:1, 3:1, 4:1, 5:2, 6:1, 7:1, 8:1, 9:1, 10:2, 11:1, 12:1, 13:1, 14:1, 15:2, 16:1, 17:1, 18:1, 19:1}


# Upper bound on the number of epochs; early stopping may end training sooner.
Epochs= 64

# del X_total
# del Y_total
# def loss(true, pred):
#     return K.mean(K.square(true-pred)*(1+K.square(true)))



# Train on the shuffled jets with the unshuffled jets as target; the test
# split doubles as the validation set monitored by early_stop.
history = model.fit([trainX_jets], trainY_jets, verbose=1, epochs=Epochs,
                   validation_data=([testX_jets], testY_jets), shuffle=True, callbacks=[early_stop],
                    batch_size=1000)


In [None]:
# # Fit model

# early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=4)
# # reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=2, min_lr=1e-6, verbose=1)

# clr_triangular = CyclicLR(base_lr=1e-5, max_lr=1e-4, step_size=5000, mode='exp_range',gamma=0.9999)
# # class_weight = {0:2, 1:1, 2:1, 3:1, 4:1, 5:2, 6:1, 7:1, 8:1, 9:1, 10:2, 11:1, 12:1, 13:1, 14:1, 15:2, 16:1, 17:1, 18:1, 19:1}


# Epochs= 256

# # del X_total
# # del Y_total
# # def loss(true, pred):
# #     return K.mean(K.square(true-pred)*(1+K.square(true)))



# history = model.fit([trainX_jets, trainX_other], trainY, verbose=1, epochs=Epochs,
#                    validation_data=([testX_jets, testX_other], testY), shuffle=True, callbacks=[early_stop],
#                     batch_size=1000)


In [None]:
# plt.xlabel('Training Iterations')
# plt.ylabel('Learning Rate')
# plt.title("CLR - 'triangular' Policy")
# plt.plot(clr_triangular.history['iterations'], clr_triangular.history['lr'])

In [None]:
# Training and validation loss per epoch, taken from the fit history.
plt.figure(figsize=(12, 8))
for history_key, curve_label in (('loss', 'training'), ('val_loss', 'validation')):
    plt.plot(history.history[history_key], label=curve_label)
plt.xlabel('epoch')
plt.ylabel('mse loss')
plt.legend()
plt.title('MSE loss')

In [None]:
# Flatten every test sample to one row so predictions and targets can be
# compared column by column. The row width is derived from the arrays
# themselves instead of a hard-coded 36, so it stays correct if the number of
# jets or of per-jet features changes.
n_test_samples = testY_jets.shape[0]
predictions_unscaled = model.predict([testX_jets]).reshape((n_test_samples, -1))
true_unscaled = testY_jets.reshape((n_test_samples, -1))



# total_predictions = model.predict([np.append(trainX_jets,testX_jets,axis=0), np.append(trainX_other,testX_other,axis=0)])
# (Y_total, TO_maxmean0), _ = Scaler.scale_arrays(Y_keys, Y_methods, True)

# predictions_origscale = Scaler.invscale_arrays(Y_keys, total_predictions, _, Y_methods, TO_maxmean0)[split:,:]
# true_origscale = Scaler.invscale_arrays(Y_keys, Y_total, _, Y_methods, TO_maxmean0)[split:,:]

# del Y_total
# del TO_maxmean0
# del _
# del total_predictions

In [None]:
# Show the shapes of the flattened prediction and target arrays side by side.
print(predictions_unscaled.shape, true_unscaled.shape)

In [None]:
# np.save('true', predictions_origscale)
# np.save('pred', true_origscale)

# Training scale plots 

In [None]:
import analysis
# Column names for the plots: every scaled-input name that contains 'j'.
# NOTE(review): this is a plain substring test, so a non-jet name containing
# the letter 'j' would also be selected — confirm against X_names.
jet_names = list(filter(lambda a: 'j' in a, X_names))
# Bound to the attribute analysis.Analysis itself (it is not called here);
# its functions are invoked directly on it in the cells below.
Analysis = analysis.Analysis 
# NOTE(review): the meaning of the trailing False flag is defined in the
# analysis module — confirm there.
Analysis.display_errors(predictions_unscaled, true_unscaled, jet_names, False)

In [None]:
# Analysis.display_errors(predictions_origscale, true_origscale, Y_keys, True)

In [None]:
# Project-local plot of predictions against targets for the flattened jet columns.
Analysis.predictions_vs_sample(predictions_unscaled, true_unscaled, jet_names, False)

In [None]:
# NOTE(review): Y_bins was built with one entry per name in Y_names, but it is
# passed here together with jet_names; all entries are the same 40-edge grid
# on [-1, 1], yet the list length may not match the number of jet columns.
Analysis.variable_histogram(predictions_unscaled, true_unscaled, jet_names, False, Y_bins)

In [None]:
# Analysis.difference_histogram(predictions_unscaled, true_unscaled, Y_names, False, Y_bins)

In [None]:
# Project-local predicted-versus-true plot for the flattened jet columns.
Analysis.predicted_vs_true(predictions_unscaled, true_unscaled, jet_names, False)

 # Original scale plots

In [None]:
# NOTE(review): predictions_origscale and true_origscale are only assigned in
# commented-out code in the visible part of this notebook, so this cell raises
# NameError on a fresh kernel unless they are defined elsewhere.
Analysis.display_errors(predictions_origscale, true_origscale, Y_keys, True)

In [None]:
# NOTE(review): predictions_origscale and true_origscale are only assigned in
# commented-out code in the visible part of this notebook, so this cell raises
# NameError on a fresh kernel unless they are defined elsewhere.
Analysis.predictions_vs_sample(predictions_origscale, true_origscale, Y_keys, True)

In [None]:
# The bins argument is a list of None with one entry per name in Y_names.
# NOTE(review): predictions_origscale and true_origscale are only assigned in
# commented-out code in the visible part of this notebook, so this cell raises
# NameError on a fresh kernel unless they are defined elsewhere.
Analysis.variable_histogram(predictions_origscale, true_origscale, Y_keys, True, [None for name in Y_names])

In [None]:
# NOTE(review): predictions_origscale and true_origscale are only assigned in
# commented-out code in the visible part of this notebook, so this cell raises
# NameError on a fresh kernel unless they are defined elsewhere.
Analysis.predicted_vs_true(predictions_origscale, true_origscale, Y_keys, True)

In [None]:
# model.save('Jet_Reweight_lr_decay_ex.keras')

# Observables

In [None]:
import observables 
import importlib
# Re-import the module so that edits to it are picked up without a kernel restart.
importlib.reload(observables)
# NOTE(review): true_origscale and predictions_origscale are only assigned in
# commented-out code in the visible part of this notebook, so this cell raises
# NameError on a fresh kernel unless they are defined elsewhere. The meaning
# of the True/False flag is defined in observables.fill_observables.
truths = observables.fill_observables(true_origscale, True, Y_keys)
preds = observables.fill_observables(predictions_origscale, False, Y_keys)

# top_dphi=np.abs(th_phi-tl_phi)
plt.figure(figsize=(12,6))
observables.plot_hist(truths['top_dphi'], preds['top_dphi'])

In [None]:
# top_m0= th_m**2-th_p**2 + tl_m**2-tl_p**2
# Truth versus prediction for the 'top_m0' observable (uses truths/preds
# filled by observables.fill_observables in an earlier cell).
plt.figure(figsize=(12,6))
observables.plot_hist(truths['top_m0'],preds['top_m0'])

In [None]:
# eta_cm=0.5*(th_eta-tl_eta), eta_boost=0.5*(th_eta+tl_eta)
# Two stacked panels: 'eta_cm' on top, 'eta_boost' below, truth versus prediction.
plt.figure(figsize=(12, 12))
for panel, observable in ((211, 'eta_cm'), (212, 'eta_boost')):
    plt.subplot(panel)
    observables.plot_hist(truths[observable], preds[observable])


In [None]:
# th_Pout=dot(th_P, cross(tl_P,ez)/norm(tl_P,ez)), tl_Pout=dot(tl_P, cross(th_P,ez)/norm(th_P,ez))
# Two stacked panels: 'th_Pout' on top, 'tl_Pout' below, truth versus prediction.
plt.figure(figsize=(12, 12))
for panel, observable in ((211, 'th_Pout'), (212, 'tl_Pout')):
    plt.subplot(panel)
    observables.plot_hist(truths[observable], preds[observable])


In [None]:
# pt_tot = th_pt+tl_pt
# Truth versus prediction for the 'pt_tot' observable (uses truths/preds
# filled by observables.fill_observables in an earlier cell).
plt.figure(figsize=(12,6))
observables.plot_hist(truths['pt_tot'],preds['pt_tot'])

In [None]:
# model.save('Jet_Reweight_cartpt.keras')

In [None]:
# Persist original-scale predictions and targets under aug_9/.
np.save('aug_9/Jet_Reweight_cartpt_pred', predictions_origscale)
np.save('aug_9/Jet_Reweight_cartpt_true', true_origscale)

# Collect the .value of every observable, in key order, for truth and prediction.
truth_observables = [truths[key].value for key in list(truths.keys())]
pred_observables = [preds[key].value for key in list(preds.keys())]

# Stack the observables along axis 1 and flatten everything after axis 0,
# giving one row per entry along the first axis.
truth_observables = np.stack(truth_observables, axis=1)
truth_observables = truth_observables.reshape((truth_observables.shape[0], -1))
pred_observables = np.stack(pred_observables, axis=1)
pred_observables = pred_observables.reshape((pred_observables.shape[0], -1))

np.save('aug_9/Jet_Reweight_cartpt_pred_observables', pred_observables)
np.save('aug_9/Jet_Reweight_cartpt_true_observables', truth_observables)
# Column order of the saved observable arrays.
print(list(preds.keys()))