# Training with the old ntuples

# Prepare

## 1 - Load modules

In [1]:
%matplotlib inline  # make sure plot shows
from lfv_pdnn.data_io.get_arrays import *
from lfv_pdnn.train import model
from lfv_pdnn.train.train_utils import get_input_array

In [2]:
# Directories the old signal / background samples are loaded from.
# Previously used Windows locations, kept for reference
# (Windows can recognize both "/" and "\"):
#   background: "E:/data/lfv/ntuples_last_run/TestData/data_npy"
#   signal:     "E:/data/lfv/ntuples_last_run/train_array_0909/data_npy/emu"
old_sig_path = "/home/paperspace/data/lfv/data_npy/emu"
old_bkg_path = "/home/paperspace/data/lfv/data_npy/"

## 2 - Load old array
### a) Load old background samples

In [3]:
# Load the old background samples from `old_bkg_path`.
# The result is used like a dictionary further down: its keys are printed and
# individual entries are looked up by the names listed in `plot_bkg_list`.
xb_dict_old = get_old_bkg(old_bkg_path)

### b) Load old signal samples

In [4]:
# Load the old signal samples from `old_sig_path`.
# The result is later handed to `get_input_array` together with the key '500GeV'.
xs_dict_old = get_old_sig(old_sig_path)

## 1 - Define constants & functions for training.

In [5]:
# Show which keys (presumably background sample names — confirm) were loaded,
# so the names used in `plot_bkg_list` can be checked against them.
print(xb_dict_old.keys())

In [6]:
# Feature selections passed to the model, one list per channel.
# NOTE(review): these look like column indices into the input arrays — confirm
# against `prepare_array`.
selected_features_emu = [
    0, 1, 2, 3,
    5, 6, 7,
    15, 16, 17, 18, 19,
]
selected_features_etau = [
    0, 1, 2, 3,
    9, 10, 11,
    13, 14, 15, 16, 17, 18, 19,
]
selected_features_mutau = [
    0,
    5, 6, 7,
    9, 10, 11,
    13, 14, 15, 16, 17, 18, 19,
]

# Background entries to draw in the score plots, and the matching subset of
# the loaded background dictionary (same order as the list).
plot_bkg_list = [
    'di_boson',
    'drell_yan',
    'top_quark',
    'w_jets',
    'z_ll',
]
plot_bkg_dict = dict((bkg_name, xb_dict_old[bkg_name]) for bkg_name in plot_bkg_list)

## 2 - Example for using the code

In [7]:
# Example: train a model for the emu channel, using the '500GeV' signal entry
# and the 'all' background selection.
# NOTE(review): -4 is passed to both get_input_array and prepare_array; the
# original note only says "-4 for emu" — confirm what the index refers to.
xs, xb = get_input_array(
    xs_dict_old, '500GeV',
    xb_dict_old, 'all',
    -4,  # -4 for emu
)
model_deep = model.Model_1016("model_test", len(selected_features_emu))
model_deep.prepare_array(
    xs, xb, selected_features_emu, -4,
    sig_weight=10000,
    bkg_weight=10000,
    test_rate=0.2,
)
model_deep.compile()
model_deep.train(
    batch_size=128,
    epochs=10,
    val_split=0.2,
    sig_class_weight=10.,
    bkg_class_weight=1.,
    verbose=1,
)
# Saving is left disabled here; uncomment to persist the trained model.
#model_deep.save_model()

In [8]:
# Example: show the trained model's performance, then draw the separated
# background scores on two side-by-side axes — log=False on the first,
# log=True on the second.
# NOTE(review): `plt` is never imported explicitly in the cells shown;
# presumably it arrives through the star import from
# lfv_pdnn.data_io.get_arrays — confirm.
model_deep.show_performance(figsize=(16, 8))
fig, ax = plt.subplots(ncols=2, figsize=(16, 4))
# Options shared by both panels; only the target axes and `log` differ.
score_plot_opts = dict(
    sig_arr=None,
    sig_weights=None,
    plot_title='training scores',
    bins=40,
    range=(-0.25, 1.25),
    density=True,
)
model_deep.plot_scores_separate(
    ax[0], plot_bkg_dict, plot_bkg_list, selected_features_emu,
    log=False, **score_plot_opts
)
model_deep.plot_scores_separate(
    ax[1], plot_bkg_dict, plot_bkg_list, selected_features_emu,
    log=True, **score_plot_opts
)

In [12]:
# Example: load a previously saved model and make the same plots.
# The input arrays are prepared exactly as in the training example before the
# saved 'model_test' model is loaded.
xs, xb = get_input_array(
    xs_dict_old, '500GeV',
    xb_dict_old, 'all',
    -4,  # -4 for emu
)
model_load = model.Model_1016("model_load_test", len(selected_features_emu))
model_load.prepare_array(
    xs, xb, selected_features_emu, -4,
    sig_weight=10000,
    bkg_weight=10000,
    test_rate=0.2,
)
model_load.load_model('model_test', model_class='*', date='*', version='')
model_load.show_performance(figsize=(16, 8))
# NOTE(review): `plt` is never imported explicitly in the cells shown;
# presumably it arrives through the star import from
# lfv_pdnn.data_io.get_arrays — confirm.
fig, ax = plt.subplots(ncols=2, figsize=(16, 4))
# Options shared by both panels; only the target axes and `log` differ.
score_plot_opts = dict(
    sig_arr=None,
    sig_weights=None,
    plot_title='training scores',
    bins=40,
    range=(-0.25, 1.25),
    density=True,
)
model_load.plot_scores_separate(
    ax[0], plot_bkg_dict, plot_bkg_list, selected_features_emu,
    log=False, **score_plot_opts
)
model_load.plot_scores_separate(
    ax[1], plot_bkg_dict, plot_bkg_list, selected_features_emu,
    log=True, **score_plot_opts
)