In [17]:
import datetime
import os
import shutil

import tensorflow as tf
from tensorboard.plugins.hparams import api as hp
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.mixed_precision import experimental as mixed_precision
from tensorflow.keras.metrics import MeanAbsolutePercentageError, MeanAbsoluteError, RootMeanSquaredError

import definitions
from training import train, data
from training.loguniform import LogUniform
from training.stepuniform import StepUniform
from training.steploguniform import StepLogUniform
from scipy.stats.distributions import randint
import numpy as np
import pandas as pd

import altair as alt

#alt.data_transformers.enable('data_server')
# Lift Altair's default cap on the number of rows a chart may embed.
alt.data_transformers.disable_max_rows()

# Create a 'mixed_float16' Keras policy and install it through the
# experimental mixed-precision API imported at the top of the notebook.
# NOTE(review): presumably this has to run before any model is built or
# loaded for the policy to apply -- confirm.
policy = mixed_precision.Policy('mixed_float16')
mixed_precision.set_policy(policy)

# Missing Mass Regression
## W -> l nu

In [18]:
# Sample and regression target used by every data-loading call below.
dataset, target = 'H125', 'nu'

In [19]:
# Jigsaw estimates for the train / validation / test splits.
jigsaw_splits = data.get_jigsaw(dataset=dataset, target=target)
jigsaw_train, jigsaw_val, jigsaw_test = jigsaw_splits

# Inputs (requested with scale_x=True) and targets for the same three splits.
xy_splits = data.get_datasets(dataset=dataset, target=target, scale_x=True)
x_train, y_train, x_val, y_val, x_test, y_test = xy_splits

print("x_train:")
print(x_train)

# Report the size of each split.
n_train, n_val, n_test = (split.shape[0] for split in (x_train, x_val, x_test))
print(f"num training samples: {n_train}")
print(f"num validation samples: {n_val}")
print(f"num testing samples: {n_test}")

x_train:
           METx      METy  Lax_reco  Lay_reco  Laz_reco  Lam_reco  Lbx_reco  \
0     -0.250382 -0.296569  0.747626  0.136942 -2.787683  0.000505 -0.503431   
1      0.917223  0.350753 -1.056465 -0.561564 -4.083565  0.000505  0.133056   
2      0.497515 -0.969540  0.217005  0.318689 -0.864559  0.000505 -0.720707   
3     -0.498057 -0.148172  0.304802 -0.304384  1.953907  0.000505  0.187068   
4      0.716234 -0.713684 -0.527178 -0.043852  0.856596  0.000505 -0.195243   
...         ...       ...       ...       ...       ...       ...       ...   
79995 -0.846182 -0.359765  0.485321 -0.027004  0.741265  0.000505  0.354674   
79996 -0.883018 -0.147421  0.241812 -0.209974  0.368327  0.000505  0.635020   
79997  0.070536 -0.100055  0.398308 -0.248272  0.928151  0.000505 -0.475031   
79998  0.988216  0.109721 -0.536616 -0.212984 -1.215416  0.000505 -0.457787   
79999 -0.094164  0.650613  0.534108 -0.204384  1.065219  0.000505 -0.446131   

       Lby_reco  Lbz_reco  Lbm_reco  
0   

## Dataset
The `H125` data samples are used with the `nu` targets. Below, the generated targets (`Nax_gen`, `Nay_gen`, `Naz_gen`, `Nbz_gen`), the jigsaw-reconstructed targets (`Nax_reco`, `Nay_reco`, `Naz_reco`, `Nbz_reco`) and the difference of the two are shown.

In [20]:
# Histogram each of the four generated target columns of the test set.
#
# The targets are signed (the printed y_test contains negative values), so the
# bins span a symmetric range. The previous extent of [0, 200] only covered the
# positive half of each distribution.
GEN_HIST_EXTENT = [-200, 200]
GEN_HIST_STEP = 5

gen_target_columns = definitions.TARGETS[dataset][target]
Nax_gen_chart, Nay_gen_chart, Naz_gen_chart, Nbz_gen_chart = (
    alt.Chart(y_test)
    .mark_bar()
    .encode(
        alt.X(
            f"{gen_target_columns[idx]}:Q",
            bin=alt.Bin(extent=GEN_HIST_EXTENT, step=GEN_HIST_STEP),
        ),
        y="count()",
    )
    for idx in range(4)
)
Nax_gen_chart | Nay_gen_chart | Naz_gen_chart | Nbz_gen_chart

In [21]:
# Histogram each of the four jigsaw-reconstructed target columns of the test set.
#
# The reconstructed values are signed (the printed jigsaw_test contains
# negative values), so the bins span a symmetric range. The previous extent of
# [0, 200] only covered the positive half of each distribution.
RECO_HIST_EXTENT = [-200, 200]
RECO_HIST_STEP = 5

reco_target_columns = definitions.JIGSAW_TARGETS[dataset][target]
Nax_reco_chart, Nay_reco_chart, Naz_reco_chart, Nbz_reco_chart = (
    alt.Chart(jigsaw_test)
    .mark_bar()
    .encode(
        alt.X(
            f"{reco_target_columns[idx]}:Q",
            bin=alt.Bin(extent=RECO_HIST_EXTENT, step=RECO_HIST_STEP),
        ),
        y="count()",
    )
    for idx in range(4)
)
Nax_reco_chart | Nay_reco_chart | Naz_reco_chart | Nbz_reco_chart

In [22]:
# Per-event difference between the generated target and its jigsaw estimate,
# one single-column frame and one histogram per target.
#
# A difference can have either sign, so the bins span [-100, 100], the same
# range the error charts further down use. The previous extent of [0, 100]
# only covered the events where gen > reco.
DIFF_HIST_EXTENT = [-100, 100]
DIFF_HIST_STEP = 5

jigsaw_target_columns = definitions.JIGSAW_TARGETS[dataset][target]
difference_labels = [
    'Nax_gen - Nax_reco',
    'Nay_gen - Nay_reco',
    'Naz_gen - Naz_reco',
    'Nbz_gen - Nbz_reco',
]

# Column idx of y_test is paired with the idx-th jigsaw target column.
(
    jigsaw_difference_Nax,
    jigsaw_difference_Nay,
    jigsaw_difference_Naz,
    jigsaw_difference_Nbz,
) = (
    pd.DataFrame({label: y_test.values[:, idx] - jigsaw_test[jigsaw_target_columns[idx]].values})
    for idx, label in enumerate(difference_labels)
)

difference_frames = (
    jigsaw_difference_Nax,
    jigsaw_difference_Nay,
    jigsaw_difference_Naz,
    jigsaw_difference_Nbz,
)
(
    jigsaw_difference_Nax_chart,
    jigsaw_difference_Nay_chart,
    jigsaw_difference_Naz_chart,
    jigsaw_difference_Nbz_chart,
) = (
    alt.Chart(frame)
    .mark_bar()
    .encode(
        alt.X(f"{label}:Q", bin=alt.Bin(extent=DIFF_HIST_EXTENT, step=DIFF_HIST_STEP)),
        y="count()",
    )
    for label, frame in zip(difference_labels, difference_frames)
)
jigsaw_difference_Nax_chart | jigsaw_difference_Nay_chart | jigsaw_difference_Naz_chart | jigsaw_difference_Nbz_chart

## Results

In [23]:
# Same three splits again, this time requested with x_y_split=False.
unsplit_frames = data.get_datasets(dataset=dataset, target=target, x_y_split=False)
df_train, df_val, df_test = unsplit_frames

print(df_train)

Hx_gen  Hy_gen     Hz_gen  Hm_gen       Hx_reco       Hy_reco  \
0        -0.0     0.0 -462.33320   125.0  0.000000e+00  4.440892e-15   
1         0.0     0.0 -540.76140   125.0 -1.421085e-14  0.000000e+00   
2         0.0     0.0 -302.00290   125.0  0.000000e+00 -8.881784e-15   
3         0.0    -0.0  339.42230   125.0  8.881784e-15 -1.065814e-14   
4        -0.0     0.0  244.77860   125.0  1.776357e-14  4.440892e-16   
...       ...     ...        ...     ...           ...           ...   
79995     0.0     0.0  167.81920   125.0 -3.552714e-15 -1.776357e-15   
79996    -0.0    -0.0  -34.02435   125.0 -1.776357e-15  3.552714e-15   
79997    -0.0    -0.0  767.80310   125.0  0.000000e+00  0.000000e+00   
79998    -0.0    -0.0  -98.31551   125.0 -3.552714e-15  0.000000e+00   
79999     0.0     0.0  837.30960   125.0  0.000000e+00  3.552714e-15   

         Hz_reco    Hm_reco    Wax_gen    Way_gen  ...    Nbx_gen    Nby_gen  \
0     -263.27580  128.90760  33.930650  10.610390  ... -13.226

In [24]:
# Load the saved model from the 'v2' log directory of this dataset.
log_dir = definitions.LOG_DIR / dataset / 'v2'
best_model_path = log_dir / 'best_model.h5'
model = tf.keras.models.load_model(str(best_model_path))

In [25]:
# Run the network on the test inputs.
y_pred_scaled = model.predict(x_test)

# Map the network output back to target units with the per-column mean and
# standard deviation of the training targets.
# The axis is given explicitly: np.mean(df) / np.std(df) forward axis=None to
# pandas, which reduces over the whole frame to a scalar on pandas >= 2.0 and
# then breaks on `.values`. ddof=0 matches what np.std computed before.
# NOTE(review): assumes the training targets were standardized with exactly
# these statistics -- confirm against the training code.
mean = y_train.mean(axis=0).to_numpy()
std = y_train.std(axis=0, ddof=0).to_numpy()
y_pred = y_pred_scaled * std + mean

print(y_pred)

[[  -0.51840291    2.21413307   86.99183364   56.71054074]
 [  -2.40070794   -0.72154385 -155.10676916 -205.95468388]
 [  -1.03907853    0.34664739  180.25697491  301.89872684]
 ...
 [  -1.29388818    1.96523223   24.42660565   26.14021945]
 [   2.9472295    -0.45342309  240.305746    234.71916013]
 [  -2.78900266   -4.15025082 -314.65725137 -147.66492116]]


In [26]:
# Show the jigsaw estimate, the truth and the network prediction for the test set.
for test_table in (jigsaw_test, y_test, y_pred):
    print(test_table)

Nax_reco   Nay_reco   Naz_reco    Nbz_reco
0      6.133221  33.823960   12.93972   29.560450
1    -18.267890 -21.116640  -31.31883 -140.937800
2     10.463300 -44.193660  157.14770  165.316700
3     33.694610 -11.768850   69.92698   47.082390
4    -11.203410  25.987010  -80.83449  -28.112800
...         ...        ...        ...         ...
9995 -20.101950  11.728790   59.51912   84.950160
9996  14.759800  -2.551917  -39.53232 -118.479000
9997  -4.632653 -14.447920    6.00164    6.374954
9998 -18.708870  -8.249379   28.43143  151.108200
9999  19.258010  -2.164513 -122.73540 -217.848500

[10000 rows x 4 columns]
        Nax_gen    Nay_gen     Naz_gen     Nbz_gen
0      2.314114  12.492300  -19.552300   -9.673151
1     -4.793596  -5.698381  -69.268640  -38.978670
2     -4.498787   6.614501    8.131576   32.906390
3      1.986047  17.524520   51.868040   16.743930
4     -8.463210  -7.345803  -52.134420 -200.526000
...         ...        ...         ...         ...
9995   4.677482  -3.0738

In [27]:
# Convert the test tables to plain NumPy arrays for the positional slicing
# done in the following cells.
# np.asarray leaves an ndarray unchanged, so this cell can be re-run safely;
# the previous `.values` access raised AttributeError on a second run because
# the names had already been rebound to ndarrays.
jigsaw_test = np.asarray(jigsaw_test)
y_test = np.asarray(y_test)
print(jigsaw_test)
print(y_test)

[[   6.133221   33.82396    12.93972    29.56045 ]
 [ -18.26789   -21.11664   -31.31883  -140.9378  ]
 [  10.4633    -44.19366   157.1477    165.3167  ]
 ...
 [  -4.632653  -14.44792     6.00164     6.374954]
 [ -18.70887    -8.249379   28.43143   151.1082  ]
 [  19.25801    -2.164513 -122.7354   -217.8485  ]]
[[   2.314114   12.4923    -19.5523     -9.673151]
 [  -4.793596   -5.698381  -69.26864   -38.97867 ]
 [  -4.498787    6.614501    8.131576   32.90639 ]
 ...
 [  11.82702    21.70403   -51.05338     3.21093 ]
 [  34.44451   -15.19392    54.71562    58.40485 ]
 [ -13.42579   -26.4406   -186.8114    -97.59522 ]]


In [28]:
def make_chart_data(idx):
    """Build the error table for one target column.

    Reads the notebook-level arrays ``jigsaw_test``, ``y_pred`` and ``y_test``
    and indexes them positionally as ``[:, idx]``.

    Args:
        idx: Column index of the target to compare.

    Returns:
        A DataFrame with two columns: 'Jigsaw' (jigsaw estimate minus truth)
        and 'NN' (network prediction minus truth).
    """
    truth = y_test[:, idx]
    jigsaw_error = jigsaw_test[:, idx] - truth
    nn_error = y_pred[:, idx] - truth
    return pd.DataFrame({'Jigsaw': jigsaw_error, 'NN': nn_error})
# One error table per target column, in column order 0..3.
Nax_chart_data, Nay_chart_data, Naz_chart_data, Nbz_chart_data = (
    make_chart_data(column_idx) for column_idx in range(4)
)

In [29]:
def make_chart(chart_data, name):
    """Overlay the 'Jigsaw' and 'NN' error histograms in one chart.

    Args:
        chart_data: Table with 'Jigsaw' and 'NN' columns.
        name: Name given to the folded value field; also used as the x field.

    Returns:
        An Altair chart with unstacked step areas, coloured by 'Method' and
        binned in steps of 5 over [-100, 100].
    """
    error_bins = alt.Bin(extent=[-100, 100], step=5)
    # Fold the two error columns into (Method, value) pairs.
    folded = alt.Chart(chart_data).transform_fold(['Jigsaw', 'NN'], as_=['Method', name])
    step_area = folded.mark_area(interpolate='step-after', line=True, opacity=0.7)
    return step_area.encode(
        alt.X(f"{name}:Q", bin=error_bins),
        y=alt.Y("count()", stack=None),
        color='Method:N',
    )
# Build one error chart per target and show them side by side.
Nax_chart, Nay_chart, Naz_chart, Nbz_chart = (
    make_chart(error_table, axis_title)
    for error_table, axis_title in (
        (Nax_chart_data, 'Nax error'),
        (Nay_chart_data, 'Nay error'),
        (Naz_chart_data, 'Naz error'),
        (Nbz_chart_data, 'Nbz error'),
    )
)
Nax_chart | Nay_chart | Naz_chart | Nbz_chart

In [30]:
# Print the jigsaw test estimates again before computing the metrics.
print(jigsaw_test)

[[   6.133221   33.82396    12.93972    29.56045 ]
 [ -18.26789   -21.11664   -31.31883  -140.9378  ]
 [  10.4633    -44.19366   157.1477    165.3167  ]
 ...
 [  -4.632653  -14.44792     6.00164     6.374954]
 [ -18.70887    -8.249379   28.43143   151.1082  ]
 [  19.25801    -2.164513 -122.7354   -217.8485  ]]


In [31]:
# Per-target MAE, MAPE and RMSE of the jigsaw estimate and of the network.
# Both arrays are transposed before being passed to the Keras loss functions,
# so each printed vector has one entry per target column.
truth_by_target = np.transpose(y_test)
for heading, estimate in (('Jigsaw:', jigsaw_test), ('NN:', y_pred)):
    estimate_by_target = np.transpose(estimate)
    mae = tf.keras.losses.MAE(truth_by_target, estimate_by_target).numpy()
    mape = tf.keras.losses.MAPE(truth_by_target, estimate_by_target).numpy()
    rmse = tf.keras.losses.MSE(truth_by_target, estimate_by_target).numpy()**0.5
    print(heading)
    print('\tmae = ' + str(mae))
    print('\tmape = ' + str(mape))
    print('\trmse = ' + str(rmse))

Jigsaw:
	mae = [22.01058452 22.0039148  98.14378848 99.78789455]
	mape = [918.00857536 705.03469392 800.91343503 961.40603564]
	rmse = [ 26.00207054  25.95102474 169.48412558 176.76883517]
NN:
	mae = [ 14.00216536  14.30764843 114.6729486  113.72390387]
	mape = [ 156.86488964  114.64347983 1800.20344662 2034.31457134]
	rmse = [ 17.83284565  18.05841631 157.78544215 162.28037364]


In [40]:
# Attach the predictions to the test frame: the two *m_pred columns are set to
# 0.0 and the four predicted columns come from y_pred in column order.
for zero_column in ('Nam_pred', 'Nbm_pred'):
    df_test[zero_column] = 0.0
for column_idx, pred_column in enumerate(('Nax_pred', 'Nay_pred', 'Naz_pred', 'Nbz_pred')):
    df_test[pred_column] = y_pred[:, column_idx]

# Hand the frame to the project helper, then show the result.
data.calc_H125(df_test)
print(df_test)

Hx_gen  Hy_gen      Hz_gen  Hm_gen       Hx_reco       Hy_reco  \
0       -0.0     0.0    13.27504   125.0 -5.329071e-15  0.000000e+00   
1        0.0    -0.0  -280.50480   125.0  3.552714e-15  8.493206e-15   
2        0.0     0.0   363.50360   125.0  0.000000e+00  3.552714e-15   
3        0.0    -0.0   185.62220   125.0 -1.332268e-15  0.000000e+00   
4        0.0     0.0  -361.60840   125.0 -7.105427e-15 -3.552714e-15   
...      ...     ...         ...     ...           ...           ...   
9995     0.0     0.0  1205.17900   125.0  1.243450e-14 -7.105427e-15   
9996     0.0    -0.0  -550.30910   125.0  0.000000e+00  1.776357e-15   
9997     0.0    -0.0   -35.46571   125.0  1.776357e-15  3.552714e-15   
9998     0.0    -0.0   292.66090   125.0  0.000000e+00 -2.581269e-15   
9999    -0.0     0.0  -624.99180   125.0  1.421085e-14  7.105427e-15   

        Hz_reco    Hm_reco    Wax_gen    Way_gen  ...    Waz_pred  \
0      85.00065  108.33680   8.983986   1.842351  ...   85.102894   
1  