In [16]:
import datetime
import os
import shutil

import tensorflow as tf
from tensorboard.plugins.hparams import api as hp
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.mixed_precision import experimental as mixed_precision
from tensorflow.keras.metrics import MeanAbsolutePercentageError, MeanAbsoluteError, RootMeanSquaredError

import definitions
from training import train, data
from training.loguniform import LogUniform
from training.stepuniform import StepUniform
from training.steploguniform import StepLogUniform
from scipy.stats.distributions import randint
import numpy as np
import pandas as pd

import altair as alt

# Select the Keras mixed-precision policy globally, before any model is loaded below.
policy = mixed_precision.Policy('mixed_float16')
mixed_precision.set_policy(policy)

# Lift Altair's row limit so whole data splits can be handed to alt.Chart.
# Alternative kept for reference: alt.data_transformers.enable('data_server')
alt.data_transformers.disable_max_rows()

# Missing Mass Regression
## H -> WW -> lnulnu

In [17]:
# Sample name and regression target; both are forwarded to the `data` loaders
# and used as keys into the `definitions` lookup tables further down.
dataset, target = 'H125', 'nuW'

In [18]:
# Jigsaw reconstruction for each split; compared against the network further down.
jigsaw_train, jigsaw_val, jigsaw_test = data.get_jigsaw(target=target, dataset=dataset)

# Feature / target frames for each split (scale_x=True is forwarded to the loader).
(
    x_train, y_train,
    x_val, y_val,
    x_test, y_test,
) = data.get_datasets(target=target, dataset=dataset, scale_x=True)

# Show the training features followed by the size of every split.
print(
    "x_train:",
    x_train,
    f"num training samples: {x_train.shape[0]}",
    f"num validation samples: {x_val.shape[0]}",
    f"num testing samples: {x_test.shape[0]}",
    sep="\n",
)

x_train:
           METx      METy  Lax_reco  Lay_reco  Laz_reco  Lam_reco  Lbx_reco  \
0     -0.250382 -0.296569  0.747626  0.136942 -2.787683  0.000505 -0.503431   
1      0.917223  0.350753 -1.056465 -0.561564 -4.083565  0.000505  0.133056   
2      0.497515 -0.969540  0.217005  0.318689 -0.864559  0.000505 -0.720707   
3     -0.498057 -0.148172  0.304802 -0.304384  1.953907  0.000505  0.187068   
4      0.716234 -0.713684 -0.527178 -0.043852  0.856596  0.000505 -0.195243   
...         ...       ...       ...       ...       ...       ...       ...   
79995 -0.846182 -0.359765  0.485321 -0.027004  0.741265  0.000505  0.354674   
79996 -0.883018 -0.147421  0.241812 -0.209974  0.368327  0.000505  0.635020   
79997  0.070536 -0.100055  0.398308 -0.248272  0.928151  0.000505 -0.475031   
79998  0.988216  0.109721 -0.536616 -0.212984 -1.215416  0.000505 -0.457787   
79999 -0.094164  0.650613  0.534108 -0.204384  1.065219  0.000505 -0.446131   

       Lby_reco  Lbz_reco  Lbm_reco  
0   

## Dataset
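Simple $H \to WW \to \ell\nu\ell\nu$ samples are used. The histograms below show the regressed variables: first the generator-level targets, then the Jigsaw reconstruction of the same quantities, and finally the difference between the two.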

In [19]:
def _gen_histogram(column_index, extent):
    """Histogram (5-unit bins) of one generator-level target column of ``y_test``.

    column_index -- position of the column in ``definitions.TARGETS[dataset][target]``.
    extent       -- ``[low, high]`` range handed to ``alt.Bin``.
    """
    field = definitions.TARGETS[dataset][target][column_index]
    return alt.Chart(y_test).mark_bar().encode(
        alt.X(f"{field}:Q", bin=alt.Bin(extent=extent, step=5)),
        y="count()",
    )


# The first two targets are signed momentum components (the recorded outputs
# show negative values and a mean close to zero), so their range has to be
# symmetric around zero; a [0, 200] range hides the negative half.
Nax_gen_chart = _gen_histogram(0, [-100, 100])
Nay_gen_chart = _gen_histogram(1, [-100, 100])
# The last two targets are plotted over a non-negative range, as before.
Wam_gen_chart = _gen_histogram(2, [0, 100])
Wbm_gen_chart = _gen_histogram(3, [0, 100])
Nax_gen_chart | Nay_gen_chart | Wam_gen_chart | Wbm_gen_chart


In [20]:
def _reco_histogram(column_index, extent):
    """Histogram (5-unit bins) of one Jigsaw-reconstructed column of ``jigsaw_test``.

    column_index -- position of the column in ``definitions.JIGSAW_TARGETS[dataset][target]``.
    extent       -- ``[low, high]`` range handed to ``alt.Bin``.
    """
    field = definitions.JIGSAW_TARGETS[dataset][target][column_index]
    return alt.Chart(jigsaw_test).mark_bar().encode(
        alt.X(f"{field}:Q", bin=alt.Bin(extent=extent, step=5)),
        y="count()",
    )


# The first two reconstructed columns take both signs in the recorded output,
# so they are binned symmetrically around zero instead of over [0, 200].
Nax_reco_chart = _reco_histogram(0, [-100, 100])
Nay_reco_chart = _reco_histogram(1, [-100, 100])
# The last two columns keep their original non-negative range.
Wam_reco_chart = _reco_histogram(2, [0, 100])
Wbm_reco_chart = _reco_histogram(3, [0, 100])
Nax_reco_chart | Nay_reco_chart | Wam_reco_chart | Wbm_reco_chart

In [21]:
def _difference_histogram(label, column_index):
    """Build the (frame, chart) pair for ``generator - Jigsaw`` on one target column.

    label        -- column name of the one-column frame, also used as the x field.
    column_index -- column of ``y_test`` and matching entry of
                    ``definitions.JIGSAW_TARGETS[dataset][target]``.

    Expects ``y_test`` and ``jigsaw_test`` to still be DataFrames at this point.
    """
    reco_column = definitions.JIGSAW_TARGETS[dataset][target][column_index]
    difference = y_test.values[:, column_index] - jigsaw_test[reco_column].values
    frame = pd.DataFrame({label: difference})
    # A difference is signed, so the bins must cover both sides of zero; the
    # range matches the [-100, 100] used by the error charts in `make_chart`.
    chart = alt.Chart(frame).mark_bar().encode(
        alt.X(f"{label}:Q", bin=alt.Bin(extent=[-100, 100], step=5)),
        y="count()",
    )
    return frame, chart


jigsaw_difference_Nax, jigsaw_difference_Nax_chart = _difference_histogram('Nax_gen - Nax_reco', 0)
jigsaw_difference_Nay, jigsaw_difference_Nay_chart = _difference_histogram('Nay_gen - Nay_reco', 1)
jigsaw_difference_Wam, jigsaw_difference_Wam_chart = _difference_histogram('Wam_gen - Wam_reco', 2)
jigsaw_difference_Wbm, jigsaw_difference_Wbm_chart = _difference_histogram('Wbm_gen - Wbm_reco', 3)
jigsaw_difference_Nax_chart | jigsaw_difference_Nay_chart | jigsaw_difference_Wam_chart | jigsaw_difference_Wbm_chart

## Results

In [22]:
# Same splits again, this time as one frame per split (x_y_split=False is
# forwarded to the loader instead of asking for separate x / y frames).
df_train, df_val, df_test = data.get_datasets(
    x_y_split=False,
    target=target,
    dataset=dataset,
)
print(df_train)

Hx_gen  Hy_gen     Hz_gen  Hm_gen       Hx_reco       Hy_reco  \
0        -0.0     0.0 -462.33320   125.0  0.000000e+00  4.440892e-15   
1         0.0     0.0 -540.76140   125.0 -1.421085e-14  0.000000e+00   
2         0.0     0.0 -302.00290   125.0  0.000000e+00 -8.881784e-15   
3         0.0    -0.0  339.42230   125.0  8.881784e-15 -1.065814e-14   
4        -0.0     0.0  244.77860   125.0  1.776357e-14  4.440892e-16   
...       ...     ...        ...     ...           ...           ...   
79995     0.0     0.0  167.81920   125.0 -3.552714e-15 -1.776357e-15   
79996    -0.0    -0.0  -34.02435   125.0 -1.776357e-15  3.552714e-15   
79997    -0.0    -0.0  767.80310   125.0  0.000000e+00  0.000000e+00   
79998    -0.0    -0.0  -98.31551   125.0 -3.552714e-15  0.000000e+00   
79999     0.0     0.0  837.30960   125.0  0.000000e+00  3.552714e-15   

         Hz_reco    Hm_reco    Wax_gen    Way_gen  ...    Nbx_gen    Nby_gen  \
0     -263.27580  128.90760  33.930650  10.610390  ... -13.226

In [23]:
# Directory of the 'nuW-v2' run for this dataset, and the model file stored in it.
log_dir = definitions.LOG_DIR / dataset / 'nuW-v2'
best_model_path = log_dir / 'best_model.h5'
# load_model is given a plain string rather than the path object.
model = tf.keras.models.load_model(str(best_model_path))

In [24]:
y_pred = model.predict(x_test)

# Per-column statistics of the training targets. The reduction axis is given
# explicitly: np.mean(DataFrame) goes through DataFrame.mean(axis=None), which
# returns a scalar on pandas >= 2.0 and would break the `.values` access.
y_train_values = np.asarray(y_train)
mean = y_train_values.mean(axis=0)
std = y_train_values.std(axis=0)  # population std (ddof=0), same as np.std's default
print(mean)
print(std)

# Rescale the network output with the training-set statistics.
# NOTE(review): presumably the model was trained on standardized targets — confirm against the training code.
y_pred = y_pred * std + mean
print(y_pred)

[-1.58462254e-01 -5.41337648e-02  5.47749809e+01  5.43982033e+01]
[19.47419218 19.42657212 25.50221249 25.48067847]
[[-15.24891316 -14.95379621  69.72996557  78.79050465]
 [-15.51148926 -14.93969166  71.99341625  76.61108494]
 [-15.2515295  -14.95507686  69.9374722   78.59237696]
 ...
 [-16.17260386 -13.35972737  77.19402977  71.52036091]
 [-10.40595274 -15.40957661  80.48786328  68.09016082]
 [-19.95493499 -20.06295364  83.38237653  65.07067474]]


In [25]:
# Side-by-side dump of the Jigsaw estimate, the generator truth and the network prediction.
print(jigsaw_test, y_test, y_pred, sep='\n')

Nax_reco   Nay_reco  Wam_reco  Wbm_reco
0      6.133221  33.823960  40.26234  40.26234
1    -18.267890 -21.116640  42.60061  42.60061
2     10.463300 -44.193660  11.30977  11.30977
3     33.694610 -11.768850  34.21767  34.21767
4    -11.203410  25.987010  36.05377  36.05377
...         ...        ...       ...       ...
9995 -20.101950  11.728790  35.13280  35.13280
9996  14.759800  -2.551917  38.36331  38.36331
9997  -4.632653 -14.447920  13.28961  13.28961
9998 -18.708870  -8.249379  15.04520  15.04520
9999  19.258010  -2.164513  30.09002  30.09002

[10000 rows x 4 columns]
        Nax_gen    Nay_gen   Wam_gen   Wbm_gen
0      2.314114  12.492300  27.45884  82.17515
1     -4.793596  -5.698381  38.98607  81.02440
2     -4.498787   6.614501  34.39895  80.85492
3      1.986047  17.524520  39.25667  80.91439
4     -8.463210  -7.345803  37.39532  80.95255
...         ...        ...       ...       ...
9995   4.677482  -3.073829  29.08174  79.18924
9996  37.818410  38.399720  79.57488  30.

In [26]:
# Switch to plain arrays for the column-indexed arithmetic below.
# np.asarray is used instead of `.values` so the cell can be re-run: an
# ndarray passes through unchanged, whereas `ndarray.values` raises AttributeError.
jigsaw_test = np.asarray(jigsaw_test)
y_test = np.asarray(y_test)
print(jigsaw_test)
print(y_test)

[[  6.133221  33.82396   40.26234   40.26234 ]
 [-18.26789  -21.11664   42.60061   42.60061 ]
 [ 10.4633   -44.19366   11.30977   11.30977 ]
 ...
 [ -4.632653 -14.44792   13.28961   13.28961 ]
 [-18.70887   -8.249379  15.0452    15.0452  ]
 [ 19.25801   -2.164513  30.09002   30.09002 ]]
[[  2.314114  12.4923    27.45884   82.17515 ]
 [ -4.793596  -5.698381  38.98607   81.0244  ]
 [ -4.498787   6.614501  34.39895   80.85492 ]
 ...
 [ 11.82702   21.70403   73.16238   36.67673 ]
 [ 34.44451  -15.19392   79.71199   27.28519 ]
 [-13.42579  -26.4406    80.98686   24.99055 ]]


In [27]:
# Per-target error metrics for both estimators. The arrays are transposed so
# that the Keras loss functions, which reduce over the last axis, return one
# value per target column rather than one value per event.
for heading, estimate in (('Jigsaw:', jigsaw_test), ('NN:', y_pred)):
    print(heading)
    print('\tmae = ' + str(tf.keras.losses.MAE(np.transpose(y_test), np.transpose(estimate)).numpy()))
    print('\tmape = ' + str(tf.keras.losses.MAPE(np.transpose(y_test), np.transpose(estimate)).numpy()))
    # RMSE is taken as the square root of the per-target MSE.
    print('\trmse = ' + str(tf.keras.losses.MSE(np.transpose(y_test), np.transpose(estimate)).numpy()**0.5))

Jigsaw:
	mae = [22.01058452 22.0039148  31.67050295 32.04758756]
	mape = [918.00857536 705.03469392  63.41795107  64.92450152]
	rmse = [26.00207054 25.95102474 38.56823362 38.91140739]
NN:
	mae = [19.28904983 19.44816118 22.25897943 21.85369295]
	mape = [907.92573918 573.99995556 104.12663525 102.14821687]
	rmse = [23.55618156 23.84090315 30.03748549 29.24411189]


In [28]:
def make_chart_data(idx, jigsaw=None, truth=None, pred=None):
    """Return the per-event errors of both estimators for target column ``idx``.

    Parameters
    ----------
    idx : int
        Column of the target to compare.
    jigsaw, truth, pred : array-like, optional
        Jigsaw estimate, generator truth and network prediction, one row per
        event. Each defaults to the notebook-level ``jigsaw_test``, ``y_test``
        and ``y_pred`` respectively.

    Returns
    -------
    pandas.DataFrame
        Columns ``'Jigsaw'`` and ``'NN'``, each holding ``estimate - truth``.
    """
    # Coerce to ndarrays so positional column slicing works whether the inputs
    # are still DataFrames or have already been converted by an earlier cell.
    jigsaw = np.asarray(jigsaw_test if jigsaw is None else jigsaw)
    truth = np.asarray(y_test if truth is None else truth)
    pred = np.asarray(y_pred if pred is None else pred)
    return pd.DataFrame({
        'Jigsaw': jigsaw[:, idx] - truth[:, idx],
        'NN': pred[:, idx] - truth[:, idx],
    })
# One error frame per target column, built in column order 0..3.
Nax_chart_data, Nay_chart_data, Wam_chart_data, Wbm_chart_data = (
    make_chart_data(column) for column in range(4)
)

In [29]:
def make_chart(chart_data, name):
    """Overlay the 'Jigsaw' and 'NN' columns of ``chart_data`` as step histograms.

    chart_data -- frame with 'Jigsaw' and 'NN' columns.
    name       -- name given to the folded value field, used as the x field.

    The two columns are folded into a 'Method' key and a ``name`` value field,
    binned in steps of 5 over [-100, 100], drawn unstacked and coloured by method.
    """
    folded = alt.Chart(chart_data).transform_fold(['Jigsaw', 'NN'], as_=['Method', name])
    areas = folded.mark_area(interpolate='step-after', line=True, opacity=0.7)
    x_binned = alt.X(f"{name}:Q", bin=alt.Bin(extent=[-100, 100], step=5))
    y_counts = alt.Y("count()", stack=None)
    return areas.encode(x_binned, y=y_counts, color='Method:N')
# Build one error chart per target from its error frame, then show them in a row.
Nax_chart, Nay_chart, Wam_chart, Wbm_chart = (
    make_chart(frame, title)
    for frame, title in (
        (Nax_chart_data, 'Nax error'),
        (Nay_chart_data, 'Nay error'),
        (Wam_chart_data, 'Wam error'),
        (Wbm_chart_data, 'Wbm error'),
    )
)
Nax_chart | Nay_chart | Wam_chart | Wbm_chart

In [30]:
def _mass_distribution_chart(column_index, field):
    """Return (frame, chart) comparing generator, Jigsaw and NN values of one column.

    column_index -- column taken from ``y_test``, ``jigsaw_test`` and ``y_pred``.
    field        -- name given to the folded value field, used as the x field.

    The chart overlays the three sources as unstacked step histograms with
    1-unit bins over [38, 102] and a logarithmic count axis.
    """
    frame = pd.DataFrame({
        'Generator': y_test[:, column_index],
        'Jigsaw': jigsaw_test[:, column_index],
        'NN': y_pred[:, column_index],
    })
    folded = alt.Chart(frame).transform_fold(['Generator', 'Jigsaw', 'NN'], as_=['Method', field])
    areas = folded.mark_area(interpolate='step-after', line=True, opacity=0.7)
    chart = areas.encode(
        alt.X(f"{field}:Q", bin=alt.Bin(extent=[38, 102], step=1)),
        y=alt.Y("count()", stack=None, scale=alt.Scale(type='log')),
        color='Method:N',
    )
    return frame, chart


# Note: these assignments replace the error frames / charts of the same name from the cells above.
Wam_chart_data, Wam_chart = _mass_distribution_chart(2, 'Wam')
Wbm_chart_data, Wbm_chart = _mass_distribution_chart(3, 'Wbm')
Wam_chart | Wbm_chart