In [24]:
import datetime
import os
import shutil

import tensorflow as tf
from tensorboard.plugins.hparams import api as hp
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.mixed_precision import experimental as mixed_precision
from tensorflow.keras.metrics import MeanAbsolutePercentageError, MeanAbsoluteError, RootMeanSquaredError

import definitions
from training import train, data
from training.loguniform import LogUniform
from training.stepuniform import StepUniform
from training.steploguniform import StepLogUniform
from scipy.stats.distributions import randint
import numpy as np
import pandas as pd

import altair as alt

alt.data_transformers.enable('data_server')
#alt.data_transformers.disable_max_rows()

policy = mixed_precision.Policy('mixed_float16')
mixed_precision.set_policy(policy)

In [25]:
dataset = 'Wlnu'
target = 'W'

In [26]:
jigsaw_train, jigsaw_val, jigsaw_test = data.get_jigsaw(dataset=dataset, target=target)
x_train, y_train, x_val, y_val, X_test, y_test = data.get_datasets(dataset=dataset, target=target, scale=True)
print(jigsaw_test)
print(y_test)
print(x_train)

Wm_reco
0     80.70927
1     72.89712
2     77.88606
3     71.34239
4     74.48588
...        ...
9995  64.69530
9996  45.02896
9997  43.93967
9998  74.86523
9999  83.56715

[10000 rows x 1 columns]
        Wm_gen
0     81.10584
1     79.69392
2     92.72857
3     76.64119
4     80.20827
...        ...
9995  80.03781
9996  82.27058
9997  79.12370
9998  80.47066
9999  86.21435

[10000 rows x 1 columns]
           METx      METy   Lx_reco   Ly_reco   Lz_reco   Lm_reco
0      0.441085 -0.123268 -0.738416 -0.248920  0.645141 -0.000997
1      1.032623 -0.340998 -1.236841 -0.302691  0.932249 -0.000997
2     -0.308838 -1.228226  0.567540  1.111681 -1.675584 -0.000997
3     -0.858471  0.119029  1.874423  1.071938 -1.007796 -0.000997
4      0.403788  0.631750 -1.258448 -1.783140 -1.340958 -0.000997
...         ...       ...       ...       ...       ...       ...
79995 -1.070399 -0.769946  1.094526  0.742549 -0.650372 -0.000997
79996  0.787721  0.803089 -1.202470  0.582324 -0.045666 -0.000997
7

In [27]:
alt.Chart(jigsaw_test).mark_bar().encode(alt.X(f"{definitions.JIGSAW_TARGETS[dataset][target][0]}:Q", bin=True), y="count()")

In [28]:
jigsaw_difference = pd.DataFrame({'Actual - Jigsaw': y_test.values[:, 0] - jigsaw_test[definitions.JIGSAW_TARGETS[dataset][target][0]].values})
print(jigsaw_difference)

Actual - Jigsaw
0             0.39657
1             6.79680
2            14.84251
3             5.29880
4             5.72239
...               ...
9995         15.34251
9996         37.24162
9997         35.18403
9998          5.60543
9999          2.64720

[10000 rows x 1 columns]


In [29]:
alt.Chart(jigsaw_difference).mark_bar().encode(alt.X("Actual - Jigsaw:Q", bin=alt.Bin(extent=[0, 100], step=5)), y="count()")

In [30]:
print('mae = ' + str(tf.keras.losses.MAE(y_test.values[:, 0], jigsaw_test.values[:, 0])))
print('mape = ' + str(tf.keras.losses.MAPE(y_test.values[:, 0], jigsaw_test.values[:, 0])))
print('rmse = ' + str(tf.keras.losses.MSE(y_test.values[:, 0], jigsaw_test.values[:, 0])**0.5))

mae = tf.Tensor(16.617048470402004, shape=(), dtype=float64)
mape = tf.Tensor(20.40721814904974, shape=(), dtype=float64)
rmse = tf.Tensor(25.327719402850715, shape=(), dtype=float64)


In [31]:
print(y_val)
print(y_train.min())
print(y_train.max())
print(y_train)
print(y_train[y_train['Wm_gen'] > 1000])

Wm_gen
0     81.34052
1     86.00704
2     84.13220
3     85.02779
4     80.34086
...        ...
9995  79.20677
9996  80.51755
9997  81.02884
9998  80.53999
9999  81.03747

[10000 rows x 1 columns]
Wm_gen    24.26636
dtype: float64
Wm_gen    5660.585
dtype: float64
         Wm_gen
0      80.70409
1      79.64823
2      78.37740
3      80.05252
4      79.60021
...         ...
79995  79.93488
79996  80.11201
79997  80.72748
79998  80.65090
79999  81.30719

[80000 rows x 1 columns]
         Wm_gen
3004   4000.270
5605   1622.509
11039  1134.112
42519  5660.585


In [32]:
alt.Chart(y_train).mark_bar().encode(alt.X("Wm_gen:Q", bin=alt.Bin(extent=[40, 130], step=5)), y="count()")

In [33]:
def build_model(hparams, input_shape):
    model = keras.Sequential()
    model.add(layers.Flatten(input_shape=input_shape))
    for _ in range(hparams['num_layers']):
        model.add(layers.Dense(units=hparams['num_units'],
                               activation='relu'))
    model.add(layers.Dense(1))
    model.compile(
        optimizer=keras.optimizers.Adam(
            hparams['learning_rate']),
        loss='mean_squared_error',
        metrics=[MeanAbsolutePercentageError(), MeanAbsoluteError(), RootMeanSquaredError()])
    return model

In [34]:
log_dir = definitions.LOG_DIR / 'Wlnu' / 'v1'
shutil.rmtree(log_dir)
log_dir.mkdir()
hp_rv = {'num_layers': randint(1, 2),
            'num_units': StepUniform(start=10, num=10, step=10),
            'learning_rate': LogUniform(loc=-5, scale=4, base=10, discrete=False),
            'batch_size': StepLogUniform(start=5, num=4, step=1, base=2),
            'epochs': randint(10, 100)}
print(log_dir)

C:\Users\alexj\work\mass_regression\logs\Wlnu\v1


In [35]:
train.random_search(build_fn=build_model, x=x_train, y=y_train, x_val=x_val, y_val=y_val, n=20, hp_rv=hp_rv, log_dir=log_dir)

tage_error: 85.5228 - mean_absolute_error: 69.7001 - root_mean_squared_error: 75.0578 - val_loss: inf - val_mean_absolute_percentage_error: 85.1834 - val_mean_absolute_error: 69.3099 - val_root_mean_squared_error: 71.0320
Epoch 58/86
Epoch 59/86
Epoch 60/86
Epoch 61/86
Epoch 62/86
Epoch 63/86
Epoch 64/86
Epoch 65/86
Epoch 66/86
Epoch 67/86
Epoch 68/86
Epoch 69/86
Epoch 70/86
Epoch 71/86
Epoch 72/86
Epoch 73/86
Epoch 74/86
Epoch 75/86
Epoch 76/86
Epoch 77/86
Epoch 78/86
Epoch 79/86
Epoch 80/86
Epoch 81/86
Epoch 82/86
Epoch 83/86
Epoch 84/86
Epoch 85/86
Epoch 86/86
Train on 80000 samples, validate on 10000 samples
Epoch 1/11
Epoch 2/11
Epoch 3/11
Epoch 4/11
Epoch 5/11
Epoch 6/11
Epoch 7/11
Epoch 8/11
Epoch 9/11
Epoch 10/11
Epoch 11/11
Train on 80000 samples, validate on 10000 samples
Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
Epoch 13/60
Epoch 14/60
Epoch 15/60
Epoch 16/60
Epoch 17/60
Epoch 18/60


In [0]:
model = tf.keras.models.load_model(str(log_dir / 'best_model.h5'))

In [0]:
y_pred = model.predict(x_val)
print(y_pred[:, 0].shape)

In [0]:
import altair as alt
from vega_datasets import data

import pandas as pd

In [0]:
chart_data = pd.DataFrame({'Price': np.concatenate((y_pred[:, 0], y_val)), 'Type': ['Estimate']*y_pred.shape[0] + ['Actual']*y_pred.shape[0]})
print(chart_data)

In [0]:
alt.Chart(chart_data).mark_bar().encode(alt.X("Price:Q", bin=True), y="count()", color="Type")

In [0]:
difference = y_pred[:, 0] - y_val

In [0]:
chart_data = pd.DataFrame({'Actual - Expected': difference})
print(chart_data)

In [0]:
alt.Chart(chart_data).mark_bar().encode(alt.X("Actual - Expected:Q", bin=alt.Bin(step=2)), y="count()")