In [1]:
import tensorflow as tf
import keras_tuner
import numpy as np


import pandas as pd
import openpyxl # Needed for reading excel
import pathlib

import decomposition
import models
import data
from metrics import smape
from windower import WindowGenerator
import hp_training
import results

2023-03-10 13:09:37.921219: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [157]:
# Resolve the project layout relative to the directory the notebook runs from.
cwd = pathlib.Path.cwd()
code_directory = cwd.parents[1]  # two levels above the working directory
gonem_directory = code_directory.joinpath("notebooks", "Gonem")
model_directory = gonem_directory.joinpath("models")

# Model identifier and commodity; both are embedded in the artifact names below.
model = "ARF"
product = "wheat"

# Folder for this run's artifacts, and the checkpoint prefix inside that folder.
model_path = model_directory.joinpath(f"{model}_{product}_in_sample")
checkpoint_path = model_path.joinpath(f"{model}_{product}")


In [158]:
# Load the dataset for the selected product and drop its last two rows.
# NOTE(review): the reason for trimming two rows is not shown here — confirm.
df = data.get_data(directory_path=gonem_directory, product=product).iloc[:-2]

# Summary statistics as a quick sanity check of the loaded frame.
df.describe()

Unnamed: 0_level_0,AVG_TAVG,AVG_TAVG,AVG_TAVG,AVG_TAVG,AVG_TAVG,MAX_TMAX,MAX_TMAX,MAX_TMAX,MAX_TMAX,MAX_TMAX,...,renewable_energy_consumption_perc_of_total,renewable_energy_consumption_perc_of_total,renewable_energy_consumption_perc_of_total,renewable_energy_consumption_perc_of_total,renewable_energy_consumption_perc_of_total,unemployment_total,unemployment_total,unemployment_total,unemployment_total,unemployment_total
PARTNER_Labels,Belgium,France,Germany,Romania,United Kingdom,Belgium,France,Germany,Romania,United Kingdom,...,Belgium,France,Germany,Romania,United Kingdom,Belgium,France,Germany,Romania,United Kingdom
count,214.0,214.0,214.0,214.0,214.0,214.0,214.0,214.0,214.0,214.0,...,214.0,214.0,214.0,214.0,214.0,214.0,214.0,214.0,214.0,214.0
mean,111.30252,125.494113,104.143755,105.397067,96.544608,215.733645,225.531663,218.804582,224.761987,168.507655,...,7.672173,12.883762,13.639136,22.297593,7.120444,7.272757,8.96986,5.587944,6.091075,5.50507
std,57.100073,55.456797,65.178828,81.500731,39.31521,79.02219,67.153277,81.278888,81.711107,50.004673,...,2.698506,2.273139,2.851412,2.019432,3.915944,0.994389,0.867082,2.21661,0.996584,1.547705
min,-0.733333,20.168095,-29.860742,-65.129397,7.340256,59.0,107.622222,36.388889,47.0,74.5,...,2.46,8.52,7.28,17.39,1.35,5.36,7.39,3.14,3.91,3.74
25%,63.525974,77.127932,48.280935,29.836943,61.896038,147.5,163.656487,140.181174,157.733333,124.583333,...,5.438125,11.155,11.109375,21.41625,3.506875,6.26,8.08625,3.64625,5.508333,3.91875
50%,104.34596,121.548459,102.466712,104.869216,96.188597,223.0,228.556851,226.650735,229.75,164.4375,...,9.01625,13.286667,13.97125,23.290833,7.1925,7.46,9.06875,5.01125,6.412083,5.27875
75%,163.724856,177.640994,163.84533,182.880379,131.974731,281.0,284.822581,287.334967,300.5,208.409091,...,10.21,15.31,16.448125,23.54,11.380625,8.149583,9.789167,7.403125,6.92625,7.140625
max,239.666667,231.747995,229.864177,243.079179,178.133333,397.0,362.382979,371.823529,367.75,300.5625,...,10.66,15.53,17.17,24.4,12.24,8.52,10.35,11.17,7.27,8.04


In [159]:
# Keep every column whose top-level (level 0) name is 'price'; the result is a
# list of full column keys, one per partner (see the displayed output).
is_price_column = df.columns.get_level_values(0).isin(['price'])
label_columns = df.columns[is_price_column].tolist()
label_columns

[('price', 'Belgium'),
 ('price', 'France'),
 ('price', 'Germany'),
 ('price', 'Global'),
 ('price', 'Romania'),
 ('price', 'United Kingdom')]

In [160]:
# Individual preprocessing steps; `std` is kept as a global because its fitted
# mean/std are read again when the label post-processor is built.
stl = decomposition.STLDecomposer(
    labels=label_columns,
    period=12,
)
log = decomposition.Logger(labels=label_columns)
std = decomposition.Standardizer()

# Steps are registered in this order: STL decomposition, log, standardisation.
preproc = (
    decomposition.Processor()
    .add(stl)
    .add(log)
    .add(std)
)

In [161]:
# Window geometry: 24 input steps, 6 label steps, labels shifted 6 steps past
# the end of the input (total window size 30, see the repr below).
width = 24
label_width = 6
shift = 6

# Alternative train/validation splits that were tried previously:
#   train_begin=0, train_end=.9, val_begin=None, val_end=.96
#   train_begin=0, train_end=.5, val_begin=None, val_end=.8
# NOTE(review): test_begin=0. / test_end=1. looks like the test split spans the
# whole series (consistent with the "_in_sample" model path) — confirm against
# WindowGenerator.
window = WindowGenerator(
    input_width=width,
    label_width=label_width,
    shift=shift,
    data=df,
    train_begin=0,
    train_end=.97,
    val_begin=None,
    val_end=None,
    test_begin=0.,
    test_end=1.,
    connect=True,
    remove_labels=True,
    label_columns=label_columns,
)
window.preprocess(preproc)
window

Total window size: 30
Input indices: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23]
Label indices: [24 25 26 27 28 29]
Label column name(s): [('price', 'Belgium'), ('price', 'France'), ('price', 'Germany'), ('price', 'Global'), ('price', 'Romania'), ('price', 'United Kingdom')]

In [162]:
# Build a label-only post-processor from the statistics held by `std`, restricted
# to the label columns, so model outputs can be mapped back via `.reverse`.
label_std = decomposition.Standardizer(
    mean=std.mean[window.label_columns],
    std=std.std[window.label_columns],
)
label_log = decomposition.Logger(
    label_indices=range(len(window.label_columns)),
)
postproc = (
    decomposition.Processor()
    .add(label_std)
    .add(label_log)
)
window.add_label_postprocess(postproc)

In [163]:
# Create the tuner for this model; the log output shows that existing tuner
# state under the hp directory is reloaded rather than started from scratch.
hp_directory = gonem_directory / 'hp'
tuner = hp_training.get_tuner(model, hp_directory, window)

INFO:tensorflow:Reloading Tuner from /code/notebooks/Gonem/hp/ARF/tuner0.json


In [164]:
# Hyperparameter search is disabled: the tuner above is reloaded from saved state.
# Uncomment to run the search again.
# hp_training.run(tuner, window)

In [165]:
# Take the top-ranked hyperparameter set from the tuner and display its values.
ranked_hps = tuner.get_best_hyperparameters()
best_hps = ranked_hps[0]
best_hps.values

{'lstm_units': 224,
 'lstm_layers': 1,
 'prediction_units': 64,
 'prediction_layers': 6,
 'feature_units': 416,
 'feature_layers': 8,
 'heads': 5,
 'dropout': 0.18732635985077017,
 'key_dim': 112,
 'l1': 0.0009003172160791668,
 'l2': 0.00010979313621486196,
 'learning_rate': 0.000291383964865669,
 'tuner/epochs': 200,
 'tuner/initial_epoch': 67,
 'tuner/bracket': 2,
 'tuner/round': 2,
 'tuner/trial_id': '0200'}

In [166]:
# Final training run using the tuner and window, with checkpoint_path passed in.
# The call's return value (a models.AutoregressiveFeedback instance, per the
# cell output) is only displayed, not kept; the model is rebuilt in a later cell.
hp_training.final_train(tuner, window, checkpoint_path)

Epoch 1/200
6/6 - 12s - loss: 215.2585 - mae: 0.7864 - mse: 0.9938 - mape: 140.5968 - smape: 160.5841 - val_loss: 166.5733 - val_mae: 0.7571 - val_mse: 0.9659 - val_mape: 263.0664 - val_smape: 113.4517 - 12s/epoch - 2s/step
Epoch 2/200
6/6 - 2s - loss: 176.2173 - mae: 0.7821 - mse: 1.0068 - mape: 247.9312 - smape: 122.5480 - val_loss: 125.0605 - val_mae: 0.5064 - val_mse: 0.6103 - val_mape: 205.3901 - val_smape: 72.0818 - 2s/epoch - 392ms/step
Epoch 3/200
6/6 - 2s - loss: 159.0959 - mae: 0.6678 - mse: 0.8443 - mape: 221.2731 - smape: 106.0408 - val_loss: 127.8707 - val_mae: 0.5695 - val_mse: 0.7326 - val_mape: 197.5930 - val_smape: 74.2263 - 2s/epoch - 342ms/step
Epoch 4/200
6/6 - 2s - loss: 160.4147 - mae: 0.6224 - mse: 0.7672 - mape: 196.1225 - smape: 107.4844 - val_loss: 129.8403 - val_mae: 0.6384 - val_mse: 0.8046 - val_mape: 295.0892 - val_smape: 77.0163 - 2s/epoch - 339ms/step
Epoch 5/200
6/6 - 2s - loss: 157.0121 - mae: 0.7388 - mse: 0.9774 - mape: 293.7535 - smape: 104.7738 - v

<models.AutoregressiveFeedback at 0x7fa499e57880>

In [167]:
# Rebuild the architecture from the best hyperparameters and restore weights from
# checkpoint_path (presumably written by final_train — confirm).
m = tuner.hypermodel.build(best_hps)
m.load_weights(checkpoint_path)
# Evaluate on window.test. NOTE(review): the displayed list presumably follows the
# training-log order (loss, mae, mse, mape, smape) — confirm via m.metrics_names.
m.evaluate(window.test)



[128.99041748046875,
 0.5177812576293945,
 0.6160849928855896,
 222.86273193359375,
 76.82357025146484]

In [168]:
# Disabled plotting scratch cell.
# NOTE(review): it refers to `w`, but the window generator visible in this
# notebook is named `window`; rename before re-enabling, or delete this cell.
# w.test

# # val_performance['1'] = m.evaluate(w.val)
# for i in range(6):

#     label = label_columns[i]
#     print(label)
#     # performance['1'] = m.evaluate(w.test)
#     w.plot(m, plot_col=label, max_subplots=7)


In [169]:
# Collect, for at most 40 batches of the test split: the raw inputs, the labels
# and predictions mapped back through the label post-processor, the attention
# weights, and the Monte Carlo dropout predictions.
inputs, labels, predictions, weights, mcds = [], [], [], [], []
for x, y in window.test.take(40):
    inputs.append(x)

    # Undo the label preprocessing so values are on the original scale.
    lab = window.label_postprocessor.reverse(y)
    labels.append(lab)

    pred = window.label_postprocessor.reverse(m(x))
    predictions.append(pred)

    # With return_weights=True the attention layer returns a sequence whose
    # second element is the attention weights.
    weight = m.attention_layer(x, return_weights=True)[1]
    # Append exactly once per batch. The weights were previously appended twice,
    # which doubled memory use without changing the mean computed below.
    weights.append(weight)

    # 100 stochastic forward passes, each mapped back through the same reverse
    # transform (semantics defined by results.monte_carlo_dropout).
    mcd = results.monte_carlo_dropout(x, m, 100, window.label_postprocessor.reverse, return_weight=False)
    mcds.append(mcd)

# Stitch the per-batch pieces together along the batch axis.
inputs = tf.concat(inputs, axis=0)
labels = tf.concat(labels, axis=0)
predictions = tf.concat(predictions, axis=0)

# Average the attention weights over every collected sample.
weights = tf.concat(weights, axis=0)
weights = tf.reduce_mean(weights, axis=0)

# NOTE(review): concatenating on axis 1 assumes monte_carlo_dropout puts the MC
# sample dimension first and the batch dimension second — confirm.
mcds = tf.concat(mcds, axis=1)

In [170]:
# Persist each collected tensor as "<product>_<suffix>" inside model_path
# (np.save appends the ".npy" extension). Order matches the original cell.
arrays_to_save = {
    "inputs": inputs,
    "labels": labels,
    "weights": weights,
    "predictions": predictions,
    "mcd_predictions": mcds,
}
for suffix, tensor in arrays_to_save.items():
    np.save(model_path / f"{product}_{suffix}", tensor.numpy())


In [100]:
# Sanity check of the stacked predictions; the recorded output is (185, 6, 6).
# label_width is 6 and there are 6 label columns, so the two trailing axes cannot
# be told apart from the shape alone.
# NOTE(review): this cell's execution count (100) is out of sequence with the
# cells above; re-run after a kernel restart to confirm it still holds.
predictions.shape

TensorShape([185, 6, 6])