In [1]:
import pathlib

import keras_tuner
import numpy as np
import openpyxl # Needed for reading excel
import pandas as pd
import tensorflow as tf

import data
import decomposition
import hp_training
import metrics
import models
import results
from metrics import smape
from windower import WindowGenerator

2023-03-10 22:30:42.496665: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# --- Project layout ---------------------------------------------------------
# All directories are resolved from the current working directory; the project
# root is taken to be two levels above it (cwd.parents[1]).
cwd = pathlib.Path.cwd()

code_directory = cwd.parents[1]
gonem_directory = code_directory / "notebooks" / "Gonem"
data_directory = code_directory / "data"
hp_directory = code_directory / "hp"
scenario_directory = code_directory / "scenarios"
model_directory = code_directory / 'models'

# --- Run selection ----------------------------------------------------------
model = 'ARF' # SS, ARF or ED
product = 'maize' # maize, sunflower or wheat 

data_type = 'in_sample' # in_sample or out_sample
scenario = 2 # 0: the normal setting, 1 or 2

# --- Derived paths ----------------------------------------------------------
# The checkpoint shares its name with the folder that contains it.
model_path = model_directory / f"{model}_{product}"
checkpoint_path = model_path / f"{model}_{product}"

# Currently the same location as `model_path`.
results_path = model_directory / f"{model}_{product}"

# Names of the regular files in the scenario directory.
# Path.iterdir() yields entries in arbitrary order, so the names are sorted to
# make the 1-based `scenario` number select the same file on every run.
scenario_files = sorted(
    path.name for path in scenario_directory.iterdir() if path.is_file()
)

In [10]:
# Load the working frame: the regular product data when no scenario is selected
# (scenario == 0), otherwise the selected scenario file.
if not scenario:
    df = data.get_data(directory_path=data_directory, product=product)
else:
    # Scenario numbers start at 1, list positions at 0.
    # From here on `data_type` holds the scenario file name.
    data_type = scenario_files[scenario - 1]
    print(data_type)
    df = data.get_data(scenario_directory / data_type)


# Drop the last two rows before summarising.
df = df.iloc[:-2]
df.describe()

In [11]:
# Keep every column whose level-0 name is one of the label features.
label_features = ['price']
is_label = df.columns.get_level_values(0).isin(label_features)
label_columns = df.columns[is_label].tolist()
label_columns

[('price', 'Brazil'),
 ('price', 'France'),
 ('price', 'Germany'),
 ('price', 'Global'),
 ('price', 'Hungary'),
 ('price', 'Ukraine')]

In [12]:
# Individual preprocessing steps; their exact behaviour lives in decomposition.py.
stl = decomposition.STLDecomposer(
    labels=label_columns,
    period=12,
)
log = decomposition.Logger(labels=label_columns)
std = decomposition.Standardizer()

# Steps are registered in this order: stl, then log, then std.
preproc = (
    decomposition.Processor()
    .add(stl)
    .add(log)
    .add(std)
)

In [13]:
# Window geometry: 24 input steps and 6 label steps with a shift of 6.
width, label_width, shift = 24, 6, 6

# No explicit test start for the 'in_sample' run; every other data_type
# (including a scenario file name) starts the test range at 0.
test_begin = None if data_type == 'in_sample' else 0.

# Split fractions tried previously:
#   train_begin=0, train_end=.9, val_begin=None, val_end=.96
#   train_begin=0, train_end=.5, val_begin=None, val_end=.8
window = WindowGenerator(
    input_width=width,
    label_width=label_width,
    shift=shift,
    data=df,
    train_begin=0.,
    train_end=.97,
    val_begin=None,
    val_end=None,
    test_begin=test_begin,
    test_end=1.,
    connect=True,
    remove_labels=True,
    label_columns=label_columns,
)
window.preprocess(preproc)
window

Total window size: 30
Input indices: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23]
Label indices: [24 25 26 27 28 29]
Label column name(s): [('price', 'Brazil'), ('price', 'France'), ('price', 'Germany'), ('price', 'Global'), ('price', 'Hungary'), ('price', 'Ukraine')]

In [14]:
# Label-only post-processing, built from the statistics held by the fitted
# `std` step and restricted to the window's label columns.
target_columns = window.label_columns

label_std = decomposition.Standardizer(
    mean=std.mean[target_columns],
    std=std.std[target_columns],
)
label_log = decomposition.Logger(label_indices=range(len(target_columns)))

postproc = decomposition.Processor().add(label_std).add(label_log)
window.add_label_postprocess(postproc)

In [80]:
def _load_tensor(file_name):
    """Read the ``.npy`` file ``file_name`` from ``model_path`` and return it as a tensor."""
    return tf.convert_to_tensor(np.load(model_path / file_name))


# Only the inputs file carries the data type in its name.
inputs = _load_tensor(f"{product}_inputs_{data_type}.npy")
labels = _load_tensor(f"{product}_labels.npy")
weights = _load_tensor(f"{product}_weights.npy")
predictions = _load_tensor(f"{product}_predictions.npy")
mcds = _load_tensor(f"{product}_mcd_predictions.npy")

In [83]:
# Shapes of the loaded tensors, in the order they were loaded.
tuple(tensor.shape for tensor in (inputs, labels, weights, predictions, mcds))

(TensorShape([7, 24, 75]),
 TensorShape([7, 6, 6]),
 TensorShape([1, 75]),
 TensorShape([7, 6, 6]),
 TensorShape([100, 7, 6, 6]))

In [81]:
# Tabulate the first row of `weights` against the training-frame columns.
first_weight_row = weights[0]
train_columns = window.train_df.columns
results.weight_results(first_weight_row, train_columns)

Unnamed: 0,country,price_trend,price_seasonal,price_residual,AVG_TAVG,Corn Price Futures,MAX_TMAX,MIN_TMIN,OBS_VALUE_QUANTITY_TON,OIL_PRICE $/bbl,agricultural_land,energy_use_kg_of_oil_equivalent_per_capita,fossil_fuel_energy_consumption_perc_of_total,gdp_current_us,population_total,renewable_energy_consumption_perc_of_total,unemployment_total,price
0,Brazil,0.494678,0.739065,0.500782,0.254973,,0.98843,0.168805,0.529142,,0.020744,0.507422,0.107799,0.122484,0.079451,0.437595,0.04205,
1,France,0.993519,0.720823,0.991954,0.413029,,0.035834,0.150684,0.116774,,0.17572,0.073418,0.244538,0.526178,0.305757,0.033009,0.084799,
2,Germany,0.481186,0.879754,0.846405,0.062893,,0.558997,0.544435,0.811979,,0.975717,0.421624,0.045476,0.94126,0.026491,0.005236,0.481805,
3,Global,0.91311,0.940519,0.987832,,0.455777,,,,0.510973,,,,,,,,
4,Hungary,0.075148,0.995326,0.310927,0.334205,,0.572407,0.007502,0.002124,,0.021501,0.2207,0.783732,0.487172,0.002497,0.44257,0.506175,
5,Ukraine,0.501829,0.404425,0.505124,0.059433,,0.732472,0.559622,0.723768,,0.587294,0.317992,0.052708,0.991868,0.525882,0.061814,0.993148,


In [82]:
# Second argument of results.forecast_interval.
# NOTE(review): presumably a significance/coverage level — confirm in results.py.
interval_arg = 0.1
intervals = results.forecast_interval(mcds, interval_arg)

In [72]:
# Halved sMAPE at the last position of the middle axis, one value per entry of
# the trailing axis.
# NOTE(review): axes are presumably (window, horizon step, label column) — confirm.
# `metrics` is imported in the notebook's import cell.
last_step = -1  # index 5 for the current 6-step arrays
for i in range(labels.shape[-1]):
    # NOTE(review): the division by 2 presumably rescales a 0-200 sMAPE to
    # 0-100 — confirm against metrics.smape.
    print(metrics.smape(predictions[:, last_step, i], labels[:, last_step, i]) / 2)

tf.Tensor(14.429292, shape=(), dtype=float32)
tf.Tensor(11.13741, shape=(), dtype=float32)
tf.Tensor(26.910423, shape=(), dtype=float32)
tf.Tensor(12.702807, shape=(), dtype=float32)
tf.Tensor(7.5614967, shape=(), dtype=float32)
tf.Tensor(14.304833, shape=(), dtype=float32)
