In [1]:
import os
import matplotlib.pyplot as plt
import tensorflow as tf

import numpy as np
import pandas as pd

import lib.Metrics as Metrics
from lib.models.IRNN_Full_Bayes import IRNN_Full_Bayes
from lib.models.IRNN import IRNN
from lib.train_functions import fit
from lib.utils import *
from lib.regional_data_builder import DataConstructor

print(tf.config.list_physical_devices('GPU'))

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [None]:
region = 'US'
n_queries = 90
n_regions = 1

for test_season in [2015, 2016, 2017, 2018]:
    batch_size = 32
    window_size = 28
    _data = DataConstructor(test_season=test_season, region = region, window_size=window_size, n_queries=n_queries, gamma=28)
    x_train, y_train, x_test, y_test, scaler = _data()

    x_train = tf.cast(x_train, tf.float32)
    y_train = tf.cast(y_train, tf.float32)
    x_test = tf.cast(x_test, tf.float32)
    y_test = tf.cast(y_test, tf.float32)

    train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))
    train_dataset = train_dataset.shuffle(buffer_size=1024).batch(batch_size)

    n_op = y_test.shape[-1]

    gamma = 28
    epochs= 30
    kl_power= -2.857091693154802
    lr_power= -3.7364141761644545
    op_scale_pwr= -0.27139484469983133
    p_scale_pwr= -1.827071638197097
    q_scale_pwr= -1.2461978663286395
    rnn_units= 108
    lag = 14
    epochs= 1
    n_samples = 3
    prediction_steps = 5


    _model = IRNN_Full_Bayes(kl_power=kl_power, 
                        n_op=n_op,
                        op_scale_pwr=op_scale_pwr,
                        p_scale_pwr=p_scale_pwr,
                        q_scale_pwr=q_scale_pwr,
                        rnn_units = rnn_units, 
                        gamma=gamma,       
                        window_size=window_size, 
                        lag = lag,
                        n_samples=prediction_steps,
                        n_regions = n_regions,
                        use_bn = True
                        )

    def loss_fn(y, p_y):
        return -p_y.log_prob(y)

    optimizer = tf.optimizers.Adam(learning_rate=10**lr_power)
    
    pred = _model(x_test)

    _model, history = fit(_model, 
                        train_dataset,
                        optimizer=optimizer, 
                        epochs = epochs, 
                        loss_fn = loss_fn,  
                        prediction_steps = prediction_steps,
                        reset_pos=2000
                        )
    
    file = f'Weights/{test_season}{region}_weights.tf'
    if os.path.exists(file):
        os.remove(file)
        
    _model.save_weights(file)

In [4]:
region = 'US'
n_queries = 90
n_regions = 1

predictions = {}
for test_season in [2016]:
    batch_size = 32
    window_size = 28
    _data = DataConstructor(test_season=test_season, region = region, window_size=window_size, n_queries=n_queries, gamma=28)
    x_train, y_train, x_test, y_test, scaler = _data()

    x_train = tf.cast(x_train, tf.float32)
    y_train = tf.cast(y_train, tf.float32)
    x_test = tf.cast(x_test, tf.float32)
    y_test = tf.cast(y_test, tf.float32)

    train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))
    train_dataset = train_dataset.shuffle(buffer_size=1024).batch(batch_size)

    n_op = y_test.shape[-1]

    gamma = 28
    epochs= 30
    kl_power= -2.857091693154802
    lr_power= -3.7364141761644545
    op_scale_pwr= -0.27139484469983133
    p_scale_pwr= -1.827071638197097
    q_scale_pwr= -1.2461978663286395
    rnn_units= 108
    lag = 14

    n_samples = 3
    prediction_steps = 5
    n_regions = 49


    file = f'Weights/{test_season}{region}_weights.tf'

    loaded_model = IRNN_Full_Bayes(kl_power=kl_power, 
                            n_op=n_op,
                            op_scale_pwr=op_scale_pwr,
                            p_scale_pwr=p_scale_pwr,
                            q_scale_pwr=q_scale_pwr,
                            rnn_units = rnn_units, 
                            gamma=gamma,       
                            window_size=window_size, 
                            lag = lag,
                            n_samples=prediction_steps,
                            use_bn = True
                            )
    
    try:
        scaler = scaler[np.newaxis, np.newaxis, :]
    except:
        scaler = scaler[0]
        
    loaded_model.load_weights(file)

    y_pred = loaded_model.predict(x_test, 128, verbose=True)
    std = (y_pred[0]+y_pred[1])[..., -n_regions:] * scaler - y_pred[0][..., -n_regions:] * scaler
    mean = y_pred[0][..., -n_regions:] * scaler
    y_te = y_test[..., -n_regions:]*scaler

    predictions[test_season] = {'true':y_te,
                            'mean':mean,
                            'std':std}

  scaler = scaler[0]


In [8]:
test_seasons = [2016]
skills = pd.DataFrame(index = [7,14,21,28], columns = test_seasons, dtype=float)
nlls = pd.DataFrame(index = [7,14,21,28], columns = test_seasons, dtype=float)
for g in [6,13,20,27]:
    for season in test_seasons:
        try:
            pred = predictions[season]
            skills.loc[g+1, season] = np.exp(np.mean(Metrics.mb_log(pred['true'][:, g, :], pred['mean'][:, g, :], pred['std'][:, g, :]))).astype(float)
            nlls.loc[g+1, season] = Metrics.nll(pred['true'][:, g-1, -1], pred['mean'][:, g-1, -1], pred['std'][:, g-1, -1])
        except:
            pass
display(skills)
display(nlls)

for test_season in test_seasons:
    pred = predictions[test_season]
    for num, g in zip([1,2,3,4], [6,13,20,27]):
        plt.subplot(2,2,num)
        plt.plot(pred['true'][:, g, -1], color='black')
        plt.plot(pred['mean'][:, g, -1], color='red')
        plt.fill_between(np.arange(pred['true'].shape[0]), (pred['mean']-pred['std'])[:, g, -1], (pred['mean']+pred['std'])[:, g, -1], color='red', linewidth = 0, alpha = 0.3)
    plt.show()

  mbl = np.log((dist.cdf(true + 0.6) - dist.cdf(true - 0.5)))


Unnamed: 0,2016
7,0.767802
14,0.769358
21,0.74311
28,0.717331


Unnamed: 0,2016
7,0.344572
14,0.375264
21,0.536412
28,0.668337
