In [1]:
# Stretch the notebook container to the full browser width.
# `display` and `HTML` come from the public `IPython.display` module; importing them
# from `IPython.core.display` is deprecated and emits a DeprecationWarning.
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [2]:
import torch
import os
import pandas as pd
import pickle
import numpy as np
import math

import plotly.graph_objects as go
import  plotly.express  as px
from plotly.subplots import make_subplots

import sys
# Put the parent directory on the import path (once) before importing `glms`;
# presumably that is where the project-local `glms` package lives — TODO confirm.
if ".." not in sys.path: sys.path.insert(0,"..")
import importlib
import glms    
from glms import NeuralDGLM


## Initial Results: Observing predictions for the Australia dataset

In [156]:
# Mean *observed* rainfall per location, taken at index 2 of every window.
# `target_rain_value` is the ground truth (it is the series plotted as 'obs' elsewhere in
# this notebook), so the printed label must not describe it as a prediction.
# NOTE: `test_output` is loaded by a cell further down; on a fresh kernel run that cell first.
print("Average observed rainfall (target_rain_value) per location")
{ location: location_output['target_rain_value'][:,2].mean() for location, location_output in test_output.items() }

Average prediction for mean term in predicted distribution for rainfall


{'Adelaide': 1.4793103448275864,
 'Albury': 1.8355164835164837,
 'AliceSprings': 0.8505788067675869,
 'BadgerysCreek': 2.221972132904609,
 'Ballarat': 1.5295627376425855,
 'Brisbane': 3.0472122302158273,
 'Cairns': 5.570773152081563,
 'Canberra': 1.8645805592543274,
 'Cobar': 1.1269573370839194,
 'CoffsHarbour': 4.664722536806342,
 'Dartmoor': 2.1072965388213283,
 'Darwin': 5.027389951089373,
 'GoldCoast': 3.647114030971375,
 'Hobart': 1.633318834275772,
 'Moree': 1.3129963898916965,
 'MountGambier': 1.9762770562770557,
 'Nhil': 0.8076470588235295,
 'NorahHead': 3.4462062256809336,
 'NorfolkIsland': 3.0599821348816434,
 'Nuriootpa': 1.2716460513796384,
 'PearceRAAF': 0.8476275738585497,
 'Perth': 1.8391752577319587,
 'PerthAirport': 1.8171142467000454,
 'Portland': 2.4377013963480128,
 'Richmond': 2.074204601076848,
 'Sydney': 3.3681476418318526,
 'SydneyAirport': 2.7280233122875184,
 'Townsville': 3.2214556163230963,
 'Tuggeranong': 2.082119205298013,
 'Uluru': 0.36897689768976893,
 '

In [157]:
# Report the smallest and largest value of each predicted quantity.
# NOTE(review): `pred_mean` and `pred_prob` are not defined in any visible cell —
# confirm where they come from before relying on a fresh-kernel run.
def _print_range(label, values):
    """Print `label` followed by the minimum and maximum of `values`."""
    print(label, values.min(), values.max())

_print_range("Pred Disp: min, max", pred_disp)
_print_range("Pred Mean: min, max", pred_mean)
_print_range("Prob: min, max", pred_prob)

Pred Disp: min, max 0.10657 1.201
Pred Mean: min, max 0.2095 4.805
Prob: min, max 9.316e-05 0.999


## Analysing predictions for a hurdle-type model

In [12]:
# Largest value of the predicted `p` parameter over every entry of `test_data`.
# NOTE: `test_data` is created by a cell further down; on a fresh kernel run that cell first.
pred_p_all = test_data['pred_p']
pred_p_all.max()

0.746

In [10]:
# Which trained run to inspect.
model_version = 51
target_distribution = "compound_poisson"
base_nn = "HLSTM"

# Locate the run directory and the checkpoint inside it.
path_ = f"../Checkpoints/DGLM_{base_nn}_{target_distribution}/lightning_logs/version_{model_version}/"
ckpt_path = glms.NeuralDGLM.get_ckpt_path(os.path.join(path_,"checkpoints"))

# Restore the model together with the scalers loaded from the same run directory,
# then freeze it and switch it to eval mode.
scaler_features, scaler_target = glms.NeuralDGLM.load_scalers(path_)
model = glms.NeuralDGLM.load_from_checkpoint(ckpt_path, save_hparams=False, scaler_features=scaler_features, scaler_target=scaler_target)
model.freeze()
model.eval()

# Load the stored test outputs (a mapping keyed by location name).
# The file is opened in a `with` block so the handle is closed deterministically.
# SECURITY: unpickling can execute arbitrary code — only load files from a trusted run directory.
with open(os.path.join(path_,"test_output.pkl"),"rb") as test_output_file:
    test_output = pickle.load(test_output_file)
test_data = test_output['Cairns']

# The predictions are a batch of 7-day windows, and each window overlaps the next
# one by 6 days. Taking the same within-window position (`day_idx`) from every
# window therefore gives a single value per window.

day_idx = 2  # 0-based position inside each window that is kept
pred_mu = test_data['pred_mu'][:,day_idx]
pred_p = test_data['pred_p'][:,day_idx] 
pred_disp = test_data['pred_disp'][:, day_idx]
target_rain_value = test_data['target_rain_value'][:,day_idx]
# NOTE(review): entries of test_data['date'] shorter than day_idx+1 are skipped here,
# while the arrays above are not filtered — `dates` may end up shorter than the
# arrays; confirm that they stay aligned.
dates = [ date_index[day_idx] for date_index in test_data['date'] if len(date_index)>day_idx]

# Summary statistics: the raw distribution parameters plus the mean and variance that
# the model's target distribution computes from them.
_m = model.target_distribution.get_mean(pred_mu, pred_disp, pred_p)
_v = model.target_distribution.get_variance(pred_mu, pred_disp, pred_p)
_p = pred_p  # alias so that every line of the report below has the same shape
# The line breaks and indentation inside the literal are part of the printed text.
print(f"""Mu max: {pred_mu.max():.3f} \t min: {pred_mu.min():.3f} \n 
        Disp max: {pred_disp.max():.3f} \t min: {pred_disp.min():.3f} \n
        Mean max: {_m.max():.3f} \t min: {_m.min():.3f} \n
        Var max: {_v.max():.3f} \t min: {_v.min():.3f} \n
        P   max: {_p.max():.3f} \t min: {_p.min():.3f}""")

# Layout: the series is cut into consecutive chunks of `datums_in_plot` points,
# and every chunk gets its own subplot.
data_len = pred_mu.size
datums_in_plot = 120
cols = 1
rows = math.ceil( data_len/(datums_in_plot*cols) )

# Each subplot carries a secondary y-axis, which the variance trace is drawn on.
fig = make_subplots(
    rows=rows,
    cols=cols,
    start_cell="top-left",
    specs=[ [{"secondary_y": True}]*cols ]*rows,
)

idx = 0
for cell_number in range(rows * cols):
    # Row-major walk over the grid, using plotly's 1-based coordinates.
    row = cell_number // cols + 1
    col = cell_number % cols + 1
    is_first_cell = (row==1 and col==1)  # legend entries are emitted only once

    # Chunk of the data shown in this subplot.
    chunk = slice(idx, idx + datums_in_plot)
    obs_chunk = target_rain_value[chunk]
    mu_chunk, disp_chunk, p_chunk = pred_mu[chunk], pred_disp[chunk], pred_p[chunk]
    dates_chunk = dates[chunk]

    # Mean and variance derived from the distribution parameters of this chunk.
    mean_chunk = model.target_distribution.get_mean(mu_chunk, disp_chunk, p_chunk)
    var_chunk = model.target_distribution.get_variance(mu_chunk, disp_chunk, p_chunk)

    # (values, trace name / legend group, line colour, extra add_trace keyword arguments)
    trace_specs = (
        (obs_chunk, 'obs', 'black', {}),                           # observed rain
        (mean_chunk, 'mean_pred', 'red', {}),                      # predicted mean
        (var_chunk, 'var_pred', 'purple', {'secondary_y': True}),  # predicted variance
    )
    for values, label, colour, axis_kwargs in trace_specs:
        fig.add_trace(
            go.Scatter(x=dates_chunk, y=values.tolist(), name=label, mode='lines',
                       legendgroup=label, line=dict(color=colour), showlegend=is_first_cell),
            row=row, col=col, **axis_kwargs
        )

    idx = chunk.stop

fig.update_layout(height=6600, width=1600, title_text="Predictions", showlegend=True)
fig.show()

Mu max: 16.485 	 min: 0.061 
 
        Disp max: 0.406 	 min: 0.406 

        Mean max: 16.485 	 min: 0.000 

        Var max: 8.857 	 min: 0.000 

        P   max: 1.100 	 min: 1.100


In [6]:
# Range of each raw distribution parameter over all entries of `test_data`
# (every window and every day, not just the selected day).
for label, key in (("Pred mu", 'pred_mu'), ("Pred disp", 'pred_disp'), ("Pred p", 'pred_p')):
    print( f"{label} min", test_data[key].min())
    print( f"{label} max", test_data[key].max())

Pred mu min 0.061266669576677195
Pred mu max 19.715660409132642
Pred disp min 0.40597732230722566
Pred disp max 0.40597732230722566
Pred p min 1.1
Pred p max 1.1
