In [2]:
import numpy as np
import pickle as pkl

from src.models.train_emos import train_emos, train_and_test_emos
from pit import make_cpit_diagram_emos, make_cpit_hist_emos 
from brier_score import brier_skill_plot, brier_plot
from src.models.get_data import get_tensors, get_normalized_tensor
from src.models.emos import EMOS
from scoring_tables import make_table

In [3]:
# Settings shared by the training loader here and the test loader in the next cell.
neighbourhood_size = 11
parameter_names = ['wind_speed', 'press', 'kinetic', 'humid', 'geopot']
# Passed to the loaders unchanged; presumably identifiers to leave out — TODO confirm.
ignore = ['229', '285', '323']
train_folds = [1, 2]

train_data = get_normalized_tensor(
    neighbourhood_size, parameter_names, train_folds, ignore
)

# Pull out the entries of the returned mapping that later cells use.
# 'mean' and 'std' are reused below to standardise the test features.
X_train, y_train, variances_train, mean_train, std_train = (
    train_data[key] for key in ('X', 'y', 'variances', 'mean', 'std')
)

print(X_train.shape)

(15733, 5)


In [4]:
# Fold kept out of training (train_folds is [1, 2]).
test_fold = 3

test_tensors = get_tensors(neighbourhood_size, parameter_names, test_fold, ignore)
X_test_raw, y_test, variances_test = test_tensors

# Standardise the test features with the *training* mean/std so both sets
# share the same scaling.
X_test = (X_test_raw - mean_train) / std_train

print(X_test.shape)

(7780, 5)


In [5]:
# Model-independent configuration: the feature list and the statistics used
# to normalise the features. Model-specific keys are added in later cells.
setup = {
    "num_features": len(parameter_names),
    "feature_mean": mean_train,
    "feature_std": std_train,
    "features": parameter_names,
    "neighbourhood_size": neighbourhood_size,
}

In [8]:
# Configuration for the twCRPS-trained models.
#
# `dict(setup)` takes a shallow copy. The previous `setup1 = setup` only
# aliased the dict, so every key written below was also written into the
# generic `setup` from the earlier cell and would silently leak into anything
# built from `setup` later on.
setup1 = dict(setup)
setup1.update({
    "loss": "loss_twCRPS_sample",
    "chain_function": "chain_function_normal_cdf",
    "chain_function_mean": 12,
    "chain_function_std": 4,
    "samples": 300,
    "optimizer": "Adam",
    "learning_rate": 0.01,
})

# The four base models differ only in their forecast distribution.
forecast_distributions = {
    "twcrps_tn": "distr_trunc_normal",
    "twcrps_ln": "distr_log_normal",
    "twcrps_gev": "distr_gev",
    "twcrps_frechet": "distr_frechet",
}

# NOTE(review): every EMOS instance receives the same `setup1` object, exactly
# as before. If EMOS keeps a reference to it instead of copying, later edits to
# `setup1` would be visible to earlier models — confirm against EMOS.__init__.
models_twcrps = {}
for model_name, forecast_distribution in forecast_distributions.items():
    setup1["forecast_distribution"] = forecast_distribution
    models_twcrps[model_name] = EMOS(setup1)

# Keep the individual names bound for any cell that refers to them directly.
twcrps_tn = models_twcrps["twcrps_tn"]
twcrps_ln = models_twcrps["twcrps_ln"]
twcrps_gev = models_twcrps["twcrps_gev"]
twcrps_frechet = models_twcrps["twcrps_frechet"]

print(len(models_twcrps))

Using default parameters for truncated normal distribution
Using default parameters for Log Normal distribution
Using default parameters for Generalized Extreme Value distribution
Using default parameters for Frechet distribution
4


In [12]:
# Number of epochs passed to every `fit` call in this notebook.
epochs = 600

# Train every model currently registered in `models_twcrps`, printing each
# model's summary once it has been fitted.
for name, model in models_twcrps.items():
    model.fit(X_train, y_train, variances_train, epochs, printing=False)
    print("Model: ", model)

Final loss:  0.11640031
Model:  EMOS Model Information:
Loss function: loss_twCRPS_sample (Samples: 300)
Forecast distribution: distr_trunc_normal
Parameters:
  a_tn: [0.5877269]
  b_tn: [ 0.92639434 -0.41853842 -0.11867731 -0.00225808  0.47088426]
  c_tn: [2.456814]
  d_tn: [0.9217215]
Features: wind_speed, press, kinetic, humid, geopot
Number of features: 5
Neighbourhood size: 11
Chaining function: chain_function_normal_cdf (Mean: 12.0, Std: 4.0)
Optimizer: Adam
Learning rate: 0.009999999776482582

Final loss:  0.12768853
Model:  EMOS Model Information:
Loss function: loss_twCRPS_sample (Samples: 300)
Forecast distribution: distr_log_normal
Parameters:
  a_ln: [1.2313056]
  b_ln: [ 0.09328818 -0.10883516 -0.0112197  -0.02521319  0.10967632]
  c_ln: [0.05795992]
  d_ln: [0.00217017]
Features: wind_speed, press, kinetic, humid, geopot
Number of features: 5
Neighbourhood size: 11
Chaining function: chain_function_normal_cdf (Mean: 12.0, Std: 4.0)
Optimizer: Adam
Learning rate: 0.0099999

In [13]:
# Switch the shared configuration to a two-component mixture whose first
# component is always the truncated normal.
setup1["forecast_distribution"] = "distr_mixture"
setup1["distribution_1"] = "distr_trunc_normal"

mixture_specs = (
    # (new mixture name, base model supplying component 2, distribution of component 2)
    ("twcrps_mix_tn_ln", "twcrps_ln", "distr_log_normal"),
    ("twcrps_mix_tn_gev", "twcrps_gev", "distr_gev"),
    ("twcrps_mix_tn_frechet", "twcrps_frechet", "distr_frechet"),
)

new_mixtures = {}
for mixture_name, component_name, component_distribution in mixture_specs:
    setup1["distribution_2"] = component_distribution
    mixture = EMOS(setup1)
    # Start from the parameters of the two base models trained in the earlier
    # cell: first the truncated normal, then the second component.
    mixture.set_parameters(models_twcrps["twcrps_tn"].get_parameters())
    mixture.set_parameters(models_twcrps[component_name].get_parameters())
    new_mixtures[mixture_name] = mixture

# Keep the individual names bound, then register the mixtures with the rest.
twcrps_mix_tn_ln, twcrps_mix_tn_gev, twcrps_mix_tn_frechet = new_mixtures.values()
models_twcrps.update(new_mixtures)

Using default parameters for truncated normal distribution
Using default parameters for Log Normal distribution
Parameter a_tn set to [0.5877269]
Parameter b_tn set to [ 0.92639434 -0.41853842 -0.11867731 -0.00225808  0.47088426]
Parameter c_tn set to [2.456814]
Parameter d_tn set to [0.9217215]
Parameter a_ln set to [1.2313056]
Parameter b_ln set to [ 0.09328818 -0.10883516 -0.0112197  -0.02521319  0.10967632]
Parameter c_ln set to [0.05795992]
Parameter d_ln set to [0.00217017]
Using default parameters for truncated normal distribution
Using default parameters for Generalized Extreme Value distribution
Parameter a_tn set to [0.5877269]
Parameter b_tn set to [ 0.92639434 -0.41853842 -0.11867731 -0.00225808  0.47088426]
Parameter c_tn set to [2.456814]
Parameter d_tn set to [0.9217215]
Parameter a_gev set to [0.3646196]
Parameter b_gev set to [ 0.8813661  -0.3919423  -0.11474001 -0.01915812  0.6592176 ]
Parameter c_gev set to [1.0464277]
Parameter d_gev set to [ 0.09107146 -0.02860311 

In [15]:
# Fit the three mixture models one after another and print each summary.
for mixture_name in ("twcrps_mix_tn_ln", "twcrps_mix_tn_gev", "twcrps_mix_tn_frechet"):
    mixture_model = models_twcrps[mixture_name]
    mixture_model.fit(X_train, y_train, variances_train, epochs, printing=False)
    print("Model: ", mixture_model)

Final loss:  0.11650933
Model:  EMOS Model Information:
Loss function: loss_twCRPS_sample (Samples: 300)
Forecast distribution: distr_mixture
Distribution 1: distr_trunc_normal
Distribution 2: distr_log_normal
Mixture weight: [0.8632085]
Parameters:
  weight: [0.8632085]
  a_tn: [-0.0580386]
  b_tn: [ 1.0153372  -0.7734631  -0.13046856 -0.29731596  0.42950717]
  c_tn: [3.7859535]
  d_tn: [1.0719019]
  a_ln: [1.2389482]
  b_ln: [ 0.07598402  0.15539557 -0.01059322  0.3055184   0.29462105]
  c_ln: [-0.00972036]
  d_ln: [-0.00358695]
Features: wind_speed, press, kinetic, humid, geopot
Number of features: 5
Neighbourhood size: 11
Chaining function: chain_function_normal_cdf (Mean: 12.0, Std: 4.0)
Optimizer: Adam
Learning rate: 0.009999999776482582

Final loss:  0.115676224
Model:  EMOS Model Information:
Loss function: loss_twCRPS_sample (Samples: 300)
Forecast distribution: distr_mixture
Distribution 1: distr_trunc_normal
Distribution 2: distr_gev
Mixture weight: [0.61700803]
Parameters:


In [18]:
# Same three pairings as the plain mixtures, but with the
# "distr_mixture_linear" forecast distribution.
setup1["forecast_distribution"] = "distr_mixture_linear"
setup1["distribution_1"] = "distr_trunc_normal"

linear_mixture_specs = (
    # (new mixture name, base model supplying component 2, distribution of component 2)
    ("twcrps_mixlinear_tn_ln", "twcrps_ln", "distr_log_normal"),
    ("twcrps_mixlinear_tn_gev", "twcrps_gev", "distr_gev"),
    ("twcrps_mixlinear_tn_frechet", "twcrps_frechet", "distr_frechet"),
)

linear_mixtures = {}
for linear_name, base_name, base_distribution in linear_mixture_specs:
    setup1["distribution_2"] = base_distribution
    linear_mixture = EMOS(setup1)
    # Copy in the parameters of the truncated-normal model first, then those
    # of the second component's base model.
    linear_mixture.set_parameters(models_twcrps["twcrps_tn"].get_parameters())
    linear_mixture.set_parameters(models_twcrps[base_name].get_parameters())
    linear_mixtures[linear_name] = linear_mixture

# Keep the individual names bound, then register the models with the rest.
(
    twcrps_mixlinear_tn_ln,
    twcrps_mixlinear_tn_gev,
    twcrps_mixlinear_tn_frechet,
) = linear_mixtures.values()
models_twcrps.update(linear_mixtures)

Using default parameters for truncated normal distribution
Using default parameters for Log Normal distribution
Using default weight parameters
Parameter a_tn set to [0.5877269]
Parameter b_tn set to [ 0.92639434 -0.41853842 -0.11867731 -0.00225808  0.47088426]
Parameter c_tn set to [2.456814]
Parameter d_tn set to [0.9217215]
Parameter a_ln set to [1.2313056]
Parameter b_ln set to [ 0.09328818 -0.10883516 -0.0112197  -0.02521319  0.10967632]
Parameter c_ln set to [0.05795992]
Parameter d_ln set to [0.00217017]
Using default parameters for truncated normal distribution
Using default parameters for Generalized Extreme Value distribution
Using default weight parameters
Parameter a_tn set to [0.5877269]
Parameter b_tn set to [ 0.92639434 -0.41853842 -0.11867731 -0.00225808  0.47088426]
Parameter c_tn set to [2.456814]
Parameter d_tn set to [0.9217215]
Parameter a_gev set to [0.3646196]
Parameter b_gev set to [ 0.8813661  -0.3919423  -0.11474001 -0.01915812  0.6592176 ]
Parameter c_gev set

In [20]:
# Fit the three linear-mixture models in turn and print each summary.
linear_mixture_names = (
    "twcrps_mixlinear_tn_ln",
    "twcrps_mixlinear_tn_gev",
    "twcrps_mixlinear_tn_frechet",
)
for linear_mixture_name in linear_mixture_names:
    linear_mixture_model = models_twcrps[linear_mixture_name]
    linear_mixture_model.fit(X_train, y_train, variances_train, epochs, printing=False)
    print("Model: ", linear_mixture_model)

Final loss:  0.11536607
Model:  EMOS Model Information:
Loss function: loss_twCRPS_sample (Samples: 300)
Forecast distribution: distr_mixture_linear
Distribution 1: distr_trunc_normal
Distribution 2: distr_log_normal
Mixture weight a: [-0.4601822]
Mixture weight b: [0.30469507]
Parameters:
  weight_a: [-0.4601822]
  weight_b: [0.30469507]
  a_tn: [0.90936166]
  b_tn: [ 0.80054826  0.19522229 -0.14016108  0.5298166   0.65256   ]
  c_tn: [3.5689232]
  d_tn: [0.40683395]
  a_ln: [0.2057744]
  b_ln: [ 0.29270282 -0.33174926 -0.00064898 -0.2041978   0.04356202]
  c_ln: [0.15158583]
  d_ln: [0.00978305]
Features: wind_speed, press, kinetic, humid, geopot
Number of features: 5
Neighbourhood size: 11
Chaining function: chain_function_normal_cdf (Mean: 12.0, Std: 4.0)
Optimizer: Adam
Learning rate: 0.009999999776482582

Final loss:  0.11500222
Model:  EMOS Model Information:
Loss function: loss_twCRPS_sample (Samples: 300)
Forecast distribution: distr_mixture_linear
Distribution 1: distr_trunc_

In [21]:
# Collect the `to_dict()` export of every model, keyed by model name.
parameter_dict = {name: model.to_dict() for name, model in models_twcrps.items()}

# Write the collected dict to a pickle file in
# /net/pc200239/nobackup/users/hakvoort/models.
output_path = '/net/pc200239/nobackup/users/hakvoort/models/emos_twcrps.pkl'
with open(output_path, 'wb') as pkl_file:
    pkl.dump(parameter_dict, pkl_file)

print("Saved parameter dict as pkl file")

Saved parameter dict as pkl file


In [22]:
# Fourth argument of `loss_CRPS_sample_general`; presumably the number of
# samples used for the estimate — TODO confirm against the EMOS implementation.
sample_count = 10000

# Score every model on the held-out test tensors and print one line per model.
for name, model in models_twcrps.items():
    test_loss = model.loss_CRPS_sample_general(X_test, y_test, variances_test, sample_count)
    print(name, test_loss.numpy())

twcrps_tn 0.8996583
twcrps_ln 1.1886774
twcrps_gev 0.90019286
twcrps_frechet 0.90329015
twcrps_mix_tn_ln 0.9147167
twcrps_mix_tn_gev 0.9008811
twcrps_mix_tn_frechet 0.89539635
twcrps_mixlinear_tn_ln 0.905757
twcrps_mixlinear_tn_gev 0.8978736
twcrps_mixlinear_tn_frechet 0.89529973
