In [1]:
import numpy as np
import pickle as pkl

from src.models.train_emos import train_emos, train_and_test_emos
from src.visualization.pit import make_cpit_diagram_emos, make_cpit_hist_emos 
from src.visualization.brier_score import brier_skill_plot, brier_plot
from src.models.get_data import get_tensors, get_normalized_tensor
from src.models.emos import EMOS
from src.visualization.scoring_tables import make_table

2024-03-07 08:45:03.622668: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2024-03-07 08:45:03.649227: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-03-07 08:45:03.649249: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-03-07 08:45:03.649969: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-03-07 08:45:03.654317: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2024-03-07 08:45:03.654845: I tensorflow/core/platform/cpu_feature_guard.cc:1

In [2]:
# Data-selection settings; the test loader in a later cell reuses the
# first, second and fourth of these.
neighbourhood_size = 11
parameter_names = [
    'wind_speed',
    'press',
    'kinetic',
    'humid',
    'geopot',
]
ignore = ['229', '285', '323']  # presumably identifiers to exclude -- confirm against get_normalized_tensor
train_folds = [1, 2]

# Load the training folds. The result is indexed below by the keys
# 'X', 'y', 'variances', 'mean' and 'std'.
train_data = get_normalized_tensor(
    neighbourhood_size,
    parameter_names,
    train_folds,
    ignore,
)

# Pull the individual pieces out of the returned mapping in one step.
X_train, y_train, variances_train, mean_train, std_train = (
    train_data[key] for key in ('X', 'y', 'variances', 'mean', 'std')
)

print(X_train.shape)

(15733, 5)


In [3]:
test_fold = 3

# Fetch the held-out fold with the same selection settings as the training data.
X_test_raw, y_test, variances_test = get_tensors(
    neighbourhood_size,
    parameter_names,
    test_fold,
    ignore,
)

# Scale the test features with the statistics returned for the training folds,
# so both sets share one feature scale.
X_test = (X_test_raw - mean_train) / std_train

print(X_test.shape)

(7780, 5)


In [4]:
# Base configuration shared by every EMOS model built in this notebook:
# the feature list, its length, the training mean/std, and the
# neighbourhood size used when loading the data.
setup = {
    "num_features": len(parameter_names),
    "feature_mean": mean_train,
    "feature_std": std_train,
    "features": parameter_names,
    "neighbourhood_size": neighbourhood_size,
}

Possible settings for EMOS class:

loss:

    loss_CRPS_sample
    loss_log_likelihood
    loss_Brier_score 
    loss_twCRPS_sample

If a sample-based loss is used (e.g. loss_CRPS_sample), also set: samples

chaining_functions

    chain_function_indicator 
        includes:
        
         threshold
    chain_function_normal_cdf
        includes: 
        
        chain_function_mean
         chain_function_std

optimizer:

    Adam
    SGD

learning_rate:
    A positive real-valued number

forecast_distribution:

    distr_trunc_normal
    distr_log_normal
    distr_gev(2/3)
    distr_frechet

    distr_mixture (which contains two of the above distributions)
    distr_mixture_linear

For distr_mixture and distr_mixture_linear, the setup must also contain distribution_1 and distribution_2.

    


In [5]:
# Registry of the EMOS models trained with the sample-based CRPS loss,
# keyed by a short model name.
models_crps = {}

# Work on a shallow copy of the base configuration. A plain `setup1 = setup`
# would alias the same dict, so every key written below (and in later cells)
# would silently end up in `setup` as well.
setup1 = dict(setup)

# Training settings shared by all models created in this cell.
setup1["loss"] = "loss_CRPS_sample"
setup1["samples"] = 300
setup1["optimizer"] = "Adam"
setup1["learning_rate"] = 0.01

# One model per forecast distribution. `setup1` is modified in place before
# each construction, so the order of these statements matters.
# NOTE(review): this relies on EMOS reading the configuration at construction
# time rather than keeping a live reference to `setup1` -- confirm in EMOS.
setup1["forecast_distribution"] = "distr_trunc_normal"
crps_tn = EMOS(setup1)

setup1["forecast_distribution"] = "distr_log_normal"
crps_ln = EMOS(setup1)

setup1["forecast_distribution"] = "distr_gev"
setup1["samples"] = 300  # same value as above, re-stated explicitly
crps_gev = EMOS(setup1)

setup1["forecast_distribution"] = "distr_frechet"
crps_frechet = EMOS(setup1)

# Register the models under the names used by the later cells.
models_crps["crps_tn"] = crps_tn
models_crps["crps_ln"] = crps_ln
models_crps["crps_gev"] = crps_gev
models_crps["crps_frechet"] = crps_frechet

print(len(models_crps))

Using default parameters for truncated normal distribution
Using default parameters for Log Normal distribution
Using default parameters for Generalized Extreme Value distribution
Using default parameters for Frechet distribution
4


In [6]:
# Number of epochs passed to every fit() call in this notebook.
epochs = 600

# Fit every model currently registered in `models_crps` on the training data
# and print its summary. Iterating over items() gives the key and the model
# separately, so `model` really is a model object (not the dict key), in line
# with the other loops over `models_crps` in this notebook.
for name, model in models_crps.items():
    model.fit(X_train, y_train, variances_train, epochs, printing=False)
    print("Model: ", model)

Final loss:  0.9070171
Model:  EMOS Model Information:
Loss function: loss_CRPS_sample (Samples: 300)
Forecast distribution: distr_trunc_normal
Parameters:
  a_tn: [0.3999338]
  b_tn: [ 0.9654551  -0.02524417 -0.1457744   0.0331571   0.01270436]
  c_tn: [1.5202594]
  d_tn: [1.0406542]
Features: wind_speed, press, kinetic, humid, geopot
Number of features: 5
Neighbourhood size: 11

Optimizer: Adam
Learning rate: 0.009999999776482582

Final loss:  1.0519568
Model:  EMOS Model Information:
Loss function: loss_CRPS_sample (Samples: 300)
Forecast distribution: distr_log_normal
Parameters:
  a_ln: [0.8470826]
  b_ln: [ 0.13392808 -0.08153324 -0.03561439 -0.0221632   0.07455358]
  c_ln: [0.12301604]
  d_ln: [-0.00219814]
Features: wind_speed, press, kinetic, humid, geopot
Number of features: 5
Neighbourhood size: 11

Optimizer: Adam
Learning rate: 0.009999999776482582

Final loss:  0.90719897
Model:  EMOS Model Information:
Loss function: loss_CRPS_sample (Samples: 300)
Forecast distribution:

In [7]:
# Switch the shared configuration to a two-component mixture. The first
# component stays truncated normal for every mixture built in this cell.
setup1.update({
    "forecast_distribution": "distr_mixture",
    "distribution_1": "distr_trunc_normal",
    "distribution_2": "distr_log_normal",
})

# Truncated normal + log-normal: copy in the parameters currently held by the
# crps_tn and crps_ln entries of models_crps.
crps_mix_tn_ln = EMOS(setup1)
for component in ("crps_tn", "crps_ln"):
    crps_mix_tn_ln.set_parameters(models_crps[component].get_parameters())

# Truncated normal + GEV.
setup1["distribution_2"] = "distr_gev"
crps_mix_tn_gev = EMOS(setup1)
for component in ("crps_tn", "crps_gev"):
    crps_mix_tn_gev.set_parameters(models_crps[component].get_parameters())

# Truncated normal + Frechet.
setup1["distribution_2"] = "distr_frechet"
crps_mix_tn_frechet = EMOS(setup1)
for component in ("crps_tn", "crps_frechet"):
    crps_mix_tn_frechet.set_parameters(models_crps[component].get_parameters())

# Add the mixtures to the registry next to the single-distribution models.
models_crps.update({
    "crps_mix_tn_ln": crps_mix_tn_ln,
    "crps_mix_tn_gev": crps_mix_tn_gev,
    "crps_mix_tn_frechet": crps_mix_tn_frechet,
})



Using default parameters for truncated normal distribution
Using default parameters for Log Normal distribution
Parameter a_tn set to [0.3999338]
Parameter b_tn set to [ 0.9654551  -0.02524417 -0.1457744   0.0331571   0.01270436]
Parameter c_tn set to [1.5202594]
Parameter d_tn set to [1.0406542]
Parameter a_ln set to [0.8470826]
Parameter b_ln set to [ 0.13392808 -0.08153324 -0.03561439 -0.0221632   0.07455358]
Parameter c_ln set to [0.12301604]
Parameter d_ln set to [-0.00219814]
Using default parameters for truncated normal distribution
Using default parameters for Generalized Extreme Value distribution
Parameter a_tn set to [0.3999338]
Parameter b_tn set to [ 0.9654551  -0.02524417 -0.1457744   0.0331571   0.01270436]
Parameter c_tn set to [1.5202594]
Parameter d_tn set to [1.0406542]
Parameter a_gev set to [0.34993726]
Parameter b_gev set to [ 0.8720208  -0.03159553 -0.09541283  0.01321196  0.0396996 ]
Parameter c_gev set to [0.78129226]
Parameter d_gev set to [ 0.13783355  0.0088

In [8]:
# Fit the three mixture models one after another and print each summary
# right after its fit.
for mixture_name in ("crps_mix_tn_ln", "crps_mix_tn_gev", "crps_mix_tn_frechet"):
    models_crps[mixture_name].fit(X_train, y_train, variances_train, epochs, printing=False)
    print("Model: ", models_crps[mixture_name])

Final loss:  0.90409464
Model:  EMOS Model Information:
Loss function: loss_CRPS_sample (Samples: 300)
Forecast distribution: distr_mixture
Distribution 1: distr_trunc_normal
Distribution 2: distr_log_normal
Mixture weight: [0.6870861]
Parameters:
  weight: [0.6870861]
  a_tn: [-1.3340845]
  b_tn: [ 1.3566552   0.13018653 -0.32621866 -0.06593157 -0.02718521]
  c_tn: [0.5876389]
  d_tn: [2.6577554]
  a_ln: [1.1989557]
  b_ln: [ 0.02905305 -0.10519076  0.03432228  0.05031336  0.0478396 ]
  c_ln: [0.56797266]
  d_ln: [-0.04821111]
Features: wind_speed, press, kinetic, humid, geopot
Number of features: 5
Neighbourhood size: 11

Optimizer: Adam
Learning rate: 0.009999999776482582

Final loss:  0.89981747
Model:  EMOS Model Information:
Loss function: loss_CRPS_sample (Samples: 300)
Forecast distribution: distr_mixture
Distribution 1: distr_trunc_normal
Distribution 2: distr_gev
Mixture weight: [0.73516476]
Parameters:
  weight: [0.73516476]
  a_tn: [0.21259913]
  b_tn: [ 0.9664837   0.03484

In [9]:
# Switch the shared configuration to the linear mixture and reset the two
# components to truncated normal + log-normal.
setup1.update({
    "forecast_distribution": "distr_mixture_linear",
    "distribution_1": "distr_trunc_normal",
    "distribution_2": "distr_log_normal",
})

# Truncated normal + log-normal: copy in the parameters currently held by the
# crps_tn and crps_ln entries of models_crps.
crps_mixlinear_tn_ln = EMOS(setup1)
for component in ("crps_tn", "crps_ln"):
    crps_mixlinear_tn_ln.set_parameters(models_crps[component].get_parameters())

# Truncated normal + GEV.
setup1["distribution_2"] = "distr_gev"
crps_mixlinear_tn_gev = EMOS(setup1)
for component in ("crps_tn", "crps_gev"):
    crps_mixlinear_tn_gev.set_parameters(models_crps[component].get_parameters())

# Truncated normal + Frechet.
setup1["distribution_2"] = "distr_frechet"
crps_mixlinear_tn_frechet = EMOS(setup1)
for component in ("crps_tn", "crps_frechet"):
    crps_mixlinear_tn_frechet.set_parameters(models_crps[component].get_parameters())

# Add the linear mixtures to the registry.
models_crps.update({
    "crps_mixlinear_tn_ln": crps_mixlinear_tn_ln,
    "crps_mixlinear_tn_gev": crps_mixlinear_tn_gev,
    "crps_mixlinear_tn_frechet": crps_mixlinear_tn_frechet,
})


Using default parameters for truncated normal distribution
Using default parameters for Log Normal distribution
Using default weight parameters
Parameter a_tn set to [0.3999338]
Parameter b_tn set to [ 0.9654551  -0.02524417 -0.1457744   0.0331571   0.01270436]
Parameter c_tn set to [1.5202594]
Parameter d_tn set to [1.0406542]
Parameter a_ln set to [0.8470826]
Parameter b_ln set to [ 0.13392808 -0.08153324 -0.03561439 -0.0221632   0.07455358]
Parameter c_ln set to [0.12301604]
Parameter d_ln set to [-0.00219814]
Using default parameters for truncated normal distribution
Using default parameters for Generalized Extreme Value distribution
Using default weight parameters
Parameter a_tn set to [0.3999338]
Parameter b_tn set to [ 0.9654551  -0.02524417 -0.1457744   0.0331571   0.01270436]
Parameter c_tn set to [1.5202594]
Parameter d_tn set to [1.0406542]
Parameter a_gev set to [0.34993726]
Parameter b_gev set to [ 0.8720208  -0.03159553 -0.09541283  0.01321196  0.0396996 ]
Parameter c_gev

In [10]:
# Fit the three linear-mixture models one after another and print each
# summary right after its fit.
for mixture_name in (
    "crps_mixlinear_tn_ln",
    "crps_mixlinear_tn_gev",
    "crps_mixlinear_tn_frechet",
):
    models_crps[mixture_name].fit(X_train, y_train, variances_train, epochs, printing=False)
    print("Model: ", models_crps[mixture_name])

Final loss:  0.8954604
Model:  EMOS Model Information:
Loss function: loss_CRPS_sample (Samples: 300)
Forecast distribution: distr_mixture_linear
Distribution 1: distr_trunc_normal
Distribution 2: distr_log_normal
Mixture weight a: [-0.57523257]
Mixture weight b: [0.60745335]
Parameters:
  weight_a: [-0.57523257]
  weight_b: [0.60745335]
  a_tn: [0.76594645]
  b_tn: [ 0.93367267  0.13759331 -0.11586664  0.16060491  0.2059073 ]
  c_tn: [2.4105873]
  d_tn: [0.724831]
  a_ln: [-0.31511748]
  b_ln: [ 0.22805358 -0.28740498  0.14592326 -0.24856983 -0.37500352]
  c_ln: [0.6830854]
  d_ln: [0.2181]
Features: wind_speed, press, kinetic, humid, geopot
Number of features: 5
Neighbourhood size: 11

Optimizer: Adam
Learning rate: 0.009999999776482582

Final loss:  0.89272875
Model:  EMOS Model Information:
Loss function: loss_CRPS_sample (Samples: 300)
Forecast distribution: distr_mixture_linear
Distribution 1: distr_trunc_normal
Distribution 2: distr_gev
Mixture weight a: [0.5729996]
Mixture weig

In [13]:
# Collect the to_dict() result of every registered model, keyed by model name.
parameter_dict = {name: model.to_dict() for name, model in models_crps.items()}

# Write all collected dictionaries to a single pickle file.
# NOTE(review): the destination is a hard-coded absolute path.
with open('/net/pc200239/nobackup/users/hakvoort/models/emos_crps.pkl', 'wb') as pkl_file:
    pkl.dump(parameter_dict, pkl_file)


In [15]:
# Score every registered model on the test data and print one line per model.
# The last argument (10000) is presumably the number of samples used for the
# estimate -- confirm against EMOS.loss_CRPS_sample_general.
for name, model in models_crps.items():
    test_crps = model.loss_CRPS_sample_general(
        X_test, y_test, variances_test, 10000
    ).numpy()
    print(name, test_crps)

crps_tn 0.89019674
crps_ln 1.0308855
crps_gev 0.89093673
crps_frechet 0.8947521
crps_mix_tn_ln 0.8901133
crps_mix_tn_gev 0.88470876
crps_mix_tn_frechet 0.88451064
crps_mixlinear_tn_ln 0.89610326
crps_mixlinear_tn_gev 0.8896949
crps_mixlinear_tn_frechet 0.8887749
