In [None]:
import main
import numpy as np
import config
import lstm

In [None]:
# Seed NumPy's global random generator so NumPy-based draws in later cells
# repeat from run to run.
# NOTE(review): only NumPy is seeded here; any other random source used by the
# project modules would need its own seed — confirm.
seed = 10
np.random.seed(seed=seed)

In [None]:
# Prepare data.
# Load the cleaned data set once; `data` itself stays unfiltered.
data = main.load_and_clean_data()

# Build the supervised arrays from a copy of every row whose CountryName is
# not 'Norway'. The third return value of create_supervised_data_set is unused.
rows_without_norway = data[data['CountryName'] != 'Norway'].copy()
x, y, _ = main.create_supervised_data_set(rows_without_norway)

In [None]:
# Visualize data. TODO: no visualization code has been added to this cell yet.


In [None]:
# Set hyper-parameters.
# Either reuse a fixed set of hyper-parameters or run the nested cross
# validation search, depending on config.USE_CACHED_HYPERPARAMETERS.
if config.USE_CACHED_HYPERPARAMETERS:
    best_params = {'activation': 'tanh','learn_rate': 0.001,'neurons': 20}
else:
    best_params, non_nested_r2_score, nested_r2_scores = main.nested_cross_validation(x, y)

# Trailing comments record the output observed in commit: f3caa99
# NOTE(review): the best params recorded below differ from the cached
# dictionary above — confirm which set the cached values are meant to reflect.
print("Best params:", best_params) # Best params: {'activation': 'hard_sigmoid', 'learn_rate': 0.05, 'neurons': 20}

# The r2 scores are only assigned when the search actually ran. Printing them
# unconditionally raises NameError on a fresh kernel when the cached
# hyper-parameters are used, so report them only in that case.
if not config.USE_CACHED_HYPERPARAMETERS:
    print("Non-nested cross validation r2 score:", non_nested_r2_score) # Non-nested cross validation r2 score: -0.0003677288186736405
    print("Nested cross validation r2 scores:", nested_r2_scores) # Nested cross validation r2 scores: [-0.00061681 -0.00021794 -0.00018851 -0.00043963 -0.00013743]
    print("Nested cross validation r2 scores mean:", nested_r2_scores.mean()) # Nested cross validation r2 scores mean: -0.0003200653416570276

In [None]:
# Train/load model.
# Build the model from the selected hyper-parameters, then either load stored
# weights or train it and plot the loss curves.
model = lstm.create_model(**best_params)
X_train, X_val, Y_train, Y_val = main.split_data(x, y)
if config.USE_CACHED_FITTED_MODEL:
    # NOTE(review): the file name suggests weights saved for a different
    # configuration (10 / 0.0001) than the cached hyper-parameters
    # (20 neurons, 0.001 learn rate) — confirm the architectures match.
    model.load_weights('Models/model_10_0.0001.h5')
else:
    history = lstm.train_model(model, X_train, Y_train, validation=(X_val, Y_val))
    val_loss = history['val_loss']
    train_loss = history['loss']
    # Take the x-axis from the number of recorded values rather than
    # config.EPOCHS, so x and y stay the same length even if training records
    # fewer epochs than configured (e.g. early stopping). When every epoch
    # runs this is identical to range(config.EPOCHS).
    main.draw_graph(
        {'x': range(len(val_loss)), 'y': val_loss, 'name': 'val_loss'},
        {'x': range(len(train_loss)), 'y': train_loss, 'name': 'loss'},
    )

In [None]:
# Visualize predictions from the model.
# `data` is the unfiltered result of main.load_and_clean_data(); the 'Norway'
# filter was applied only when building x and y, not to `data` itself.
# NOTE(review): presumably this plots predictions for the held-out country —
# confirm against main.visualize_predictions.
main.visualize_predictions(model, data)


In [None]:
# Display SHAP values for the different features.
# Two independent index draws over the rows of x. np.random.choice samples
# with replacement by default, so a row index may repeat within a subset.
row_count = x.shape[0]
samples = np.random.choice(row_count, size=2000)
background_rows = np.random.choice(row_count, size=1500)
# NOTE(review): presumably the 1500-row subset is the SHAP background set and
# the 2000-row subset is what gets explained — confirm against
# lstm.calculate_shap.
lstm.calculate_shap(model, x[background_rows], x[samples], config.FEATURES)


