In [1]:
import pandas as pd
import numpy as np
import keras
import os
from src.constants import ConstantsBlog3, params_blog_3, hyperparameter_space
import warnings
from keras.models import Sequential
from keras import Input # for instantiating a keras tensor
from keras.layers import Dense, multiply, Dropout # for creating regular densely-connected NN layers.
from sklearn.model_selection import train_test_split
import tensorflow as tf
from sklearn.metrics import mean_poisson_deviance
from typing import *
import time
from src.utils import *
from collections import defaultdict
from pprint import pprint
import random
import shap
import math

In [2]:
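# Background reading and references for this notebook:
# - Reference R/Keras script for non-life pricing (NN_embed_transfer.R, line 177):
#   https://github.com/RonRichman/AI_in_Actuarial_Science/blob/master/NL%20Pricing/Keras%20-%20NL%20Pricing%20-%20NN_embed_transfer.R#L177
# - Python libraries for interpreting machine learning models:
#   https://www.analyticsvidhya.com/blog/2020/03/6-python-libraries-interpret-machine-learning-models/
# - Interpretability of deep learning models:
#   https://towardsdatascience.com/interpretability-of-deep-learning-models-9f52e54d72ab
# - Building feed-forward neural networks in Python:
#   https://towardsdatascience.com/feed-forward-neural-networks-how-to-successfully-build-them-in-python-74503409d99a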

In [3]:
# Notebook-wide presentation settings.
# NOTE(review): this silences *every* warning for the rest of the session,
# including deprecation notices; consider narrowing the filter when debugging.
warnings.filterwarnings(action="ignore")

# Let long cell values (e.g. hyperparameter dictionaries) print up to 1000 characters.
pd.set_option("display.max_colwidth", 1000)
# Render floats with thousands separators and two decimals.
pd.set_option("display.float_format", "{:,.2f}".format)

In [4]:
# Seed and split proportions, all read from the shared `params_blog_3` mapping.
# VAL_SIZE is looked up under VAL_SIZE_FROM_TRAIN_SIZE; it is later applied to
# the train+validation portion, not to the full dataset.
# `.get` yields None for a missing key rather than raising.
RANDOM_STATE, TEST_SIZE, VAL_SIZE = (
    params_blog_3.get(param_key)
    for param_key in (
        ConstantsBlog3.RANDOM_STATE,
        ConstantsBlog3.TEST_SIZE,
        ConstantsBlog3.VAL_SIZE_FROM_TRAIN_SIZE,
    )
)

In [5]:
# Directory that holds the pickled datasets.
PATH_TO_DATA = params_blog_3.get(ConstantsBlog3.PATH_TO_DATA)

# Column names used throughout the notebook, one lookup per line.
nb_claims_name = params_blog_3.get(ConstantsBlog3.NB_CLAIMS)
claim_amount_name = params_blog_3.get(ConstantsBlog3.CLAIM_AMOUNT)
claim_frequency_name = params_blog_3.get(ConstantsBlog3.CLAIM_FREQUENCY)
exposure_name = params_blog_3.get(ConstantsBlog3.EXPOSURE_NAME)

In [6]:
# Load the frequency and severity datasets from PATH_TO_DATA; the file names
# come from `params_blog_3`.
# NOTE(review): unpickling can execute arbitrary code, so these files must come
# from a trusted source.
df_freq, df_sev = (
    pd.read_pickle(os.path.join(PATH_TO_DATA, params_blog_3.get(dataset_key)))
    for dataset_key in (
        ConstantsBlog3.DATASET_FREQ_NAME,
        ConstantsBlog3.DATASET_SEV_NAME,
    )
)

In [7]:
# Peek at the first two rows, transposed so that each column reads as a line.
df_freq.head(2).T

Unnamed: 0,0,1
ClaimNb,0.0,0.0
Exposure,0.09,0.84
ClaimAmount,0.0,0.0
claim_frequency,0.0,0.0
power_d,0.0,0.0
power_e,0.0,0.0
power_f,0.0,0.0
power_g,1.0,1.0
power_h,0.0,0.0
power_i,0.0,0.0


In [8]:
# Target-related columns pulled out as Series.
nb_claims = df_freq[nb_claims_name]
claim_amount = df_freq[claim_amount_name]

# Feature frame: everything except claim amount and claim frequency.
# The claim-count column is deliberately kept in X for now because the
# train/test splits stratify on it; it is dropped from the features afterwards.
X = df_freq.drop(columns=[claim_amount_name, claim_frequency_name])

In [9]:
# First split: hold out the test set, stratified on the number of claims so
# that the claim-count distribution is preserved on both sides.
x_train_val, x_test, y_train_val, y_test = train_test_split(
    X,
    nb_claims,
    stratify=X[nb_claims_name],
    random_state=RANDOM_STATE,
    test_size=TEST_SIZE,
)


In [10]:
# Second split: carve the validation set out of the train+validation portion,
# again stratified on the number of claims and using the same seed.
x_train, x_val, y_train, y_val = train_test_split(
    x_train_val,
    y_train_val,
    stratify=x_train_val[nb_claims_name],
    random_state=RANDOM_STATE,
    test_size=VAL_SIZE,
)

In [11]:
# Exposure column of each split, kept as separate Series.
# The column is not removed from the feature frames here.
exp_train, exp_val, exp_test = (
    split_frame[exposure_name] for split_frame in (x_train, x_val, x_test)
)

In [12]:
# Remove the claim-count target from every feature frame now that the
# stratified splits no longer need it.
# errors="ignore" keeps this cell idempotent: each frame is rebound to its own
# result, so without it a second run of the cell raises KeyError because the
# column is already gone.
x_train = x_train.drop(columns=[nb_claims_name], errors="ignore")
x_val = x_val.drop(columns=[nb_claims_name], errors="ignore")
x_test = x_test.drop(columns=[nb_claims_name], errors="ignore")

In [13]:
# Hyperparameter search over `hyperparameter_space`.
# `run_optimization_neural_network` is presumably provided by the
# `from src.utils import *` star import — confirm.
# The search is capped by both a maximum number of experiments and a maximum
# optimization time, each read from `params_blog_3`.
# Only the test-set exposure is passed explicitly.
results, best_model = run_optimization_neural_network(
    x_train,
    y_train,
    x_val,
    y_val,
    x_test,
    y_test,
    exp_test,
    hyperparameter_space,
    n_max_experiments=params_blog_3.get(ConstantsBlog3.N_MAX_EXPERIMENTS),
    max_optimization_time=params_blog_3.get(ConstantsBlog3.MAX_OPTIMIZATION_TIME),
)

-------------------------------------------------- 0th EXPERIMENT --------------------------------------------------
{'batch_size': 256,
 'callbacks': [<keras.callbacks.EarlyStopping object at 0x000002233F778880>],
 'layer_param_0': {'activation': 'softmax',
                   'dropout_rate': 0.41000000000000003,
                   'kernel_initializer': 'glorot_uniform',
                   'units': 75,
                   'use_bias': True},
 'layer_param_1': {'activation': 'sigmoid',
                   'dropout_rate': 0.34,
                   'kernel_initializer': 'glorot_uniform',
                   'units': 5,
                   'use_bias': True},
 'layer_param_2': {'activation': 'tanh',
                   'dropout_rate': 0.08,
                   'kernel_initializer': 'glorot_uniform',
                   'units': 25,
                   'use_bias': True},
 'nb_hidden_layers': 3,
 'optimizer': 'adam'}
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/1

Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
-------------------------------------------------- 3th EXPERIMENT --------------------------------------------------
{'batch_size': 256,
 'callbacks': [<keras.callbacks.EarlyStopping object at 0x000002233F778880>],
 'layer_param_0': {'activation': 'tanh',
                   'dropout_rate': 0.11,
                   'kernel_initializer': 'glorot_uniform',
                   'units': 2,
                   'use_bias': True},
 'layer_param_1': {'activation': None,
                   'dropout_rate': 0.12,
                   'kernel_initializer': 'glorot_uniform',
                   'units': 75,
                   'use_bias': True},
 'nb_hidden_layers': 2,
 'optimizer': 'adam'}
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
-------------------------------------------------- 4th EXPERIMENT -------------------------------------------

Epoch 10/10
-------------------------------------------------- 5th EXPERIMENT --------------------------------------------------
{'batch_size': 256,
 'callbacks': [<keras.callbacks.EarlyStopping object at 0x000002233F778880>],
 'layer_param_0': {'activation': 'sigmoid',
                   'dropout_rate': 0.08,
                   'kernel_initializer': 'glorot_uniform',
                   'units': 10,
                   'use_bias': True},
 'layer_param_1': {'activation': 'sigmoid',
                   'dropout_rate': 0.42000000000000004,
                   'kernel_initializer': 'glorot_uniform',
                   'units': 10,
                   'use_bias': True},
 'layer_param_2': {'activation': 'sigmoid',
                   'dropout_rate': 0.43,
                   'kernel_initializer': 'glorot_uniform',
                   'units': 30,
                   'use_bias': True},
 'nb_hidden_layers': 3,
 'optimizer': 'adam'}
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoc

In [18]:
# Renumber the rows 0..n-1, discarding the previous index.
# Rebinding replaces the former `inplace=True` call: the displayed frame is the
# same, but any other reference to the original object is left unmodified.
results = results.reset_index(drop=True)
results.head(5)

Unnamed: 0,selected_hyperparams,poisson_dev
0,"{'nb_hidden_layers': 1, 'optimizer': 'adam', 'callbacks': [<keras.callbacks.EarlyStopping object at 0x000002233F778880>], 'batch_size': 256, 'layer_param_0': {'units': 40, 'activation': 'relu', 'use_bias': True, 'kernel_initializer': 'glorot_uniform'}}",0.3
1,"{'nb_hidden_layers': 2, 'optimizer': 'adam', 'callbacks': [<keras.callbacks.EarlyStopping object at 0x000002233F778880>], 'batch_size': 256, 'layer_param_0': {'units': 2, 'activation': 'tanh', 'use_bias': True, 'kernel_initializer': 'glorot_uniform'}, 'layer_param_1': {'units': 75, 'activation': None, 'use_bias': True, 'kernel_initializer': 'glorot_uniform'}}",0.3
2,"{'nb_hidden_layers': 1, 'optimizer': 'adam', 'callbacks': [<keras.callbacks.EarlyStopping object at 0x000002233F778880>], 'batch_size': 256, 'layer_param_0': {'units': 45, 'activation': 'tanh', 'use_bias': True, 'kernel_initializer': 'glorot_uniform'}}",0.3
3,"{'nb_hidden_layers': 3, 'optimizer': 'adam', 'callbacks': [<keras.callbacks.EarlyStopping object at 0x000002233F778880>], 'batch_size': 256, 'layer_param_0': {'units': 10, 'activation': 'sigmoid', 'use_bias': True, 'kernel_initializer': 'glorot_uniform'}, 'layer_param_1': {'units': 10, 'activation': 'sigmoid', 'use_bias': True, 'kernel_initializer': 'glorot_uniform'}, 'layer_param_2': {'units': 30, 'activation': 'sigmoid', 'use_bias': True, 'kernel_initializer': 'glorot_uniform'}}",0.3
4,"{'nb_hidden_layers': 3, 'optimizer': 'adam', 'callbacks': [<keras.callbacks.EarlyStopping object at 0x000002233F778880>], 'batch_size': 256, 'layer_param_0': {'units': 5, 'activation': 'tanh', 'use_bias': True, 'kernel_initializer': 'glorot_uniform'}, 'layer_param_1': {'units': 2, 'activation': None, 'use_bias': True, 'kernel_initializer': 'glorot_uniform'}, 'layer_param_2': {'units': 35, 'activation': 'sigmoid', 'use_bias': True, 'kernel_initializer': 'glorot_uniform'}}",0.3


In [17]:
# Hyperparameter dictionary stored in the first row of `results`.
# NOTE(review): presumably the first row is the best-scoring experiment —
# confirm the ordering applied by run_optimization_neural_network.
results.head(1)["selected_hyperparams"].squeeze()

{'nb_hidden_layers': 1,
 'optimizer': 'adam',
 'callbacks': [<keras.callbacks.EarlyStopping at 0x2233f778880>],
 'batch_size': 256,
 'layer_param_0': {'units': 40,
  'activation': 'relu',
  'use_bias': True,
  'kernel_initializer': 'glorot_uniform'}}