In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder
from dateutil.parser import parse
from sklearn.preprocessing import OrdinalEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import roc_auc_score
from sklearn.decomposition import PCA
from sklearn.neural_network import MLPRegressor

In [2]:
# Set Matplotlib defaults
plt.style.use("seaborn-whitegrid")
plt.rc("figure", autolayout=True, figsize=(11, 5))
plt.rc(
    "axes",
    labelweight="bold",
    labelsize="large",
    titleweight="bold",
    titlesize=16,
    titlepad=10,
)
plot_params = dict(
    color="0.75",
    style=".-",
    markeredgecolor="0.25",
    markerfacecolor="0.25",
    legend=False,
)
%config InlineBackend.figure_format = 'retina'

In [3]:
# Seed shared across the notebook; it is passed to the model's `random_state`
# argument so that repeated runs are comparable.
random_state = 42

In [16]:
# Name of the sub-folder under ./data that holds the pre-split CSV files
# used for this experiment.
feature_set = 'feature_set_normalised_pca'

# Feature matrices are kept as DataFrames.
X_train = pd.read_csv(f'./data/{feature_set}/X_train_full.csv')
X_val = pd.read_csv(f'./data/{feature_set}/X_valid_full.csv')

# Targets are read from their own files and flattened to 1-D arrays, which is
# the shape the estimator and the metric are given further down.
y_train = pd.read_csv(f'./data/{feature_set}/y_train.csv').to_numpy().ravel()
y_val = pd.read_csv(f'./data/{feature_set}/y_valid.csv').to_numpy().ravel()

In [17]:
# Hidden-layer widths, in order. Kept as an ndarray because the evaluation
# cell interpolates `architecture` into its printed headers.
architecture = np.array([32, 64, 64, 32])

model = MLPRegressor(
    # --- network shape ---
    hidden_layer_sizes=architecture,
    activation='relu',
    # --- optimiser ---
    solver='adam',
    learning_rate='constant',
    learning_rate_init=0.001,
    batch_size='auto',
    epsilon=1e-8,
    # --- regularisation ---
    alpha=0.001,
    # --- early stopping: hold out 10% of the training data and stop once the
    #     validation score fails to improve by `tol` for 10 epochs in a row ---
    early_stopping=True,
    validation_fraction=0.1,
    tol=1e-4,
    n_iter_no_change=10,
    # --- reproducibility / logging ---
    random_state=random_state,
    verbose=1,
)
model.fit(X_train, y_train)

Iteration 1, loss = 0.01294148
Validation score: -0.001251




Iteration 2, loss = 0.01268685
Validation score: 0.002098




Iteration 3, loss = 0.01262016
Validation score: 0.002404




Iteration 4, loss = 0.01259999
Validation score: 0.000342




Iteration 5, loss = 0.01259303
Validation score: 0.002482




Iteration 6, loss = 0.01259071




Validation score: 0.002100
Iteration 7, loss = 0.01259066
Validation score: 0.001628




Iteration 8, loss = 0.01258967




Validation score: -0.001561
Iteration 9, loss = 0.01258868




Validation score: 0.002251
Iteration 10, loss = 0.01258759
Validation score: 0.001566




Iteration 11, loss = 0.01258759
Validation score: 0.002549




Iteration 12, loss = 0.01258659
Validation score: 0.002082




Iteration 13, loss = 0.01258593
Validation score: 0.002528




Iteration 14, loss = 0.01258600




Validation score: 0.002521
Validation score did not improve more than tol=0.000100 for 10 consecutive epochs. Stopping.


In [18]:
# Score the fitted network on the training split. The regressor's continuous
# outputs are handed to roc_auc_score as ranking scores.
train_predictions = model.predict(X_train)
print(
    f'Train ROC score with {architecture} architecture',
    roc_auc_score(y_train, train_predictions),
    sep='\n',
)

# Same measurement on the validation split.
val_predictions = model.predict(X_val)
print(
    f'Val ROC score with {architecture} architecture',
    roc_auc_score(y_val, val_predictions),
    sep='\n',
)
print("\n")

Train ROC score with [32 64 64 32] estimators
0.5761561743225178
Val ROC score with [32 64 64 32] estimators
0.5728892841671157


