In [42]:
%tensorflow_version 2.x
import os
import time
import statistics
import numpy as np
import pandas as pd
from sklearn import metrics
from scipy.stats import zscore
from tensorflow.keras import regularizers
from tensorflow.keras.models import Sequential
from sklearn.model_selection import ShuffleSplit
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Dense, Activation
from sklearn.model_selection import StratifiedShuffleSplit


## Load the raw table; the tokens 'NA' and '?' are read as missing values
df = pd.read_csv(
    "https://data.heatonresearch.com/data/t81-558/jh-simple-dataset.csv",
    na_values=['NA', '?'],
)


## One-hot encode the categorical columns (job first, then area), replacing
## each source column with its prefixed indicator columns
for _categorical in ('job', 'area'):
    _indicators = pd.get_dummies(df[_categorical], prefix=_categorical)
    df = pd.concat([df, _indicators], axis=1).drop(columns=_categorical)


## Fill missing income values with the column median
_income_median = df['income'].median()
df['income'] = df['income'].fillna(_income_median)


## Rescale the numeric feature columns to z-scores
for _numeric in ('income', 'aspect', 'save_rate', 'age', 'subscriptions'):
    df[_numeric] = zscore(df[_numeric])


## Convert to numpy - Classification
# Every column except 'product' (the classification target) and 'id' is used
# as a model input.
x_columns = df.columns.drop('product').drop('id')
# Cast explicitly: pandas >= 2.0 returns boolean indicator columns from
# get_dummies, and a frame that mixes bool and float columns yields an
# object-dtype array from .values, which TensorFlow cannot turn into a tensor.
x = df[x_columns].values.astype(np.float32)
dummies = pd.get_dummies(df['product'])  # one indicator column per product class
products = dummies.columns  # class labels, in the same order as the columns of y
# Same cast for the one-hot targets so the loss always receives numeric values.
y = dummies.values.astype(np.float32)


## BootStrap
# Repeated stratified random splits: 50 resamples, each holding out 10% of
# the rows, stratified on the 'product' column.
boot = StratifiedShuffleSplit(n_splits=50, test_size=0.1, random_state=42)

max_epochs = 1000  # upper bound on training epochs for each split
class_ids = np.arange(y.shape[1])  # every class index, in the column order of y

num = 0
epochs_needed = []
Log_Losses = []
print("number    score    mean score    stdev    epochs    mean epochs    ")
print("------   -------   ----------   -------   ------    -----------")

for num, (train, test) in enumerate(boot.split(x, df['product']), start=1):
    # Splitting into training and testing sets
    x_train, y_train = x[train], y[train]
    x_test, y_test = x[test], y[test]

    # Construct neural network
    model = Sequential()
    model.add(Dense(50, input_dim=x.shape[1], activation='relu'))  # Hidden 1
    model.add(Dense(25, activation='relu'))  # Hidden 2
    model.add(Dense(y.shape[1], activation='softmax'))  # Output
    model.compile(loss='categorical_crossentropy', optimizer='adam')
    monitor = EarlyStopping(monitor='val_loss', min_delta=1e-3, patience=25,
                            verbose=0, mode='auto', restore_best_weights=True)

    # Train on the bootstrap sample.
    # NOTE: the held-out split is also the early-stopping validation set, so
    # the score below is measured on data that influenced when training
    # stopped.
    model.fit(x_train, y_train, validation_data=(x_test, y_test),
              callbacks=[monitor], verbose=0, epochs=max_epochs)

    # stopped_epoch stays 0 when early stopping never fires; in that case
    # training ran for the full max_epochs, so record that instead of 0.
    epochs = monitor.stopped_epoch or max_epochs
    epochs_needed.append(epochs)

    # Predict on the validation set (verbose=0 keeps progress bars out of the
    # results table)
    pred = model.predict(x_test, verbose=0)
    y_true = np.argmax(y_test, axis=1)

    # Calculating log loss. The labels are passed explicitly so the columns
    # of pred are matched to class indices even if a class is missing from
    # this held-out split.
    score = metrics.log_loss(y_true, pred, labels=class_ids)
    Log_Losses.append(score)

    # Running mean and (population) standard deviation over the splits so far
    epochs_mean = statistics.mean(epochs_needed)
    scores_mean = statistics.mean(Log_Losses)
    scores_std = statistics.pstdev(Log_Losses)

    # Recording Section
    print(f" #{num}      {score:.4f}      {scores_mean:.4f}     {scores_std:.4f}      {epochs}          {int(epochs_mean)}")



number    score    mean score    stdev    epochs    mean epochs    
------   -------   ----------   -------   ------    -----------
 #1      0.6771      0.6771     0.0000      62          62
 #2      0.6678      0.6724     0.0046      49          55
 #3      0.6822      0.6757     0.0059      42          51
 #4      0.6807      0.6769     0.0056      90          60
 #5      0.6837      0.6783     0.0057      61          60
 #6      0.7004      0.6820     0.0097      55          59
 #7      0.7083      0.6857     0.0129      51          58
 #8      0.7581      0.6948     0.0268      43          56
 #9      0.6197      0.6864     0.0346      71          58
 #10      0.6673      0.6845     0.0333      61          58
 #11      0.6974      0.6857     0.0319      54          58
 #12      0.7345      0.6898     0.0334      53          57
 #13      0.6964      0.6903     0.0322      78          59
 #14      0.6855      0.6899     0.0310      76          60
 #15      0.6605      0.6880     0.03