# Artificial Neural Network

This notebook trains and tests the artificial neural network (ANN) used as the next purchase date (NPD) predictor.

In [None]:
# Enable IPython's autoreload extension in mode 2, so that imported modules
# are reloaded automatically before each cell is executed.
%load_ext autoreload

%autoreload 2

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
import math

In [None]:
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.pipeline import Pipeline
from sklearn import metrics

In [None]:
# Read the training frame from disk.
# NOTE(review): unpickling can execute arbitrary code - only load pickle files from a trusted source.
df = pd.read_pickle("df_features_final_var.pkl")

# 't_0' is the target column; every remaining column is used as a model input.
df_label = df['t_0']
df_features = df.drop(columns=['t_0'])

# Display the number of input features.
df_features.shape[1]

# Scaled Neural Network: MinMaxScaler

In [None]:
# Independent min-max scalers for the feature matrix and for the target column.
# NOTE(review): both scalers are fitted on every training row, including the rows
# that validation_split=0.2 sets aside below - confirm this is acceptable.
scaler_feature = MinMaxScaler()
scaler_label = MinMaxScaler()

# Fit each scaler and scale its data in a single step.
scaled_features = scaler_feature.fit_transform(df_features)
scaled_labels = scaler_label.fit_transform(np.array(df_label).reshape(-1, 1))

# The first layer is as wide as the number of input features; the network emits one value.
input_dimensions = df_features.shape[1]
num_output_values = 1

# Fully connected regressor: input-width layer -> 10 -> 5 -> linear output.
model = Sequential([
    Dense(input_dimensions, input_dim=input_dimensions, kernel_initializer='normal', activation='relu'),
    Dense(10, activation='relu'),
    Dense(5, activation='relu'),
    Dense(num_output_values, kernel_initializer='normal'),
])

# Optimise mean squared error with Adam; MSE and MAE are also reported per epoch.
model.compile(loss='mse', optimizer='adam', metrics=['mse', 'mae'])

# Train on the scaled data.
# NOTE(review): no random seed is set in the visible cells, so results may differ between runs.
history = model.fit(
    scaled_features,
    scaled_labels,
    epochs=50,
    batch_size=100,
    verbose=1,
    validation_split=0.2,
)

### Testing the neural network model on the test set.

In [None]:
# Load the held-out test set.
# NOTE(review): unpickling can execute arbitrary code - only load pickle files from a trusted source.
df_val = pd.read_pickle("df_test_final_var.pkl")

# Replace 'variance' by its square root.
# NOTE(review): the training frame is used exactly as loaded - confirm that its
# 'variance' column already holds the square-rooted values.
df_val['variance'] = np.sqrt(df_val['variance'])
df_val = df_val.drop(columns=['product_seq', 'days_since_seq'])

# Split the test frame into model inputs and the target column.
X_test = df_val.drop(columns=['t_0', 'num_times_purchased'])
y_test = df_val['t_0']

# The scaler and the network were fitted on df_features, so the test features must
# consist of exactly the same columns in exactly the same order. Otherwise values
# would be scaled with another column's statistics and fed to the wrong input unit.
_missing = df_features.columns.difference(X_test.columns)
_unexpected = X_test.columns.difference(df_features.columns)
if len(_missing) > 0 or len(_unexpected) > 0:
    raise ValueError(
        "Test features do not match the training features: missing={}, unexpected={}".format(
            list(_missing), list(_unexpected)
        )
    )
X_test = X_test[df_features.columns]

# Scale the test set with the scalers that were fitted on the training set.
scaled_features_test = scaler_feature.transform(X_test)
scaled_labels_test = scaler_label.transform(np.array(y_test).reshape(-1, 1))

In [None]:
# Predict the (scaled) next purchase date for every test sample.
predictions = model.predict(scaled_features_test)

# Map the predictions back to the original label scale so that they can be
# compared directly with y_test.
prediction_scaled_back = scaler_label.inverse_transform(predictions)

print(
    'Mean Absolute Error:',
    metrics.mean_absolute_error(y_true=y_test, y_pred=prediction_scaled_back),
)

In [None]:
# Absolute error of every test prediction, sorted ascending so that the curve
# shows how the error is distributed over the test set.
sorted_values_nn = np.sort(np.abs(y_test - prediction_scaled_back.ravel()))

# Label the figure so it can be read on its own and told apart from the
# equivalent plot of the StandardScaler model.
fig, ax = plt.subplots()
ax.plot(sorted_values_nn)
ax.set_title('Sorted absolute error on the test set (MinMaxScaler model)')
ax.set_xlabel('Test samples, sorted by error')
ax.set_ylabel('Absolute error |t_0 - prediction|')
plt.show()

In [None]:
# Raise every negative prediction to zero, modifying the prediction array in place.
np.clip(prediction_scaled_back, 0, None, out=prediction_scaled_back)

In [None]:
# Mean absolute error of the predictions as they currently stand in prediction_scaled_back.
print(
    'Mean Absolute Error:',
    metrics.mean_absolute_error(y_true=y_test, y_pred=prediction_scaled_back),
)

# Scaled Neural Network: StandardScaler

In [None]:
# Independent standardising scalers for the feature matrix and for the target column.
# NOTE(review): both scalers are fitted on every training row, including the rows
# that validation_split=0.2 sets aside below - confirm this is acceptable.
scaler_feature = StandardScaler()
scaler_label = StandardScaler()

# Fit each scaler and scale its data in a single step.
scaled_features = scaler_feature.fit_transform(df_features)
scaled_labels = scaler_label.fit_transform(np.array(df_label).reshape(-1, 1))

# The first layer is as wide as the number of input features; the network emits one value.
input_dimensions = df_features.shape[1]
num_output_values = 1

# Fully connected regressor: input-width layer -> 8 -> linear output.
model = Sequential([
    Dense(input_dimensions, input_dim=input_dimensions, kernel_initializer='normal', activation='relu'),
    Dense(8, activation='relu'),
    Dense(num_output_values, kernel_initializer='normal'),
])

# Optimise mean squared error with Adam; MSE and MAE are also reported per epoch.
model.compile(loss='mse', optimizer='adam', metrics=['mse', 'mae'])

# Train on the scaled data.
# NOTE(review): no random seed is set in the visible cells, so results may differ between runs.
history = model.fit(
    scaled_features,
    scaled_labels,
    epochs=50,
    batch_size=100,
    verbose=1,
    validation_split=0.2,
)

### Making NPD predictions for the test set

In [None]:
# Load the held-out test set.
# NOTE(review): unpickling can execute arbitrary code - only load pickle files from a trusted source.
df_val = pd.read_pickle("df_test_final_var.pkl")

# Replace 'variance' by its square root.
# NOTE(review): the training frame is used exactly as loaded - confirm that its
# 'variance' column already holds the square-rooted values.
df_val['variance'] = np.sqrt(df_val['variance'])
df_val = df_val.drop(columns=['product_seq', 'days_since_seq'])

# Split the test frame into model inputs and the target column.
X_test = df_val.drop(columns=['t_0', 'num_times_purchased'])
y_test = df_val['t_0']

# The scaler and the network were fitted on df_features, so the test features must
# consist of exactly the same columns in exactly the same order. Otherwise values
# would be scaled with another column's statistics and fed to the wrong input unit.
_missing = df_features.columns.difference(X_test.columns)
_unexpected = X_test.columns.difference(df_features.columns)
if len(_missing) > 0 or len(_unexpected) > 0:
    raise ValueError(
        "Test features do not match the training features: missing={}, unexpected={}".format(
            list(_missing), list(_unexpected)
        )
    )
X_test = X_test[df_features.columns]

# Scale the test set with the scalers that were fitted on the training set.
scaled_features_test = scaler_feature.transform(X_test)
scaled_labels_test = scaler_label.transform(np.array(y_test).reshape(-1, 1))

In [None]:
# Predict the (scaled) next purchase date for every test sample.
predictions = model.predict(scaled_features_test)

# Map the predictions back to the original label scale so that they can be
# compared directly with y_test.
prediction_scaled_back = scaler_label.inverse_transform(predictions)

print(
    'Mean Absolute Error:',
    metrics.mean_absolute_error(y_true=y_test, y_pred=prediction_scaled_back),
)

In [None]:
# Absolute error of every test prediction, sorted ascending so that the curve
# shows how the error is distributed over the test set.
sorted_values_nn = np.sort(np.abs(y_test - prediction_scaled_back.ravel()))

# Label the figure so it can be read on its own and told apart from the
# equivalent plot of the MinMaxScaler model.
fig, ax = plt.subplots()
ax.plot(sorted_values_nn)
ax.set_title('Sorted absolute error on the test set (StandardScaler model)')
ax.set_xlabel('Test samples, sorted by error')
ax.set_ylabel('Absolute error |t_0 - prediction|')
plt.show()