# Prediction of customers' travel pattern

- Applying ML + Neural Network models to selected featured data

# 1)-Importing key modules

In [18]:
import warnings
warnings.filterwarnings('ignore')
# For processing
import pandas as pd
import numpy as np
import scipy
from collections import Counter
import datetime as dt
from datetime import datetime
import matplotlib.pyplot as plt
import seaborn as sns
plt.rcParams["figure.figsize"] = (16, 10)
plt.rcParams["xtick.labelsize"] = 10
plt.figure(figsize=(16,10)) # this creates a figure 16 inch wide, 10 inch high
from pprint import pprint
%matplotlib inline
%pylab inline

Populating the interactive namespace from numpy and matplotlib


In [19]:
# For modeling building and tunning
from sklearn.model_selection import train_test_split
from imblearn.under_sampling import NearMiss
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.neural_network import MLPClassifier 

In [20]:
# for evaluation
from sklearn import metrics
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import roc_auc_score
from sklearn.metrics import recall_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

In [21]:
from datetime import date
import datetime as dt

# 2)-Loading data

In [22]:
df = pd.read_csv('updated_feature.csv')
df.shape

(45805, 514)

In [23]:
df.head()

Unnamed: 0,event_type,distance,num_family,len_jour,origin_ADB,origin_ADL,origin_AER,origin_AGP,origin_AKL,origin_ALA,...,dest_YEG,dest_YMQ,dest_YOW,dest_YTO,dest_YUL,dest_YVR,dest_YWG,dest_YYC,dest_YYZ,dest_ZRH
0,0,5834.154716,7,6.0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1,6525.926149,4,21.0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,469.781624,2,3.0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,1,1498.817537,1,3.0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,1,2921.339028,4,6.0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [24]:
y=df["event_type"]
features=df.drop(['event_type'], axis=1)
X = StandardScaler().fit_transform(features)

In [25]:
from imblearn.under_sampling import NearMiss
nm = NearMiss()
X_under,y_under=nm.fit_sample(X,y)

In [26]:
print(X_under.shape,y_under.shape)

(3616, 513) (3616,)


In [27]:
from collections import Counter
print('Original dataset shape {}'.format(Counter(y)))
print('Over-sampled dataset shape {}'.format(Counter(y_under)))

Original dataset shape Counter({0: 43997, 1: 1808})
Over-sampled dataset shape Counter({0: 1808, 1: 1808})


# 4)- ML Models

In [28]:
models = []
models.append(("NB",GaussianNB()))
models.append(("SVM",SVC()))
models.append(("KNN",KNeighborsClassifier()))
models.append(("DT",DecisionTreeClassifier()))
models.append(("RF",RandomForestClassifier()))
models.append(("GB",GradientBoostingClassifier()))
models.append(("MLP",MLPClassifier()))

In [29]:
results = []
names = []
for name,model in models:
    kfold = KFold(n_splits=10, random_state=22)
    cv_result = cross_val_score(model,X_under,y_under, cv = kfold,scoring = "precision")
    names.append(name)
    results.append(cv_result)
for i in range(len(names)):
    print(names[i],results[i].mean())

LR 0.5024691358024691
NB 0.5142857142857142
SVM 0.5025000000000001
KNN 0.5017391304347826
DT 0.5008771929824561
RF 0.5008230452674897
GB 0.5011299435028248
MLP 0.5015384615384615


Precision results for these models are pretty same. We didnt get any improvement from Logistic Regress- being the simplest model to Random Forest, Gradiant Boosting or even multilayer perceptron- more advanced level models. Let's see how deep learning aka neural network with Stochastic gradient descent works.

# 5)- Implementing Neural Network
Steps to follow

- 1-Build the Neural Network
- 2 -Set the hyperparameters, train the NN and evaluate
- 3- Adapt SGD method to improve the accuracy & Recall score

In [30]:
target=df["event_type"]
features=df.drop(['event_type'], axis=1)
X = StandardScaler().fit_transform(features)
X_train, X_test, y_train, y_test = train_test_split(X, target, test_size=0.3, random_state=0)
nm = NearMiss()
X_under,y_under=nm.fit_sample(X_train,y_train)

### a)-Build the Neural Network

In [31]:
class MyNeuralNetwork(object):
    def __init__(self, input_nodes, hidden_nodes, output_nodes, learning_rate):
        # Set number of nodes in input, hidden and output layers.
        self.input_nodes =  input_nodes
        self.hidden_nodes = hidden_nodes
        self.output_nodes = output_nodes

        # Initialize weights
        self.weights_0_1 = np.zeros((self.hidden_nodes,self.input_nodes))

        self.weights_1_2 = np.random.normal(0.0, self.output_nodes**-0.5, 
                                       (self.output_nodes, self.hidden_nodes))
        self.lr = learning_rate
        
        #### Set this to your implemented sigmoid function ####
        # Activation function is the sigmoid function
        self.sigmoid_activation = lambda x : 1 / (1 + np.exp(-x))
        self.sigmoid_output_2_derivative = lambda x: x * (1 - x)
    
    def train(self, inputs_array, targets_array):
        # Convert inputs list to 2d array
        inputs  = inputs_array.T
        targets = np.array(targets_array, ndmin=2)
        #targets = targets_array
        m = inputs_array.shape[0] # number of records
        
        #### Implement the forward pass here ####
        ### Forward pass ###
        # TODO: Hidden layer
        layer_1_inputs = np.dot(self.weights_0_1, inputs) # signals into hidden layer
        layer_1 = layer_1_inputs # signals from hidden layer
        
        # TODO: Output layer
        layer_2_inputs = np.dot(self.weights_1_2,layer_1) # signals into final output layer
        layer_2 = self.sigmoid_activation(layer_2_inputs) # signals from final output layer
        
        #### Implement the backward pass here ####
        ### Backward pass ###
        
        # TODO: Output error  
        layer_2_errors = targets - layer_2  # Output layer error is the difference between desired target and actual output.
        layer_2_delta = layer_2_errors * self.sigmoid_output_2_derivative(layer_2)
        
        # TODO: Backpropagated error
        layer_1_errors = np.dot(self.weights_1_2.T,layer_2_delta) # errors propagated to the hidden layer 2x128
        layer_1_delta = layer_1_errors  # hidden layer gradients y = x -> 1
        
        # TODO: Update the weights
        self.weights_1_2 += self.lr*np.dot(layer_2_delta,layer_1.T)/m # update hidden-to-output weights with gradient descent step
        self.weights_0_1 += self.lr*np.dot(layer_1_delta,inputs.T)/m # update input-to-hidden weights with gradient descent step
         
        
    def run(self, inputs_list):
        # Run a forward pass through the network
        inputs = np.array(inputs_list, ndmin=2).T
        
        #### Implement the forward pass here ####
        # TODO: Hidden layer
        hidden_inputs = np.dot(self.weights_0_1, inputs) # signals into hidden layer
        hidden_outputs = hidden_inputs # signals from hidden layer
        
        # TODO: Output layer
        final_inputs = np.dot(self.weights_1_2,hidden_outputs) # signals into final output layer
        final_outputs = self.sigmoid_activation(final_inputs) # signals from final output layer 
        
        return final_outputs

### b.Train the mode

In [32]:
### Set the hyperparameters here ###
epochs = 100 #100
learning_rate = 0.01 #0.1
hidden_nodes = 10 
output_nodes = 1

N_i = X_train.shape[1]
network = MyNeuralNetwork(N_i, hidden_nodes, output_nodes, learning_rate)

for e in range(epochs):
    network.train(X_under, y_under)
    
y_pred = network.run(X_test)
y_pred = np.where(y_pred >= 0.5, 1, 0) # if probability >= 0.5, it is 1, else 0

print(classification_report(y_pred[0], y_test))

              precision    recall  f1-score   support

           0       0.43      0.95      0.59      5961
           1       0.47      0.03      0.06      7781

    accuracy                           0.43     13742
   macro avg       0.45      0.49      0.33     13742
weighted avg       0.45      0.43      0.29     13742



### c.SGD

Stochastic gradient descent 

In [33]:
N_i = X_train.shape[1]
network = MyNeuralNetwork(N_i, hidden_nodes, output_nodes, learning_rate)

random_row_idx = np.zeros(32)
for e in range(epochs):
    random_row_idx = np.random.choice(X_under.shape[0],size=32)
    X_batch = X_under[random_row_idx,:]
    y_batch = y_under[random_row_idx]
    network.train(X_batch, y_batch)

In [34]:
y_pred = network.run(X_test)
y_pred = np.where(y_pred >= 0.5, 1, 0) # if probability >= 0.5, it is 1, else 0
print(classification_report(y_pred[0], y_test))

              precision    recall  f1-score   support

           0       0.46      0.96      0.62      6398
           1       0.47      0.03      0.06      7344

    accuracy                           0.46     13742
   macro avg       0.47      0.50      0.34     13742
weighted avg       0.47      0.46      0.32     13742



We can see Neural network and its results are pretty much consistent with our ML models in this notebook and to previous notebooks' results. Idea was to see if any better can be obtained. Additionally, I wanted to show that I have one additional tool in my bag.

From here, we have seen that models are not making much improvement. In next notebook , I ll explain what are evaluation matrics are, what they mean to our problem, also I ll tune ML model and give more suggestions to improve results by model tuning.

**END OF NOTEBOOK 4**