# Online Shoppers Intention dataset (Perceptron)
Predicting online shoppers' purchasing intention using a simple neural network built with Keras:

In [1]:
import numpy as np
import pandas as pd
from sklearn.impute import SimpleImputer

In [9]:
# Load the raw dataset; the path is relative, so the CSV must sit in the
# notebook's working directory.
data = pd.read_csv("Online-shoppers-intention.csv")

In [10]:
# Cast the boolean Weekend / Revenue columns to 0/1 integers.
data[["Weekend", "Revenue"]] = data[["Weekend", "Revenue"]].astype(int)

# Encode VisitorType as a binary flag: 1 for "Returning_Visitor", 0 for
# every other value.
# NOTE(review): Month also holds string values, but no encoding for it is
# visible in this section of the notebook - confirm it is converted before
# the features are fed to the network.
data["VisitorType"] = (data["VisitorType"] == "Returning_Visitor").astype(int)

In [11]:
# Count missing values per column. This only reports them; nothing is
# imputed or dropped in this cell.
data.isnull().sum()

Homepage              0
Homepage _Duration    0
Aboutus               0
Aboutus_Duration      0
Contactus             0
Contactus_Duration    0
BounceRates           0
ExitRates             0
PageValues            0
SpecialDay            0
Month                 0
OperatingSystems      0
Browser               0
Province              0
TrafficType           0
VisitorType           0
Weekend               0
Revenue               0
dtype: int64

In [13]:
# Columns are expected to be non-negative (minimum value 0) before scaling.
def normalization(column, frame=None):
    """Rescale one column in place to the range [0.01, 1.0].

    The values are divided by the column maximum, multiplied by 0.99 and
    shifted by 0.01, so a value of 0 maps to 0.01 and the maximum maps to 1.0.

    Parameters
    ----------
    column : hashable
        Label of the column to rescale.
    frame : pandas.DataFrame, optional
        Frame to modify. Defaults to the notebook-level ``data`` frame.

    Notes
    -----
    A column whose maximum is 0 is mapped to a constant 0.01 instead of
    dividing by zero. Calling this twice on the same column rescales the
    already-scaled values again.
    """
    target = data if frame is None else frame
    # np.asfarray was removed in NumPy 2.0; np.asarray(..., dtype=float) is
    # the supported equivalent.
    values = np.asarray(target[column], dtype=float)
    peak = values.max()
    # Guard against an all-zero column (0 / 0 would yield NaN).
    scale = peak if peak else 1.0
    target[column] = values / scale * 0.99 + 0.01

In [14]:
# Rotate the last column (Revenue, per the column listing above) to the
# front so the label sits at position 0 and the features follow it.
# Re-running this cell rotates the columns again.
column_list = data.columns.tolist()
column_list = [column_list[-1], *column_list[:-1]]
data = data[column_list]

In [15]:
# Split the data into training and test sets (15% held out for testing)
# using scikit-learn's built-in helper.
from sklearn.model_selection import train_test_split

# A fixed random_state makes the shuffle, and therefore the saved CSV files
# and every downstream result, reproducible across kernel restarts.
shoppers_train, shoppers_test = train_test_split(data, test_size=0.15, random_state=42)

# Persist both partitions without the row index so they can be reloaded as-is.
shoppers_train.to_csv(r"shoppers_train.csv", index=False, header=True)
shoppers_test.to_csv(r"shoppers_test.csv", index=False, header=True)


In [16]:
# The first column is the label; every remaining column is a feature.
y_train = shoppers_train.iloc[:, 0]
x_train = shoppers_train.iloc[:, 1:]
print(x_train.shape, y_train.shape)

(10480, 17) (10480,)


In [17]:
class NeuralNetwork():
    """Fully connected network with one hidden layer and sigmoid activations.

    Both weight matrices are initialised uniformly in [-0.5, 0.5). There are
    no bias terms. Training is full-batch: every iteration runs one forward
    pass over all supplied samples and applies one weight update.
    """

    def __init__(self, inputnodes, hiddennodes, outputnodes, learningrate, seed=None):
        """Create the network and draw its initial weights.

        Parameters
        ----------
        inputnodes, hiddennodes, outputnodes : int
            Number of units in the input, hidden and output layers.
        learningrate : float
            Step size applied to every weight update.
        seed : int, optional
            When given, weights are drawn from a private
            ``np.random.RandomState(seed)`` so the initialisation is
            reproducible. When omitted, the global NumPy random state is used.
        """
        self.inputnodes = inputnodes
        self.hiddennodes = hiddennodes
        self.outputnodes = outputnodes
        # Backward-compatible alias for the original (misspelled) attribute name.
        self.outputputnodes = outputnodes

        self.learningrate = learningrate

        # Weights for both layers, shaped (units_out, units_in).
        rng = np.random if seed is None else np.random.RandomState(seed)
        self.w_input_hidden = rng.rand(self.hiddennodes, self.inputnodes) - 0.5
        self.w_hidden_output = rng.rand(self.outputnodes, self.hiddennodes) - 0.5

    def sigmoid(self, x):
        """Return the logistic function 1 / (1 + exp(-x)) element-wise."""
        return 1/(1 + np.exp(-x))

    def sigmoid_derivative(self, x):
        """Return x * (1 - x).

        This is the sigmoid derivative expressed in terms of the sigmoid's
        *output*, so ``x`` must already be an activation, not a pre-activation.
        """
        return x * (1 - x)

    def _forward(self, inputs):
        """Run a forward pass; ``inputs`` is (inputnodes, n_samples)."""
        hidden_outputs = self.sigmoid(np.dot(self.w_input_hidden, inputs))
        final_outputs = self.sigmoid(np.dot(self.w_hidden_output, hidden_outputs))
        return hidden_outputs, final_outputs

    def train(self, input_list, target_list, training_iterations):
        """Train on the given samples and return the last forward-pass output.

        Parameters
        ----------
        input_list : array-like
            One sample of length ``inputnodes`` or a 2-D collection shaped
            (n_samples, inputnodes).
        target_list : array-like
            Targets for those samples: length ``outputnodes`` for a single
            sample, length ``n_samples`` for a single-output batch, or
            (n_samples, outputnodes).
        training_iterations : int
            Number of full-batch weight updates to perform.

        Returns
        -------
        numpy.ndarray
            Output activations shaped (outputnodes, n_samples), computed
            during the final iteration *before* its weight update. With zero
            iterations this is a plain forward pass with the current weights.

        Raises
        ------
        ValueError
            If the inputs are not numeric, or the number of target rows does
            not match the number of samples.
        """
        # Converted once: these do not change between iterations.
        inputs = np.array(input_list, dtype=float, ndmin=2).T
        # One row per sample, then transposed to line up with the output
        # activations (outputnodes, n_samples). The original transposed the
        # targets twice, which only lined up when outputnodes == 1.
        targets = np.array(target_list, dtype=float).reshape(-1, self.outputnodes).T
        if targets.shape[1] != inputs.shape[1]:
            raise ValueError(
                "got %d target rows for %d input samples"
                % (targets.shape[1], inputs.shape[1])
            )

        final_outputs = None
        for _ in range(training_iterations):
            hidden_outputs, final_outputs = self._forward(inputs)

            outputs_error = targets - final_outputs
            # Propagate the raw output error back through the (not yet
            # updated) hidden->output weights.
            hidden_errors = np.dot(self.w_hidden_output.T, outputs_error)

            self.w_hidden_output += self.learningrate * np.dot(
                outputs_error * self.sigmoid_derivative(final_outputs),
                hidden_outputs.T,
            )
            self.w_input_hidden += self.learningrate * np.dot(
                hidden_errors * self.sigmoid_derivative(hidden_outputs),
                inputs.T,
            )

        if final_outputs is None:
            # No training requested: still return a well-defined prediction.
            _, final_outputs = self._forward(inputs)
        return final_outputs


In [18]:
# Layer sizes: one input per feature column (17 after removing the label),
# a small hidden layer, and a single output for the Revenue prediction.
input_nodes = 17
hidden_nodes = 8
output_nodes = 1

learning_rate = 0.1

n = NeuralNetwork(
    inputnodes=input_nodes,
    hiddennodes=hidden_nodes,
    outputnodes=output_nodes,
    learningrate=learning_rate,
)

In [19]:
# print(n.train(x_train, y_train,1000))

In [20]:

# Smoke test: one training iteration on a single hand-written sample
# (17 feature values, target 0). This call updates the weights of `n`.
inp = [0, 0, 0, 0, 1, 0, 0.2, 0.2, 0, 0, 2, 1, 1, 1, 1, 1, 0]
out = [0]

print(n.train(inp, out, 1))


[[0.39798377]]
