# Online Shoppers Intention dataset

In [2]:
from pathlib import Path

import numpy as np
import pandas as pd
from keras.layers import Dense, Dropout
from sklearn.model_selection import train_test_split
from tensorflow.keras import Sequential

In [3]:
# Read the shopper-session table from the CSV file in the working directory.
csv_path = "Online-shoppers-intention.csv"
data = pd.read_csv(csv_path)

In [4]:
# Cast the boolean Weekend / Revenue flags to integer 0/1.
flag_columns = ["Weekend", "Revenue"]
data[flag_columns] = data[flag_columns].to_numpy().astype(int)

# Encode VisitorType as 1 for "Returning_Visitor" and 0 for every other value.
# NOTE(review): if the CSV already stores VisitorType numerically, this maps
# every row to 0 - confirm against the raw file.
data["VisitorType"] = data["VisitorType"].eq("Returning_Visitor").astype(int)

In [5]:
# Rescale a non-negative column so that its values fall in [0.01, 1.0].
def normalization(column, frame=None):
    """Rescale one column in place to the range [0.01, 1.0].

    Each value is divided by the column maximum, multiplied by 0.99 and
    shifted by 0.01, so 0 maps to 0.01 and the maximum maps to exactly 1.0.
    The column is expected to be non-negative.

    Parameters
    ----------
    column : str
        Name of the column to rescale.
    frame : pandas.DataFrame, optional
        Frame to modify. Defaults to the notebook-level ``data`` frame, which
        matches the original single-argument call ``normalization(column)``.

    Returns
    -------
    None
        The column is overwritten inside ``frame``.
    """
    if frame is None:
        frame = data  # fall back to the notebook's global DataFrame

    values = frame[column].astype(float)
    peak = float(values.max())

    # A column of zeros has nothing to scale; avoid 0/0 -> NaN and let every
    # entry land on the lower bound instead.
    scaled = values / peak * 0.99 if peak != 0 else values * 0.0
    frame[column] = scaled + 0.01

In [6]:
# Show the first five rows as a quick sanity check of the encoded frame.
data.head(5)

Unnamed: 0,Homepage,Homepage _Duration,Aboutus,Aboutus_Duration,Contactus,Contactus_Duration,BounceRates,ExitRates,PageValues,SpecialDay,Month,OperatingSystems,Browser,Province,TrafficType,VisitorType,Weekend,Revenue
0,0.0,0.0,0.0,0.0,1.0,0.0,0.2,0.2,0.0,0.0,2.0,1.0,1.0,1.0,1.0,0,0.0,0.0
1,0.0,0.0,0.0,0.0,2.0,64.0,0.0,0.1,0.0,0.0,2.0,2.0,2.0,1.0,2.0,0,0.0,0.0
2,0.0,81.126229,0.0,34.60178,1.0,1199.253065,0.2,0.2,0.0,0.0,2.0,4.0,1.0,9.0,3.0,0,0.0,0.0
3,0.0,0.0,0.0,0.0,2.0,2.666667,0.05,0.14,0.0,0.0,2.0,3.0,2.0,2.0,4.0,0,0.0,0.0
4,0.0,0.0,0.0,0.0,10.0,627.5,0.02,0.05,0.0,0.0,2.0,3.0,3.0,1.0,4.0,0,1.0,0.0


In [7]:
# Move the target column ("Revenue") to position 0 so later cells can split
# label and features by position.
# The column is selected by name rather than by "whatever is last", so
# re-running this cell leaves the order unchanged instead of rotating a
# different column to the front.
target_column = "Revenue"
column_list = [target_column] + [name for name in data.columns if name != target_column]
data = data[column_list]

In [8]:
# Separate features from the label: column 0 is the label (revenue),
# every remaining column is a model input.
# NOTE: `input` shadows the Python builtin; the name is kept because the
# following cells refer to it.
input, output = data.iloc[:, 1:], data.iloc[:, 0]
print(input.shape, output.shape)

(12330, 17) (12330,)


In [9]:
# Hold out 15% of the rows for testing; the fixed random_state makes the
# split repeatable across runs.
input_train, input_test, output_train, output_test = train_test_split(
    input,
    output,
    test_size=0.15,
    random_state=42,
)

In [10]:
# Fully connected binary classifier, declared as a list of layers.
# - The first Dense layer must be told the input width (17 feature columns).
# - Hidden layers use ReLU.
# - The single output unit uses a sigmoid so the prediction is a probability
#   between 0 and 1.
model = Sequential(
    [
        Dense(17, input_dim=17, activation="relu"),
        Dense(8, activation="relu"),
        Dense(1, activation="sigmoid"),
    ]
)

In [11]:
# Configure training: binary cross-entropy loss, Adam optimizer, and
# accuracy reported as the tracked metric.
model.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=['accuracy'],
)

In [12]:
# Train for 10 epochs in mini-batches of 20 rows.
# NOTE: the held-out test split is also passed as validation data, so the
# per-epoch validation numbers are computed on the same rows that the later
# evaluation cell uses.
model.fit(
    input_train,
    output_train,
    epochs=10,
    batch_size=20,
    validation_data=(input_test, output_test),
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x188df591278>

In [13]:
# Hand-written sample row (17 feature values) and, presumably, its expected
# label. Neither is used by the evaluation below; they are kept as-is.
inp = [[0,0,0,4,1,0,0.2,0.2,0,0,2,1,1,1,1,1,0]]
out = [[0]]

# Score the trained model on the held-out split and report the second entry
# of the results (the metric that follows the loss) as a percentage.
scores = model.evaluate(input_test, output_test)
metric_name, metric_value = model.metrics_names[1], scores[1]
print(f"\n{metric_name}: {metric_value * 100:.2f}%")


accuracy: 87.30%


In [17]:
# Persist the trained model as DeployModel/keras-model.h5.
# The path is assembled with pathlib instead of a hard-coded backslash:
# "\k" inside a normal string literal is an invalid escape sequence, and a
# backslash is only a directory separator on Windows.
model_path = Path("DeployModel") / "keras-model.h5"
# Create the target directory up front so the save cannot fail on a missing folder.
model_path.parent.mkdir(parents=True, exist_ok=True)
model.save(str(model_path))

In [15]:
# NOTE(review): disabled experiment, kept for reference only. `accuracy` is not
# defined or imported in any visible cell, so these lines cannot run as written.
# from keras.wrappers.scikit_learn import KerasClassifier
# y_pred_nn = model.predict_classes(input_test)
# print("\n Testing Accuracy of Neural Network: ",round(accuracy(output_test,y_pred_nn),3))

In [16]:
# Print the layer-by-layer architecture and parameter counts of the model.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 17)                306       
_________________________________________________________________
dense_1 (Dense)              (None, 8)                 144       
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 9         
Total params: 459
Trainable params: 459
Non-trainable params: 0
_________________________________________________________________
