In [1]:
import numpy as np
import pandas as pd

In [2]:
# Load the raw CSV into a DataFrame so the columns can be inspected before preprocessing.
df = pd.read_csv('ecommerce_data.csv')

In [3]:
df.head()

Unnamed: 0,is_mobile,n_products_viewed,visit_duration,is_returning_visitor,time_of_day,user_action
0,1,0,0.65751,0,3,0
1,1,1,0.568571,0,2,1
2,1,0,0.042246,1,1,0
3,1,1,1.659793,1,1,2
4,0,1,2.014745,1,1,2


In [24]:
def get_data(path='ecommerce_data.csv'):
    """Load the e-commerce CSV and return a preprocessed feature matrix and targets.

    The last column of the file is the target; every other column is a feature.
    Feature columns 1 and 2 (n_products_viewed and visit_duration) are
    standardized to zero mean and unit variance. The last feature column
    (time_of_day, integer levels 0-3) is replaced by a 4-column one-hot encoding.

    Parameters
    ----------
    path : str, optional
        Location of the CSV file. Defaults to 'ecommerce_data.csv'.

    Returns
    -------
    X2 : numpy.ndarray, shape (N, D + 3)
        The first D - 1 columns are the (partly standardized) original features;
        the last 4 columns are the one-hot encoding of time_of_day.
    Y : numpy.ndarray, shape (N,)
        Target values taken from the last column of the file (as floats).
    """
    n_time_levels = 4  # time_of_day takes the values 0, 1, 2 and 3

    df = pd.read_csv(path)
    # DataFrame.as_matrix() was removed in pandas 1.0; .values exists in every version.
    data = df.values.astype(np.float64)

    X = data[:, :-1]  # everything up to the last column
    Y = data[:, -1]   # the last column

    N, D = X.shape
    X2 = np.zeros((N, D - 1 + n_time_levels))
    X2[:, :D - 1] = X[:, :D - 1]  # all columns before time_of_day

    # Standardize the two numerical columns.
    for col in (1, 2):  # n_products_viewed, visit_duration
        X2[:, col] = (X2[:, col] - X2[:, col].mean()) / X2[:, col].std()

    # One-hot encode time_of_day into the last four columns in a single
    # vectorized assignment: row n gets a 1 in column (D - 1 + level_n).
    levels = X[:, D - 1].astype(np.int32)
    X2[np.arange(N), levels + D - 1] = 1

    # Sanity check: every sample has exactly one active time_of_day column.
    assert np.all(X2[:, -n_time_levels:].sum(axis=1) == 1)

    return X2, Y

In [26]:
# Build the preprocessed feature matrix X and the target vector Y from the CSV.
X, Y = get_data()

In [28]:
X.shape

(500, 8)

In [29]:
Y.shape

(500,)

In [30]:
X[0:4]

array([[ 1.        , -0.81697841, -0.40827769,  0.        ,  0.        ,
         0.        ,  0.        ,  1.        ],
       [ 1.        ,  0.13967078, -0.4994283 ,  0.        ,  0.        ,
         0.        ,  1.        ,  0.        ],
       [ 1.        , -0.81697841, -1.03884323,  1.        ,  0.        ,
         1.        ,  0.        ,  0.        ],
       [ 1.        ,  0.13967078,  0.6189325 ,  1.        ,  0.        ,
         1.        ,  0.        ,  0.        ]])

In [33]:
Y[0:20]

array([ 0.,  1.,  0.,  2.,  2.,  2.,  0.,  0.,  1.,  0.,  3.,  0.,  0.,
        1.,  0.,  3.,  1.,  1.,  1.,  0.])

In [35]:
np.unique(Y) # 4 classes

array([ 0.,  1.,  2.,  3.])

In [38]:
# The weights are initialized randomly because we have not yet covered how to train them.
np.random.seed(42)  # fixed seed so that re-running the notebook yields the same weights
M = 5  # number of hidden units
D = X.shape[1]  # number of input features
K = len(set(Y))  # number of distinct classes in the targets
W1 = np.random.randn(D, M)  # input -> hidden weights
b1 = np.zeros(M)            # hidden-layer bias
W2 = np.random.randn(M, K)  # hidden -> output weights
b2 = np.zeros(K)            # output-layer bias

In [45]:
def softmax(a):
    """Row-wise softmax of a 2-D array of activations.

    Parameters
    ----------
    a : numpy.ndarray, shape (N, K)
        Activations; one row per sample.

    Returns
    -------
    numpy.ndarray, shape (N, K)
        Non-negative values where every row sums to 1.
    """
    # Subtracting the row-wise maximum leaves the result mathematically unchanged
    # but keeps np.exp from overflowing to inf (and the ratio from becoming nan).
    shifted = a - np.max(a, axis=1, keepdims=True)
    expA = np.exp(shifted)
    return expA / expA.sum(axis=1, keepdims=True)

In [46]:
def forward(X, W1, b1, W2, b2):
    """Run one forward pass through a single-hidden-layer network.

    The hidden layer applies tanh to the affine map of the inputs; the output
    layer applies softmax to the affine map of the hidden activations.

    Returns the softmax output, one row per row of X.
    """
    hidden_activation = X.dot(W1) + b1
    hidden = np.tanh(hidden_activation)
    output_activation = hidden.dot(W2) + b2
    return softmax(output_activation)

In [47]:
# Forward pass with the untrained random weights: softmax output for every sample.
P_Y_given_X = forward(X, W1, b1, W2, b2)

In [49]:
P_Y_given_X.shape   # N x K matrix of probabilities: one row per sample, one column per class. Let's take a look

(500, 4)

In [53]:
P_Y_given_X

array([[ 0.16607706,  0.22346357,  0.25917435,  0.35128502],
       [ 0.36218466,  0.45860361,  0.15450134,  0.02471039],
       [ 0.18222171,  0.08329165,  0.44194049,  0.29254615],
       ..., 
       [ 0.07861972,  0.0570567 ,  0.17759228,  0.68673129],
       [ 0.17408743,  0.20848054,  0.25099403,  0.36643801],
       [ 0.126506  ,  0.06029472,  0.31631904,  0.49688024]])

In [56]:
P_Y_given_X.sum(axis = 1)[0:10]   # see sum of all the probability values adds up to 1 row wise. Just showing 10 of those

array([ 1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.])

In [57]:
# Let's pick, for each row of P_Y_given_X, the class with the highest probability

In [58]:
# Index of the largest probability in each row, i.e. the predicted class per sample.
predictions = P_Y_given_X.argmax(axis=1)

In [60]:
predictions[0:20]

array([3, 1, 2, 2, 2, 2, 2, 3, 1, 3, 1, 3, 0, 1, 3, 0, 1, 0, 3, 3], dtype=int64)

In [61]:
Y[0:20]

array([ 0.,  1.,  0.,  2.,  2.,  2.,  0.,  0.,  1.,  0.,  3.,  0.,  0.,
        1.,  0.,  3.,  1.,  1.,  1.,  0.])

In [62]:
def classification_rate(Y, P):
    """Return the fraction of predictions that match the actual classes.

    Parameters
    ----------
    Y : array-like
        Actual class labels.
    P : array-like
        Predicted class labels.

    Returns
    -------
    float
        Mean of the element-wise comparison Y == P.
    """
    # Convert to arrays first: for plain Python lists `Y == P` is a single bool,
    # which would make the mean 0.0 or 1.0 rather than the element-wise accuracy.
    actual = np.asarray(Y)
    predicted = np.asarray(P)
    return np.mean(actual == predicted)

In [63]:
# Accuracy of the untrained network on the full data set.
score = classification_rate(Y, predictions)
print("Score: ", score)

Score:  0.266


In [None]:
# With randomly assigned weights we get only about 26% accuracy, roughly what random guessing among 4 classes would give. We'll learn how to train the neural network in the next section.