Creating a forward prop network using the Ecommerce data

https://github.com/lazyprogrammer/machine_learning_examples/blob/master/ann_logistic_extra/ann_predict.py


In [1]:
%run 'ECommerce.ipynb' #runs the notebook with the data prep work

[[ 1.          0.          0.65750995  0.          3.          0.        ]
 [ 1.          1.          0.56857123  0.          2.          1.        ]
 [ 1.          0.          0.042246    1.          1.          0.        ]
 ..., 
 [ 0.          0.          0.1728534   1.          3.          0.        ]
 [ 1.          0.          0.2099644   0.          3.          0.        ]
 [ 0.          0.          2.61688195  1.          3.          0.        ]]
(500L, 5L)
(500L,)
[[ 1.         -1.89931526 -1.37410378 ...,  0.          0.          1.        ]
 [ 1.         -0.89931526 -1.46304249 ...,  0.          1.          0.        ]
 [ 1.         -1.89931526 -1.98936773 ...,  1.          0.          0.        ]
 ..., 
 [ 0.         -1.89931526 -1.85876033 ...,  0.          0.          1.        ]
 [ 1.         -1.89931526 -1.82164933 ...,  0.          0.          1.        ]
 [ 0.         -1.89931526  0.58526823 ...,  0.          0.          1.        ]]


In [2]:
#get_data is not defined in this notebook; presumably it comes from the ECommerce notebook %run above (TODO confirm)
X, Y = get_data() #X: feature matrix (one row per sample), Y: class label for each row

In [3]:
#Let's set some parameters
#NOTE(review): no random seed is set in this notebook, so W1/W2 (and every score below) change on each run unless the %run'd notebook seeds NumPy

M = 5 #Number of hidden units
D = X.shape[1] #Number of input features (size of axis 1 of X); 8 for this dataset
K = len(set(Y)) #Number of output classes: a set keeps only the unique target values, so its length is the class count (4 here)

W1 = np.random.randn(D, M) #input-to-hidden weights, shape (D, M) = 8x5, sampled from a standard normal
b1 = np.zeros(M) #hidden-layer bias: 1-D array of length M (5), all zeros
W2 = np.random.randn(M, K) #hidden-to-output weights, shape (M, K) = 5x4, sampled from a standard normal
b2 = np.zeros(K) #output-layer bias: 1-D array of length K (4), all zeros

In [4]:
#Define our softmax function

def softmax(a):
    """Row-wise softmax of a 2-D array of activations.

    Parameters
    ----------
    a : array-like, shape (N, K)
        One row of raw scores per sample.

    Returns
    -------
    numpy.ndarray, shape (N, K)
        Non-negative values; every row sums to 1.
    """
    # Shifting each row by its own maximum does not change the softmax value
    # (the common factor cancels in the ratio) but keeps np.exp from
    # overflowing to inf, which would otherwise turn the result into NaN.
    shifted = a - np.max(a, axis=1, keepdims=True)
    expA = np.exp(shifted)
    return expA / expA.sum(axis=1, keepdims=True)

In [5]:
#Create our forward prop function. Notice this time we are using tanh rather than sigmoid as our activation function

def forward(X, W1, b1, W2, b2):
    """Forward pass through a network with one tanh hidden layer.

    X is multiplied by W1 and offset by b1, passed through tanh, then
    multiplied by W2 and offset by b2; the result is handed to softmax,
    whose output is returned.
    """
    hidden_input = X.dot(W1) + b1  # .dot is matrix multiplication
    hidden = np.tanh(hidden_input)
    output_scores = hidden.dot(W2) + b2
    return softmax(output_scores)
    

In [6]:
#Getting the outputs

P_Y_given_X = forward(X, W1, b1, W2, b2) #softmax output of the untrained network: one row per sample, one column per class
predictions = np.argmax(P_Y_given_X, axis=1) #Predictions: column index of the largest value in each row

Note that 'P_Y_given_X' is a 500x4 matrix: one row per input sample and one column per output class. Each row holds the
predicted class probabilities (likelihoods), e.g. [ 0.28172556,  0.21584919,  0.25067366,  0.25175159]. The argmax picks the largest
value in each row (the most likely category for that sample) and returns the index of the column it is in (in this case 0-3).

In [7]:
#Calculate accuracy

def classification_rate(Y, P):
    """Return the fraction of entries where targets Y equal predictions P."""
    is_correct = (Y == P)  # element-wise comparison of targets and predictions
    return np.mean(is_correct)  # mean of the matches = correct / total

In [8]:
#A single-argument print(...) produces the same text on Python 2 and Python 3;
#the bare print statement is a SyntaxError on Python 3.
print("Score: {}".format(classification_rate(Y, predictions)))

Score: 0.178


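I expected about 25% (chance level for 4 classes). That figure only holds if both the predictions and the targets are spread evenly over the classes; with random, untrained weights the network happens to favour one class (mostly class 2 here, see the outputs below), so the score can land above or below 25%. We'll see how it develops once we start to train it.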

In [9]:
np.mean(Y)

0.748

In [10]:
np.mean(predictions)

1.6899999999999999

In [11]:
np.mean(Y == predictions)

0.17799999999999999

In [12]:
P_Y_given_X

array([[ 0.19714023,  0.01758698,  0.75599731,  0.02927548],
       [ 0.2278813 ,  0.01485784,  0.69625401,  0.06100685],
       [ 0.02011641,  0.02858025,  0.92467299,  0.02663035],
       ..., 
       [ 0.19387182,  0.04493136,  0.70114653,  0.06005029],
       [ 0.24308289,  0.02242113,  0.697645  ,  0.03685099],
       [ 0.05242782,  0.27534583,  0.60961857,  0.06260779]])

In [13]:
predictions

array([2, 2, 2, 2, 1, 2, 2, 2, 1, 2, 1, 2, 2, 2, 2, 1, 1, 1, 2, 2, 2, 2, 2,
       1, 2, 2, 2, 2, 3, 1, 2, 1, 1, 1, 1, 2, 2, 2, 2, 2, 1, 2, 2, 1, 1, 1,
       0, 2, 2, 2, 1, 1, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 1,
       2, 2, 2, 2, 2, 2, 2, 1, 1, 2, 1, 2, 2, 0, 2, 2, 2, 2, 2, 1, 1, 2, 2,
       2, 2, 1, 1, 1, 1, 2, 2, 2, 1, 2, 2, 2, 3, 2, 2, 1, 2, 1, 2, 2, 2, 2,
       1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 1, 2, 1, 2, 2, 2, 0, 2, 2, 2, 2, 1,
       2, 2, 2, 2, 2, 2, 2, 0, 1, 2, 2, 2, 1, 1, 2, 1, 2, 2, 2, 2, 1, 1, 2,
       1, 2, 2, 2, 3, 2, 2, 2, 1, 1, 2, 1, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2,
       2, 1, 2, 2, 2, 2, 1, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1, 2, 1, 1, 2,
       2, 2, 3, 2, 2, 2, 2, 2, 2, 0, 2, 1, 1, 0, 2, 2, 2, 1, 2, 2, 2, 2, 1,
       2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 2, 1, 2, 2, 1, 1, 2, 2,
       2, 1, 0, 2, 2, 1, 0, 2, 2, 1, 2, 2, 1, 1, 2, 2, 1, 2, 2, 1, 1, 2, 1,
       2, 1, 1, 2, 2, 2, 1, 2, 2, 1, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 1, 2, 2,
       2, 2,

In [14]:
P_Y_given_X.sum(axis=0)

array([  65.00136008,  115.42522244,  265.36399944,   54.20941804])

In [15]:
W1

array([[-0.05044311, -0.55835037, -0.17279349,  0.23484872,  1.32395406],
       [ 0.71110169,  0.54462538, -0.71487878, -0.42893933, -2.11999973],
       [ 0.57439436,  0.97736134,  0.608778  , -0.2995514 , -0.21239249],
       [-0.2616803 ,  0.80928768, -1.4002453 ,  0.83436383,  0.31563695],
       [-0.8860903 ,  1.82013202,  0.17479108, -0.65067514,  0.36893838],
       [ 2.83827726, -0.94901018, -1.19293027, -0.49187607,  0.33751685],
       [ 0.25822207, -0.50813324,  1.52867664, -0.12112986, -0.67526804],
       [ 0.85749192, -1.00729908, -1.04541308, -1.19503429,  2.09885751]])

In [16]:
b2.shape

(4L,)

In [17]:
W2.shape

(5L, 4L)

In [18]:
X.shape

(500L, 8L)

In [19]:
X.shape[0]

500L

In [20]:
set(Y)

{0.0, 1.0, 2.0, 3.0}

In [21]:
K

4