In [156]:
from sklearn.datasets import fetch_covtype
import numpy as np 
data = fetch_covtype(return_X_y=True)
X, y = data

In [157]:
n_samples=X.shape[0]
num_classes=len(np.unique(y))
train_split=0.8
train_size=int(n_samples*train_split)

index=np.arange(1,n_samples)
idx_perm=np.random.permutation(index)

X=X[idx_perm]
y=y[idx_perm]

X_train=X[:train_size]
y_train=y[:train_size]

X_test=X[train_size:]
y_test=y[train_size:]

In [158]:
mins=np.zeros((X_train.shape[1],1))
maxs=np.zeros((X_train.shape[1],1))

for i in range (0,X.shape[1]):
    min_=min(X_train[:,i])
    max_=max(X_train[:,i])
    X_train[:,i]=(X_train[:,i]-min_)/(max_-min_)
    mins[i]=min_
    maxs[i]=max_

for j in range (0,X.shape[1]):
    X_test[:,j]=(X_test[:,j]-mins[j])/(maxs[j]-mins[j])

In [159]:
layer_1 = 100
layer_2 = 150
batches = 128

W_1=np.random.randn(X_train.shape[1],layer_1)
b_1=np.zeros((1,layer_1))

W_2=np.random.randn(layer_1,layer_2)
b_2=np.zeros((1,layer_2))

W_3=np.random.randn(layer_2,num_classes)
b_3=np.zeros((1,num_classes))

u_W1=np.zeros((X_train.shape[1],layer_1))
u_W2=np.zeros((layer_1,layer_2))
u_W3=np.zeros((layer_2,num_classes))
u_b1=np.zeros((1,layer_1))
u_b2=np.zeros((1,layer_2))
u_b3=np.zeros((1,num_classes))

epochs=100
lr=0.001
epsilon=0.00001
gamma=0.96

In [160]:
def relu(z):
    return np.maximum(0,z)

def softmax(Z):
    Z = Z - np.max(Z, axis=1, keepdims=True)
    expZ = np.exp(Z)
    return expZ / np.sum(expZ, axis=1, keepdims=True)

def der_relu(z):
    dz = np.zeros_like(z)
    dz[z > 0] = 1
    return dz

def one_hot_enconding(y,num_classes):
    Y=np.zeros((len(y),num_classes))
    for i in range(0,num_classes):
        Y[y==i,i]=1
    return Y

In [161]:
Y=one_hot_enconding(y_train,num_classes)

def decoding(Y):
    y=np.zeros((Y.shape[0],1))
    print(Y.shape[0])
    for j in range(0,Y.shape[0]):
        y[j]=np.argmax(Y[j,:])
    return y

def forward_pass(W_1, W_2,W_3,b_1,b_2,b_3, X):
    z1 = X.dot(W_1) + b_1
    a1 = relu(z1)
    z2 = a1.dot(W_2) + b_2
    a2 = relu(z2)
    z3 = a2.dot(W_3) + b_3
    a3 = softmax(z3)
    return np.argmax(a3, axis=1)
loss=np.zeros((epochs,1))

In [162]:
print(Y.shape)

(464809, 7)


In [163]:
for epoch in range(0,epochs):
    
    index=np.arange(0,train_size)
    idx_perm=np.random.permutation(index)

    X_train=X_train[idx_perm]
    y_train=Y[idx_perm]

    batche_size=int(train_size/batches)

    for j in range(0,batches):
        # forward pass
        if j!=batches-1:
            Xbatch=X_train[j*batches:(j+1)*batches,:]
            Ybatch=Y[j*batches:(j+1)*batches,:]
        else:
            Xbatch=X_train[j*batches:,:]
            Ybatch=Y[j*batches:,:]

        z1 = Xbatch.dot(W_1) + b_1
        a1 = relu(z1)
        z2 = a1.dot(W_2) + b_2
        a2 = relu(z2)
        z3 = a2.dot(W_3) + b_3
        a3 = softmax(z3)
    
        # gradients
        dz3 = a3 - Ybatch
        dw3 = a2.T @ dz3 / Xbatch.shape[0]
        db3 = np.sum(dz3, axis=0, keepdims=True) / Xbatch.shape[0]

        da2 = dz3 @ W_3.T
        dz2 = da2 * der_relu(z2)
        dw2 =  a1.T @ dz2 / Xbatch.shape[0]
        db2 = np.sum(dz2, axis=0, keepdims=True) / Xbatch.shape[0]

        da1 = dz2 @ W_2.T
        dz1 = da1 * der_relu(z1)
        dw1 =  Xbatch.T @ dz1 / Xbatch.shape[0]
        db1 = np.sum(dz1, axis=0, keepdims=True) / Xbatch.shape[0]

        u_W3=gamma*u_W3+(1-gamma)*(dw3)**2
        u_b3=gamma*u_b3+(1-gamma)*(db3)**2
        W_3-=lr*dw3/(np.sqrt(u_W3)+epsilon)
        b_3-=lr*db3/(np.sqrt(u_b3)+epsilon)

        u_W2=gamma*u_W2+(1-gamma)*(dw2)**2
        u_b2=gamma*u_b2+(1-gamma)*(db2)**2
        W_2-=lr*dw2/(np.sqrt(u_W2)+epsilon)
        b_2-=lr*db2/(np.sqrt(u_b2)+epsilon) 

        u_W1=gamma*u_W1+(1-gamma)*(dw1)**2
        u_b1=gamma*u_b1+(1-gamma)*(db1)**2
        W_1-=lr*dw1/(np.sqrt(u_W1)+epsilon)
        b_1-=lr*db1/(np.sqrt(u_b1)+epsilon)
        
        a1=0
        a2=0
        a3=0
        z1=0
        z2=0
        z3=0
    print(epoch)

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99


In [164]:
y_pred=forward_pass(W_1,W_2,W_3,b_1,b_2,b_3,X_test)
accuracy=np.mean(y_pred==y_test)
print(accuracy)

0.48777990051806336
