In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [3]:
data =pd.read_csv("/kaggle/input/sign-language-mnist/sign_mnist_train/sign_mnist_train.csv")
data2=pd.read_csv("/kaggle/input/sign-language-mnist/sign_mnist_test/sign_mnist_test.csv")



In [4]:
data = np.array(data)
data2= np.array(data2)
m, n = data.shape
print(m)
np.random.shuffle(data) # shuffle before splitting into dev and training sets
np.random.shuffle(data2)
data_train = data[0:m].T
data_test = data2[0:m].T
Y_train = data_train[0]
X_train = data_train[1:n]
Y_test = data_test[0]
X_test = data_test[1:n]
 
_,m_train = X_train.shape
X_train=X_train/255
X_test=X_test/255



27455


In [5]:
def initial_parameters():
    W1=np.random.randn(100,784)*np.sqrt(2/784)
    W3=np.random.randn(26,100)*np.sqrt(2/40)
    B1=np.zeros((100,1)) 
    B3=np.zeros((26,1))
    vW1= np.zeros_like(W1)
    vW3=np.zeros_like(W3)
    vB1=np.zeros_like(B1)
    vB3=np.zeros_like(B3)
    
    return W1,W3,B1,B3,vW1,vW3,vB1,vB3
    
def softmax(Z):
    Z_shifted = Z - np.max(Z, axis=0, keepdims=True)
    
    # Apply the Softmax function
    exp_Z = np.exp(Z_shifted)
    sum_exp_Z = np.sum(exp_Z, axis=0, keepdims=True)
    A=exp_Z / sum_exp_Z
    A=np.clip(A,1e-10,1.0)
    return A
def create_mini_batches(X,Y,batch_size=100):
    m=X.shape[1]
    mini_batches=[]
    for i in range(0,m,batch_size):
        X_mini=X[:,i:i+batch_size]
        Y_mini=Y[:,i:i+batch_size]
        mini_batches.append((X_mini,Y_mini))
    return mini_batches
def dropout(A,dropout_rate):
    D=np.random.rand(A.shape[0],A.shape[1])<(1-dropout_rate)
    A*=D
    A/=(1-dropout_rate)#scale activations
    return A
def learning_rate_decay(alpha,epoch,decay_rate=0.0001):
    return alpha/(1+decay_rate*epoch)
def LeakyReLU(Z,alpha=0.01):
    return np.where(Z>0,Z,alpha*Z)
def forward_propagation(W1,W3,B1,B3,X):
    Z1=W1.dot(X)+B1
    A1=LeakyReLU(Z1)
    A1=dropout(A1,0.0)#20%dropout
    Z3=W3.dot(A1)+B3
    A3=softmax(Z3)
    return Z1,Z3,A1,A3    
def LeakyReLU_deriv(Z,alpha=0.1):
    return np.where(Z>0,1,alpha)
def get_y(Z):
    Y=np.zeros((Z.max()+2,Z.size))#we added 2 specifically since our dataset has no values for z=25
    Y[Z,np.arange(Z.size)]=1
    return Y
def backward_propagation(m,X,Z1,Z3,A1,A3,W1,W3,Y):
    dZ3=A3-Y#same due to usage of softmax
    dW3=(1/m)*(dZ3.dot(A1.T))
    dB3 = (1/m) * np.sum(dZ3, axis=1, keepdims=True)
    dA1=W3.T.dot(dZ3)
    dZ1=dA1*LeakyReLU_deriv(Z1)
    dW1=(1/m)*dZ1.dot(X.T)
    dB1 = (1/m) * np.sum(dZ1, axis=1, keepdims=True)
    
   
    return dW1,dW3,dB1,dB3
    
def update_parameters(W1,W3,B1,B3,dW1,dW3,dB1,dB3,vW1,vW3,vB1,vB3,alpha,beta):
    #updating the velocity terms
    vW1 = beta * vW1 + (1 - beta) * dW1
    vW3 = beta * vW3 + (1 - beta) * dW3
    vB1 = beta * vB1 + (1 - beta) * dB1
    vB3 = beta * vB3 + (1 - beta) * dB3
    #updating the parameters now:
    W1=W1-alpha*vW1
    W3=W3-alpha*vW3
    B1=B1-alpha*vB1
    B3=B3-alpha*vB3
    return W1,W3,B1,B3,vW1,vW3,vB1,vB3
def get_predictions(A3):
    return np.argmax(A3,0)#0->max index per column.....1->for max index per row
def get_accuracy(predictions,Y):
    return np.sum(predictions==Y)/Y.size
def gradient_descent_step(X,alpha,Y_train,m,iterations,batch_size,beta):
    W1,W3,B1,B3,vW1,vW3,vB1,vB3=initial_parameters()
    
    Y=get_y(Y_train)
    for i in range(iterations):
        mini_batches=create_mini_batches(X,Y,batch_size)
        alpha=learning_rate_decay(alpha,i)
        for X_mini,Y_mini in mini_batches:
            Z1,Z3,A1,A3=forward_propagation(W1,W3,B1,B3,X_mini)
            dW1,dW3,dB1,dB3=backward_propagation(m,X_mini,Z1,Z3,A1,A3,W1,W3,Y_mini)        
            W1,W3,B1,B3,vW1,vW3,vB1,vB3=update_parameters(W1,W3,B1,B3,dW1,dW3,dB1,dB3,vW1,vW3,vB1,vB3,alpha,beta)
        if(i%10==0):
            print("Iteration: ", i)
            Z1,Z3,A1,A3=forward_propagation(W1,W3,B1,B3,X)
            predictions=get_predictions(A3)            
            print(get_accuracy(predictions,Y_train))
    return W1,W3,B1,B3

W1,W3,B1,B3=gradient_descent_step(X_train,0.4,Y_train,m,200,1000,0.9)

Iteration:  0
0.05328719723183391
Iteration:  10
0.4287379348024039
Iteration:  20
0.5401566199235112
Iteration:  30
0.6087415771262065
Iteration:  40
0.66264796940448
Iteration:  50
0.703150610089237
Iteration:  60
0.7326898561282098
Iteration:  70
0.7528319067565107
Iteration:  80
0.7679839737752686
Iteration:  90
0.7785102895647423
Iteration:  100
0.7909670369695866
Iteration:  110
0.8012019668548533
Iteration:  120
0.8094700418867238
Iteration:  130
0.8170460753961027
Iteration:  140
0.8224367146239301
Iteration:  150
0.8262247313786195
Iteration:  160
0.8304498269896193
Iteration:  170
0.8335822254598434
Iteration:  180
0.8361318521216536
Iteration:  190
0.8378437443088691


In [8]:
Z1,Z3,A1,A3=forward_propagation(W1,W3,B1,B3,X_test)
predictions=get_predictions(A3)
print(get_accuracy(predictions,Y_test))

0.6645287228109314


In [7]:
np.save("/kaggle/working/W1.npy", W1)
np.save("/kaggle/working/W3.npy", W3)
np.save("/kaggle/working/B1.npy", B1)
np.save("/kaggle/working/B3.npy", B3)