In [17]:
from google.colab import drive

In [18]:
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [19]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import math
from sklearn.metrics import classification_report,confusion_matrix,accuracy_score

In [20]:
data_Train=pd.read_csv('/content/drive/MyDrive/classification_train.csv')
data_Train = data_Train.drop(columns='Unnamed: 0')
Y_Data=data_Train.label
print(Y_Data)
print(data_Train)
data_Train_one_hot_encoded=pd.get_dummies(data_Train, columns=['label'])
data_Train_one_hot_encoded['label']=Y_Data
print(data_Train_one_hot_encoded)

0        8
1        4
2        1
3        8
4        2
        ..
29995    3
29996    4
29997    9
29998    4
29999    6
Name: label, Length: 30000, dtype: int64
       label  pixel1  pixel2  pixel3  pixel4  pixel5  pixel6  pixel7  pixel8  \
0          8       0       0       0       0       0       0       0       0   
1          4       0       0       0       0       0       0       0       0   
2          1       0       0       0       0       0       0       0       0   
3          8       0       0       0       0       0       0       0       0   
4          2       0       0       0       0       1       0       3       0   
...      ...     ...     ...     ...     ...     ...     ...     ...     ...   
29995      3       0       0       0       0       0       0       0       0   
29996      4       0       0       0       0       0       0       0       0   
29997      9       0       0       0       0       0       0       0       0   
29998      4       0       0       0  

In [21]:
X_data=data_Train_one_hot_encoded.to_numpy()
X=X_data[:,0:784]
print(X.shape)

(30000, 784)


In [22]:
Y_data= data_Train_one_hot_encoded.to_numpy()
Y=Y_data[:,784:796]
print(Y.shape)

(30000, 11)


In [23]:
def normalize_data(x):
 x=x/255
 return x

 #  mu=np.mean(x,axis=0)
#  sigma=np.std(x,axis=0)
#  x=(x-mu)/sigma

In [24]:
X=normalize_data(X)
print(X)

[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]


In [25]:
from sklearn.model_selection import train_test_split
X_train,X_test,y_train, y_test = train_test_split(X,Y,random_state=10, test_size=0.2, shuffle=True)
y_label_train=y_train[:,10].reshape(-1,1)
y_train=y_train[:,0:10]
y_label_test=y_test[:,10].reshape(-1,1)
y_test=y_test[:,0:10]

In [37]:
class NeuralNetwork:


  def init_params(architecture):
    weights=[]
    bias=[]
    np.random.seed(10)
    for i in range(architecture.shape[1]-1):
        a=np.random.randn(architecture[0][i],architecture[0][i+1])*math.sqrt(2/(architecture[0][i+1]))     
        weights.append(a)
    for i in range(architecture.shape[1]-1):
        # random.seed(15)
        c=np.random.randn(1,architecture[0][i+1])*math.sqrt(2/(architecture[0][i+1]))
        bias.append(c)   

    return weights,bias   




  def ReLU(Z):
    return np.maximum(Z, 0)




  # def softmax(Z): 
  #   A = np.exp(Z) / np.sum(np.exp(Z),axis=0)
  #   return A





  def forward_prop(w,b,X,architecture):
    all_activations=[]
    z_all=[]
    x=X
    for i in range(architecture.shape[1]-2):
      z=np.dot(x,w[i])+b[i]
    #   print(z.shape)
      z_all.append(z)
      a=NeuralNetwork.ReLU(z)
    #   print(a.shape)
      x=a
    #   print(x.shape)
      all_activations.append(a)

    ypred=np.dot(x,w[architecture.shape[1]-2])+b[architecture.shape[1]-2]
    # print(ypred.shape)
    z_all.append(ypred) 
    activation = np.exp(ypred) / np.sum(np.exp(ypred),axis=0)
    # print(activation.shape)
    all_activations.append(activation)
    y_predicted=np.argmax(activation,axis=1,keepdims = True)
    # print(y_predicted.shape)
    return y_predicted,all_activations,z_all





  def ReLU_deriv(Z):
    return Z > 0




  def backward_prop(X,Y,architecture,w,b,all_activations,z,Y_labell):
    m=X.shape[0]
    derivative_w=[]
    derivative_b=[]
    a=all_activations[-1]-Y
    lossarray=np.multiply(all_activations[-1],Y)
    lossarray[np.where(lossarray==0)]=1
    log=np.log(lossarray)
    loss=np.sum(log)*(-1/m)

    dz=a
    dW = 1 / m * np.dot(all_activations[-2].T,dz)
    db = 1 / m * np.sum(dz,axis=0,keepdims = True)
    # print(dz.shape)
    # print(dW.shape)
    # print(db.shape)
    derivative_w.append(dW)
    derivative_b.append(db)
      
    for i in range(architecture.shape[1]-3):
      dZi = np.dot(dz,w[architecture.shape[1]-2-i].T) * NeuralNetwork.ReLU_deriv(z[architecture.shape[1]-3-i])
    #   print(dZi.shape)
      dwi= 1 / m * np.dot(all_activations[architecture.shape[1]-4-i].T,dZi)
    #   print(dwi.shape)
      dbi= 1 / m * np.sum(dZi,axis=0,keepdims = True)
    #   print(dbi.shape)
      derivative_w.append(dwi)
      derivative_b.append(dbi)
      dz=dZi
    #   print(dZi.shape)
    #   print(dwi.shape)
    #   print(dbi.shape)
       
    dz1=np.dot(dz,w[1].T) * NeuralNetwork.ReLU_deriv(z[0])
    dW1 = 1 / m * np.dot(X.T,dz1)
    db1= 1 / m * np.sum(dz1,axis=0,keepdims = True)
    # print(dz1.shape)
    # print(dW1.shape)       
    # print(db1.shape)
    derivative_w.append(dW1)
    derivative_b.append(db1)
    derivative_w.reverse()
    derivative_b.reverse()
    return derivative_w,derivative_b,loss

  
  def update_params(w,b,dw,db,alpha,a):
    for i in range(a.shape[1]-1):
      w[i] = w[i] +(alpha*dw[i])
      b[i] = b[i] -(alpha*db[i])

    return w, b

  def gradient_descent(X, Y, alpha, iterations,architecture,Y_label):
    losses=[]
    w,b=NeuralNetwork.init_params(architecture)
    for i in range(iterations):
      y_hat,act,zall=NeuralNetwork.forward_prop(w,b,X,architecture)
      DW,DB,loss=NeuralNetwork.backward_prop(X,Y,architecture,w,b,act,zall,Y_label)
      w_updated,b_updated=NeuralNetwork.update_params(w,b,DW,DB,alpha,architecture)
      w=w_updated
      b=b_updated
      print("after",i,"iterations")
      print(loss)
      NeuralNetwork.get_accuracy(y_hat,Y_label)
      losses.append(loss)

         
    return w,b,losses 
   
  def get_accuracy(y__predicted,y):
    print(np.sum(y__predicted == y) / y.size)


  def accPlot(acc, iters):
      plt.plot(np.arange(iters), acc, '-b') 
      plt.xlabel('Number of iterations')
      plt.ylabel('loss')
      plt.show()  


 



In [39]:
a=np.array([[784,200,100,17,16,15,14,13,10]])
w,b=NeuralNetwork.init_params(a)
alpha=0.001
iterations=100
# print(w[0].shape)
# print(w[1].shape)
# print(w[2].shape)
# print(w[3].shape)
# print(w[4].shape)
# print(w[5].shape)

# print(b[0].shape)
# print(b[1].shape)
# print(b[2].shape)
# print(b[3].shape)
# print(b[4].shape)
# print(b[5].shape)

# print()
# print()
# print()
# yhat,act,zall=NeuralNetwork.forward_prop(w,b,X_train,a)
# # # print(yhat.shape)
# # print(act[-1].shape)
# print()
# print()
# print()
# DW,DB,loss=NeuralNetwork.backward_prop(X_train,y_train,a,w,b,act,zall,y_label_train)
# print(DW[0].shape)
# print(DW[1].shape)
# print(DW[2].shape)
# print(DW[3].shape)
# print(DW[4].shape)

# print(DB[0].shape)
# print(DB[1].shape)
# print(DB[2].shape)
# w_updated,b_updated=NeuralNetwork.update_params(w,b,DW,DB,alpha,a)
# print(w_updated[0].shape,b_updated[0].shape)
# print(w_updated[1].shape,b_updated[1].shape)
# print(w_updated[2].shape,b_updated[2].shape)
w_final,b_final,loss_arr=NeuralNetwork.gradient_descent(X_train,y_train,alpha,iterations,a,y_label_train)

after 0 iterations
11.248733096523264
0.08208333333333333
after 1 iterations
11.313632705327446
0.08058333333333334
after 2 iterations
11.386189228864987
0.079
after 3 iterations
11.467747169010183
0.07854166666666666
after 4 iterations
11.558686707690072
0.079
after 5 iterations
11.658593830868616
0.07983333333333334
after 6 iterations
11.769635241272898
0.07954166666666666
after 7 iterations
11.889733475833374
0.079125
after 8 iterations
12.018128485999588
0.077875
after 9 iterations
12.15383109743252
0.076875
after 10 iterations
12.298940871583756
0.07608333333333334
after 11 iterations
12.450842690811527
0.073


KeyboardInterrupt: ignored

In [None]:
NeuralNetwork.accPlot(loss_arr,1000)
# print(loss_arr)