In [2]:
import numpy as np 
import pandas as pd  
import matplotlib.pyplot as plt  

In [3]:
# Colab-only step: mount Google Drive so files under /content/drive/ become readable.
# NOTE(review): this import only exists inside Google Colab; the cell fails elsewhere.
from google.colab import drive  
drive.mount('/content/drive/') 

Mounted at /content/drive/


In [4]:
# Load the labelled training CSV and the unlabelled CSV that predictions are produced for.
data_train=pd.read_csv('/content/drive/MyDrive/Data Set/classification_train.csv')  
data_test=pd.read_csv('/content/drive/MyDrive/Data Set/classification_test.csv')  
# Double brackets keep the labels 2-D, i.e. one column per row rather than a flat vector.
y0=np.array(data_train[['label']]) 
# Features are every column from index 2 onwards in the training file and from index 1
# onwards in the prediction file.
# NOTE(review): presumably the skipped leading columns are an id (and the label) - verify against the CSVs.
x0=np.array(data_train.iloc[:, 2:]) 
x_pred=np.array(data_test.iloc[:, 1:]) 
# Draw 25000 distinct row indices for training; all remaining rows become the hold-out set.
# NOTE(review): no random seed is set, so the split differs on every run, and the
# hard-coded 25000 raises if the file has fewer rows.
choice = np.random.choice(range(x0.shape[0]), size=(25000,), replace=False)     
# Boolean mask over all rows: True = training row, False = hold-out row.
ind = np.zeros(x0.shape[0], dtype=bool) 
ind[choice] = True 
rest = ~ind 
x=x0[ind, :] 
x_test=x0[rest, :] 
y=y0[ind] 
y_test=y0[rest] 
print(x_test.shape,y.shape) 

(5000, 784) (25000, 1)


In [5]:
def relu(Z, derivative=False):
  """Rectified linear unit, or its derivative, applied element-wise.

  Parameters
  ----------
  Z : array_like
      Pre-activation values.
  derivative : bool, optional
      When true, return the 0/1 mask d relu/dZ (1 where ``Z > 0``, 0 elsewhere)
      instead of the activation itself. Defaults to False.

  Returns
  -------
  numpy.ndarray
      A new array with the same shape as ``Z``. ``Z`` itself is never modified.
  """
  Z = np.asarray(Z)
  if derivative:
    # Build the mask in a fresh array: writing 0/1 into Z itself would destroy
    # the caller's pre-activations.
    return (Z > 0).astype(Z.dtype)
  # maximum() rather than Z*(Z>0): the product turns -inf into NaN (-inf * 0).
  return np.maximum(Z, 0)

In [6]:
def sigmoid(Z):
  """Logistic sigmoid ``1 / (1 + exp(-Z))``, applied element-wise.

  Parameters
  ----------
  Z : array_like
      Input values.

  Returns
  -------
  numpy.ndarray or numpy scalar
      Values in [0, 1] with the same shape as ``Z``.
  """
  Z = np.asarray(Z)
  # 1/(1+exp(-Z)) == exp(-log(1+exp(-Z))). logaddexp(0, -Z) evaluates the log
  # term without ever forming exp(-Z), so large negative Z cannot overflow.
  return np.exp(-np.logaddexp(0, -Z))

In [7]:
def softmax(Z):
  """Softmax over the last axis.

  Parameters
  ----------
  Z : array_like
      Scores; each slice along the last axis is turned into a probability
      distribution. Both 1-D and 2-D (one row per sample) inputs are accepted.

  Returns
  -------
  numpy.ndarray
      Array of the same shape as ``Z`` whose entries along the last axis sum to 1.
  """
  Z = np.asarray(Z)
  # Shift each row by its OWN maximum. Shifting by the global maximum lets a row
  # that lies far below it underflow to all zeros and produce 0/0 = NaN.
  shifted = Z - Z.max(axis=-1, keepdims=True)
  exps = np.exp(shifted)
  # keepdims keeps the normaliser broadcastable for any number of dimensions.
  return exps / exps.sum(axis=-1, keepdims=True)

In [8]:
def dense(A, W, b, activation):
  """Fully connected layer: ``Z = A.T @ W + b`` followed by an activation.

  Parameters
  ----------
  A : numpy.ndarray
      Layer input. It is transposed before the product, so callers pass the
      transpose of the matrix that multiplies ``W``.
  W : numpy.ndarray
      Weight matrix.
  b : numpy.ndarray
      Bias, broadcast-added to ``A.T @ W``.
  activation : {"linear", "relu", "sigmoid"}
      Name of the activation applied to ``Z``.

  Returns
  -------
  tuple
      ``(y_hat, Z)``: the activated output and the pre-activation.

  Raises
  ------
  ValueError
      If ``activation`` is not one of the supported names. An unrecognised
      name is an error rather than a silent all-zero output.
  """
  if activation not in ("linear", "relu", "sigmoid"):
    raise ValueError(
        "unknown activation %r; expected 'linear', 'relu' or 'sigmoid'" % (activation,))
  Z = A.T.dot(W) + b
  if activation == "linear":
    return Z, Z
  if activation == "relu":
    return relu(Z, derivative=False), Z
  return sigmoid(Z), Z

In [9]:
def cost(p, y, eps=1e-12):
  """Mean cross-entropy between predicted probabilities and one-hot targets.

  Parameters
  ----------
  p : numpy.ndarray
      Predicted probabilities, one row per sample.
  y : numpy.ndarray
      One-hot targets with the same shape as ``p``.
  eps : float, optional
      Lower bound applied to the probability of the true class before taking
      the logarithm, so a confident wrong prediction gives a large finite cost
      instead of ``inf``.

  Returns
  -------
  float
      ``-sum(log(p[true class])) / len(y)``.
  """
  # Multiplying by the one-hot row and summing picks out the true-class probability.
  true_class_prob = np.sum(np.multiply(p, y), axis=-1)
  true_class_prob = np.clip(true_class_prob, eps, None)
  return -np.sum(np.log(true_class_prob)) / len(y)

In [10]:
def normalized(x):
  """Standardise ``x`` along axis 0 to zero mean and unit standard deviation.

  Parameters
  ----------
  x : array_like
      Data to standardise; statistics are taken along axis 0, so a 2-D input is
      standardised column by column. 1-D input is also accepted.

  Returns
  -------
  numpy.ndarray
      ``(x - mean) / std``. Where the standard deviation is 0 the divisor is
      replaced by 1, so constant columns come out as zeros.
  """
  x = np.asarray(x)
  mean = np.mean(x, axis=0)
  std = np.std(x, axis=0)
  # np.where instead of in-place masking: for 1-D input np.std returns a scalar,
  # which does not support item assignment.
  std = np.where(std == 0, 1, std)
  return (x - mean) / std

In [11]:
def one_hot(y, num_classes=10):
  """One-hot encode integer class labels.

  Parameters
  ----------
  y : array_like of int
      Labels, either flat with shape ``(n,)`` or as a column with shape ``(n, 1)``.
  num_classes : int, optional
      Number of columns in the result. Defaults to 10.

  Returns
  -------
  numpy.ndarray
      Float array of shape ``(n, num_classes)`` with a 1 at ``[i, y[i]]`` and
      zeros elsewhere.
  """
  labels = np.asarray(y)
  n = len(labels)
  b = np.zeros((n, num_classes))
  if n == 0:
    # Nothing to mark; also avoids an ambiguous reshape of an empty array.
    return b
  # Row indices as a column so they broadcast against labels of shape (n, 1),
  # and equally against flat labels once those are reshaped to a column.
  rows = np.arange(n)[:, None]
  b[rows, labels.reshape(n, -1)] = 1
  return b


In [12]:
def gen_layers(n, x, W, B):
  """Forward pass through ``n`` ReLU layers.

  Parameters
  ----------
  n : int
      Number of layers to evaluate.
  x : numpy.ndarray
      Network input; stored unchanged under key ``"0"``.
  W, B : dict
      Weights and biases keyed by the layer number as a string (``"1"`` .. ``str(n)``).

  Returns
  -------
  tuple of dict
      ``(A, Z)``: activations keyed ``"0"`` .. ``str(n)`` and pre-activations
      keyed ``"1"`` .. ``str(n)``.
  """
  activations = {"0": x}
  pre_activations = {}
  previous = x
  for layer in range(1, n + 1):
    key = str(layer)
    # The previous output is transposed before standardisation, and dense()
    # receives that standardised, transposed matrix as its input.
    layer_out, layer_pre = dense(normalized(previous.T), W[key], B[key], "relu")
    activations[key] = layer_out
    pre_activations[key] = layer_pre
    previous = layer_out
  return activations, pre_activations

In [13]:
def gradients(W, B, A, Z, y_hat, y, n):
  """Back-propagate the output error through ``n`` ReLU layers.

  Parameters
  ----------
  W : dict
      Weights keyed ``"1"`` .. ``str(n)``.
  B : dict
      Biases; accepted for signature compatibility but not read here.
  A : dict
      Activations from the forward pass, keyed ``"0"`` .. ``str(n)``.
  Z : dict
      Pre-activations from the forward pass, keyed ``"1"`` .. ``str(n)``. Not modified.
  y_hat : numpy.ndarray
      Predicted class probabilities, one row per sample.
  y : array_like of int
      Integer class labels; one-hot encoded internally.
  n : int
      Number of layers.

  Returns
  -------
  tuple of dict
      ``(dJ_dW, dJ_db)`` keyed like ``W``. The gradients are summed over samples,
      not averaged.

  Notes
  -----
  ``y_hat - one_hot(y)`` is treated as the gradient with respect to the last
  layer's output; the standardisation steps of the forward pass are not
  differentiated through.
  """
  dJ_dW = {}
  dJ_db = {}
  # Gradient with respect to the output A[layer] of the layer being processed.
  upstream = y_hat - one_hot(y)
  for layer in range(n, 0, -1):
    key = str(layer)
    # Pass a copy: relu(..., derivative=True) may overwrite its argument, and the
    # caller's pre-activations must survive this call.
    delta = upstream * relu(Z[key].copy(), derivative=True)
    dJ_dW[key] = A[str(layer - 1)].T.dot(delta)
    dJ_db[key] = np.sum(delta, axis=0)
    if layer > 1:
      # Chain rule: the error sent to the layer below must already carry this
      # layer's ReLU mask, so propagate delta rather than the unmasked upstream.
      upstream = delta.dot(W[key].T)
  return dJ_dW, dJ_db


In [14]:
def grad_descent(alpha, W, B, y, n, iterate_n, l):
  """Train the network with full-batch gradient descent and plot the cost curve.

  The training features are read from the notebook-level variable ``x``; they
  are not passed in.

  Parameters
  ----------
  alpha : float
      Learning rate.
  W, B : dict
      Weights and biases keyed ``"1"`` .. ``str(n)``. The entries of these
      dicts are replaced as training proceeds.
  y : array_like of int
      Integer class labels for the rows of ``x``.
  n : int
      Number of layers.
  iterate_n : int
      Maximum number of iterations.
  l : any
      Unused; kept so existing calls keep working.

  Returns
  -------
  tuple of dict
      The updated ``(W, B)``.

  Notes
  -----
  Training stops early as soon as the cost rises above the previously recorded
  cost. The first iteration is compared against a fixed starting value of 3.5.
  """
  x_norm = normalized(x)  # loop-invariant, so standardise once
  y_o = one_hot(y)
  costs = []
  previous_cost = 3.5  # reference value for the very first comparison
  for _ in range(iterate_n):
    A, Z = gen_layers(n, x_norm, W, B)
    p_i = softmax(normalized(A[str(n)]))
    dJ_dW, dJ_db = gradients(W, B, A, Z, p_i, y, n)
    for j in range(1, n + 1):
      key = str(j)
      W[key] = W[key] - alpha * dJ_dW[key]
      B[key] = B[key] - alpha * dJ_db[key]
    # The cost is that of the parameters used for this forward pass, i.e. before the update above.
    J = cost(p_i, y_o)
    if previous_cost < J:
      break
    costs.append(J)
    previous_cost = J
    print("cost", J)
  # Plot every recorded cost, including the one from the final iteration.
  plt.plot(np.arange(len(costs)), costs, 'k')
  plt.xlabel("iteration")
  plt.ylabel("cost")
  plt.show()
  return W, B


In [None]:

  # ---- Network configuration (interactive) ----
  n=int(input("Enter no. of hidden layers:"))
  m,f=x.shape
  # N[0] is the input width; N[i] is the width of layer i.
  N=np.zeros(n+1, dtype=int)
  N[0]=f
  for i in range(1, n+1):
    N[i]=int(input("Enter no. of activations in layer: "))
  # The last layer feeds the softmax and is compared against one_hot(), which
  # produces 10 columns, so it must have exactly 10 units.
  if n < 1 or N[n] != 10:
    raise ValueError("the network needs at least one layer and the last layer must have 10 units")

  # ---- Parameter initialisation ----
  # NOTE(review): np.random.rand is uniform on [0, 1), so every weight starts
  # non-negative, and the scale uses the layer's output width. If a He-style
  # normal initialisation was intended, that would be randn with N[i] - confirm.
  W={}
  B={}
  for i in range(n):
    std=np.sqrt(2.0/N[i+1])
    W[str(i+1)]=np.random.rand(N[i], N[i+1])*std
    B[str(i+1)]=np.random.rand(1, N[i+1])*std

  # ---- Training ----
  W, B=grad_descent(0.000001, W, B, y, n, 50, 0)

  def predict_proba(features):
    """Standardise ``features``, run the forward pass and return class probabilities per row."""
    # NOTE(review): each data set is standardised with its own statistics rather
    # than the training-set statistics - confirm this is intended.
    layer_out, _ = gen_layers(n, normalized(features), W, B)
    return softmax(normalized(layer_out[str(n)]))

  # ---- Accuracy on the training split ----
  p_i=predict_proba(x)
  # Fraction of rows whose most probable class equals the label.
  print("Training", float(np.mean(np.argmax(p_i, axis=-1)==y.ravel())))

  # ---- Cost and accuracy on the held-out split ----
  p_test=predict_proba(x_test)
  print(cost(p_test, one_hot(y_test)))
  print(p_test)
  print("Testing", float(np.mean(np.argmax(p_test, axis=-1)==y_test.ravel())))

  # ---- Predictions for the unlabelled file ----
  p_pred=predict_proba(x_pred)
  predictions=np.argmax(p_pred, axis=-1)
  # Keep only the first column: x_pred was built from every column after it.
  Predictions=data_test.drop(columns=data_test.columns[1:])
  Predictions['Predicted']=predictions
  print(Predictions)
  Predictions.to_csv('/content/drive/MyDrive/Data Set/nn_predictions.csv', index=False)
