In [76]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import math

In [77]:
def z_normalize(x):
  m,n = x.shape
  mean = np.zeros(n)
  std = np.zeros(n)
  for i in range(n):
    mean[i] = np.mean(x[:,i])
    std[i] = np.std(x[:,i])
    x[:,i] = (x[:,i] - mean[i])/std[i]
  return x

In [78]:
def sigmoid(z):
  return 1 / (1 + np.exp(-z))

In [79]:
def softmax(z):
  exp_z = np.exp(z)
  return (exp_z / np.sum(exp_z, axis=0, keepdims=True))

In [80]:
def relu(z):
  return (np.maximum(0, z))

In [81]:
def relu_derivative(z):
  return np.array(z > 0)

In [82]:
def leaky_relu(z):
  return np.maximum(0.01*z, z)

In [83]:
def leaky_relu_derivative(z):
  return np.array(z > 0)

In [84]:
def tanh(z):
  return np.tanh(z)

In [85]:
def tanh_derivative(z):
  return (1 - np.power(z,2))

In [104]:
def initial_parameters(layers,y):
  parameters = {}

  if y.shape[0] != 1:
    for i in range(1, len(layers)):
      parameters[f"w{i}"] = np.zeros((layers[i], layers[i - 1])) / np.sqrt(layers[i - 1])
      parameters[f"b{i}"] = np.zeros((layers[i], 1))

  else:
    for i in range(1, len(layers)):
      parameters[f"w{i}"] = np.random.randn(layers[i], layers[i - 1]) / np.sqrt(layers[i - 1])
      parameters[f"b{i}"] = np.zeros((layers[i], 1))
  return parameters

In [87]:
def forward_propagation(x, parameters, activation):
  L = len(parameters) // 2
  forward_cache = {}
  forward_cache = {"a0": x}

  for i in range(1, L):
    forward_cache[f"z{i}"] = np.dot(parameters[f"w{i}"], forward_cache[f"a{i-1}"]) + parameters[f"b{i}"]
    a = activation[i - 1](forward_cache[f"z{i}"])
    forward_cache[f"a{i}"] = a

  forward_cache[f"z{L}"] = np.dot(parameters[f"w{L}"] ,forward_cache[f"a{L - 1}"]) + parameters[f"b{L}"]

  if forward_cache[f"z{L}"].shape[0] == 1:
    forward_cache[f"a{L}"] = sigmoid(forward_cache[f"z{L}"])

  else:
    forward_cache[f"a{L}"] = softmax(forward_cache[f"z{L}"])

  return forward_cache[f"a{L}"], forward_cache

In [88]:
def nn_cost(a,y):
  m = y.shape[1]
  if y.shape[0] == 1:
    cost = -(1 / m) * np.sum(y * np.log(a) + (1 - y) * np.log(1 - a))

  else:
    cost = -(1 / m) * np.sum(y * np.log(a))

  cost = np.squeeze(cost)
  return cost

In [89]:
def backward_propagation(a,y, parameters, forward_cache, activation_derivative):
  gradient = {}
  m = y.shape[1]
  L = len(parameters) // 2

  gradient[f"dz{L}"] = a - y
  gradient[f"dw{L}"] = (1 / m) * np.dot(gradient[f"dz{L}"], forward_cache[f"a{L - 1}"].T)
  gradient[f"db{L}"] = (1 / m) * np.sum(gradient[f"dz{L}"], axis=1, keepdims=True)

  for i in reversed(range(1, L)):
    gradient[f"dz{i}"] = np.dot(parameters[f"w{i + 1}"].T, gradient[f"dz{i + 1}"]) * activation_derivative[i - 1](forward_cache[f"a{i}"])
    gradient[f"dw{i}"] = (1 / m) * np.dot(gradient[f"dz{i}"], forward_cache[f"a{i - 1}"].T)
    gradient[f"db{i}"] = (1 / m) * np.sum(gradient[f"dz{i}"], axis=1, keepdims=True)

  return gradient

In [90]:
def update_parameter(parameters, gradient, alpha):
  L = len(parameters) // 2

  for i in range(1, L + 1):
    parameters[f"w{i}"] -= alpha * gradient[f"dw{i}"]
    parameters[f"b{i}"] -= alpha * gradient[f"db{i}"]

  return parameters

In [91]:
def nn(x, y, layers, activation, activation_derivative, alpha, num_itr):
  parameters = initial_parameters(layers,y)

  for i in range(num_itr):
    a , forward_cache = forward_propagation(x, parameters, activation)
    cost = nn_cost(a, y)
    gradient = backward_propagation(a, y, parameters, forward_cache, activation_derivative)
    parameters = update_parameter(parameters, gradient, alpha)

    if i % 100 == 0:
      print(f"Iteration {i} \t Cost: {cost}")

  return parameters

In [92]:
def nn_train_data(path):
  path = path.strip("'")
  df = pd.read_csv(path)

  u,v = df.shape
  v -= 3
  a_start = int(input("give the starting index of training data:"))
  a_end = int(input("give the ending index of training data:"))
  a = a_end - a_start
  x = np.zeros((v,a))
  y_binary = np.zeros((1,a))
  y_multi = np.zeros((1,a))

  df = df.iloc[a_start:a_end,1:]
  y_binary[0] = df[df.columns[-2]].to_numpy()
  y_multi[0] = df[df.columns[-1]].to_numpy()
  for i in range(v):
    x[i] = df[df.columns[i]].to_numpy()

  return (x,y_binary,y_multi)

In [119]:
def nn_test_data(path):
  path = path.strip("'")
  df = pd.read_csv(path)

  u,v = df.shape
  v -= 1
  x = np.zeros((v,u))
  y_binary = np.zeros((1,u))
  y_multi = np.zeros((1,u))

  df = df.iloc[:,1:]
  for i in range(v):
    x[i] = df[df.columns[i]].to_numpy()

  return (x,y_binary,y_multi)

In [94]:
x,y_binary,y_multi = nn_train_data('/content/nn_train.csv')

give the starting index of training data:0
give the ending index of training data:1000


In [120]:
x_test,y_binary_test,y_multi_test = nn_test_data('/content/nn_test.csv')

In [96]:
def multi_to_binary(y):
  m = y.shape[1]
  a = int(y.max() + 1)
  y_new = np.zeros((a,m))
  for i in range(m):
    y_new[int(y[0,i]),i] = 1
  return y_new

In [97]:
y_multi = multi_to_binary(y_multi)

In [98]:
def layer_data(layers):
  n = int(input("Enter the number of hidden layers: "))
  for i in range(n):
    layers.insert(-1,int(input(f"Enter the number of neurons in hidden layer {i+1}: ")))
  return layers

In [99]:
def activation_data(n):
  activation = []
  activation_derivatives = []
  print("1. Relu")
  print("2. Leaky Relu")
  print("3. Tanh")
  for i in range(n):
    a = int(input(f"Enter the activation function for hidden layer {i+1}: "))
    if a == 1:
      activation.append(relu)
      activation_derivatives.append(relu_derivative)
    elif a == 2:
      activation.append(leaky_relu)
      activation_derivatives.append(leaky_relu_derivative)
    elif a == 3:
      activation.append(tanh)
      activation_derivatives.append(tanh_derivative)
    else:
      print("Invalid choice")
      return

  return activation,activation_derivatives

In [101]:
layers_binary = [x.shape[0],y_binary.shape[0]]
layers_binary = layer_data(layers_binary)

activation_binary,activation_derivatives_binary = activation_data(len(layers_binary)-1)

alpha_binary = float(input("Enter the value of alpha(binary): "))
num_itr_binary = int(input("Enter the number of iteration(binary): "))

Enter the number of hidden layers: 2
Enter the number of neurons in hidden layer 1: 10
Enter the number of neurons in hidden layer 2: 15
1. Relu
2. Leaky Relu
3. Tanh
Enter the activation function for hidden layer 1: 1
Enter the activation function for hidden layer 2: 1
Enter the activation function for hidden layer 3: 1
Enter the value of alpha(binary): 0.01
Enter the number of iteration(binary): 1000


In [105]:
paremeters_binary = nn(x,y_binary, layers_binary, activation_binary, activation_derivatives_binary, alpha_binary, num_itr_binary)

Iteration 0 	 Cost: 6.304962478365336


  cost = -(1 / m) * np.sum(y * np.log(a) + (1 - y) * np.log(1 - a))
  cost = -(1 / m) * np.sum(y * np.log(a) + (1 - y) * np.log(1 - a))


Iteration 100 	 Cost: 0.6931477291551332
Iteration 200 	 Cost: 0.6931443622999547
Iteration 300 	 Cost: 0.6931423214764506
Iteration 400 	 Cost: 0.6931410844277626
Iteration 500 	 Cost: 0.6931403345878396
Iteration 600 	 Cost: 0.6931398800701932
Iteration 700 	 Cost: 0.693139604562755
Iteration 800 	 Cost: 0.6931394375628416
Iteration 900 	 Cost: 0.6931393363351233


In [106]:
for i in range(1,len(activation_binary)+1):
  print(f"\nlayer{i}\n")
  print(f"w{i} = {paremeters_binary[f'w{i}']}")
  print(f"b{i} = {paremeters_binary[f'b{i}']}")


layer1

w1 = [[-0.1557513  -0.12458216 -0.11631228 ... -0.12707726 -0.09532716
  -0.11181375]
 [-0.0055739   0.00725826 -0.00936287 ...  0.00116176  0.01586837
   0.020085  ]
 [-0.02601411 -0.01328722  0.04791601 ... -0.01006036 -0.00460792
  -0.02471957]
 ...
 [ 0.01009714 -0.0233073  -0.05337481 ... -0.0052181   0.0062138
   0.02249559]
 [-0.71849306 -0.69635839 -0.75940632 ... -0.69882275 -0.65009489
  -0.60088225]
 [-0.04147392  0.02054735 -0.00526697 ...  0.02135037 -0.0196903
   0.01140915]]
b1 = [[-6.74274889e-04]
 [-2.85994938e-05]
 [-3.24920693e-04]
 [ 0.00000000e+00]
 [ 8.74044947e-06]
 [-3.08036493e-03]
 [-8.50878861e-05]
 [ 0.00000000e+00]
 [-7.13775268e-03]
 [-1.35528549e-05]]

layer2

w2 = [[ 1.52643818e-01  2.32761514e-01  2.61470889e-01  2.44933787e-01
  -1.49030412e-01 -8.12266116e-01 -2.22926123e-02 -1.81299186e-02
  -4.52015742e-01  1.43226024e-01]
 [-3.52041569e-01 -5.95951361e-01 -5.01291547e-01 -2.49413210e-01
  -2.16102727e-01  2.39069307e+00  3.76866455e-01  3.

In [107]:
layers_multi = [x.shape[0],y_multi.shape[0]]
layers_multi = layer_data(layers_multi)

activation,activation_derivatives = activation_data(len(layers_multi)-1)

alpha_multi = float(input("Enter the value of alpha(multi class): "))
num_itr_multi = int(input("Enter the number of iteration(multi class): "))

Enter the number of hidden layers: 2
Enter the number of neurons in hidden layer 1: 10
Enter the number of neurons in hidden layer 2: 15
1. Relu
2. Leaky Relu
3. Tanh
Enter the activation function for hidden layer 1: 1
Enter the activation function for hidden layer 2: 1
Enter the activation function for hidden layer 3: 1
Enter the value of alpha(multi class): 0.001
Enter the number of iteration(multi class): 1000


In [108]:
paramater_multi = nn(x,y_multi, layers_multi, activation, activation_derivatives, alpha_multi, num_itr_multi)

Iteration 0 	 Cost: 2.3978952727983707
Iteration 100 	 Cost: 2.3968974141110775
Iteration 200 	 Cost: 2.3959174100975598
Iteration 300 	 Cost: 2.394954819578268
Iteration 400 	 Cost: 2.3940092153079857
Iteration 500 	 Cost: 2.393080183434767
Iteration 600 	 Cost: 2.3921673229838025
Iteration 700 	 Cost: 2.391270245364875
Iteration 800 	 Cost: 2.3903885739021797
Iteration 900 	 Cost: 2.389521943385338


In [109]:
for i in range(1,len(activation)+1):
  print(f"\nlayer{i}\n")
  print(f"w{i} = {paramater_multi[f'w{i}']}")
  print(f"b{i} = {paramater_multi[f'b{i}']}")


layer1

w1 = [[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]
b1 = [[0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]]

layer2

w2 = [[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]]
b2 = [[0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]]

layer3

w3 = [[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]


In [110]:
def test(x,parameter,activation):
  a,_=forward_propagation(x,parameter,activation)
  return a

In [111]:
x_cv,y_cv_binary,y_cv_multi = nn_train_data('/content/nn_train.csv')
y_cv_multi = multi_to_binary(y_cv_multi)

give the starting index of training data:2000
give the ending index of training data:2500


In [112]:
y_hat_binary = test(x_cv,paremeters_binary,activation_binary)
y_hat_multi = test(x_cv,paramater_multi,activation)
print(y_hat_binary)
print(y_hat_multi)

[[0.5017827 0.5017827 0.5017827 0.5017827 0.5017827 0.5017827 0.5017827
  0.5017827 0.5017827 0.5017827 0.5017827 0.5017827 0.5017827 0.5017827
  0.5017827 0.5017827 0.5017827 0.5017827 0.5017827 0.5017827 0.5017827
  0.5017827 0.5017827 0.5017827 0.5017827 0.5017827 0.5017827 0.5017827
  0.5017827 0.5017827 0.5017827 0.5017827 0.5017827 0.5017827 0.5017827
  0.5017827 0.5017827 0.5017827 0.5017827 0.5017827 0.5017827 0.5017827
  0.5017827 0.5017827 0.5017827 0.5017827 0.5017827 0.5017827 0.5017827
  0.5017827 0.5017827 0.5017827 0.5017827 0.5017827 0.5017827 0.5017827
  0.5017827 0.5017827 0.5017827 0.5017827 0.5017827 0.5017827 0.5017827
  0.5017827 0.5017827 0.5017827 0.5017827 0.5017827 0.5017827 0.5017827
  0.5017827 0.5017827 0.5017827 0.5017827 0.5017827 0.5017827 0.5017827
  0.5017827 0.5017827 0.5017827 0.5017827 0.5017827 0.5017827 0.5017827
  0.5017827 0.5017827 0.5017827 0.5017827 0.5017827 0.5017827 0.5017827
  0.5017827 0.5017827 0.5017827 0.5017827 0.5017827 0.5017827 0.

In [113]:
y_hat_binary = np.squeeze(y_hat_binary)
y_hat_multi = np.squeeze(y_hat_multi)
for i in range(len(y_hat_binary)):
  if y_hat_binary[i] >= 0.5:
    y_hat_binary[i] = 1
  else:
    y_hat_binary[i] = 0

y_hat_multi = np.squeeze(y_hat_multi)
for i in range(y_hat_multi.shape[1]):
  a = max(y_hat_multi[:,i])
  for j in range(y_hat_multi.shape[0]):
    if y_hat_multi[j,i] == a:
      y_hat_multi[j,i] = 1
    else:
      y_hat_multi[j,i] = 0

In [114]:
def F1_score(y_hat,y):
  tp = 0
  tn = 0
  fp = 0
  fn = 0
  y = np.squeeze(y)
  y_hat = np.squeeze(y_hat)
  for i in range(len(y)):
    if y_hat[i] ==1 and y[i] == 1:
      tp += 1
    elif y_hat[i] ==0 and y[i] == 0:
      tn += 1
    elif y_hat[i] == 0 and y[i] == 1:
      fn += 1
    else:
      fp += 1

  precision = tp / (tp + fp + 1e-10)
  recall = tp / (tp + fn + 1e-10)
  F1 = 2 * precision * recall / (precision + recall + 1e-10)
  return F1,tp,fp,fn

In [115]:
F1,tp,tn,fn = F1_score(y_hat_binary,y_cv_binary)
print(F1)

0.6631016042335726


In [122]:
def F1_score_multi(y_hat,y):
  f1 = np.zeros(y_cv_multi.shape[0])
  tn = 0
  fp = 0
  fn = 0
  for i in range(y_cv_multi.shape[0]):
    f1[i],a,b,c = F1_score(y_hat_multi[i],y_cv_multi[i])
    tn += a
    fp += b
    fn += c

  precision = tn / (tn + fp + 1e-10)
  recall = tn / (tn + fn + 1e-10)
  F1_macro = np.mean(f1)
  F1_micro = 2 * precision * recall / (precision + recall + 1e-10)
  print(f"f1 :{f1}")
  print(f"macro_f1 : {F1_macro}")
  print(f"micro_f1 : {F1_micro}")
  return F1,F1_macro,F1_micro

In [117]:
F1,F1_macro,F1_micro = F1_score_multi(y_hat_multi,y_cv_multi)
print(f"F1: {F1}")
print(f"macro F1: {F1_macro}")
print(f"micro F1: {F1_micro}")

f1 :[0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.14126394 0.        ]
macro_f1 : 0.012842176409751362
micro_f1 : 0.0759999999499848
F1: 0.6631016042335726
macro F1: 0.012842176409751362
micro F1: 0.0759999999499848


In [121]:
y_binary_test = test(x_test,paremeters_binary,activation_binary)
y_multi_trst = test(x_test,paramater_multi,activation)

In [123]:
m,n = y_multi_trst.shape
y_multi_testtest = np.zeros(n)
for i in range(n):
  for j in range(m):
    if y_multi_trst[j,i] == 1:
      y_multi_trst[i] = j
print(y_multi_trst)

[[0.08329987 0.08329987 0.08329987 ... 0.08329987 0.08329987 0.08329987]
 [0.09210508 0.09210508 0.09210508 ... 0.09210508 0.09210508 0.09210508]
 [0.09010224 0.09010224 0.09010224 ... 0.09010224 0.09010224 0.09010224]
 ...
 [0.09122903 0.09122903 0.09122903 ... 0.09122903 0.09122903 0.09122903]
 [0.09325614 0.09325614 0.09325614 ... 0.09325614 0.09325614 0.09325614]
 [0.09175369 0.09175369 0.09175369 ... 0.09175369 0.09175369 0.09175369]]


In [124]:
np.savetxt('neural network binary test result.csv', y_binary_test, delimiter=',')
np.savetxt("neural networkmulti test result.csv'", y_multi_trst, delimiter=',')