In [1]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

input_size=4
output_size=3
hidden_size=8
learning_rate=0.01

def sigmoid(x):
  return 1/(1+np.exp(-x))

def sigmoidderivative(x):
  return x*(1-x)

def softmax(x):
  sm=np.exp(x-np.max(x,axis=1,keepdims=True))
  return sm/sm.sum(axis=1,keepdims=True)


def categorical_crossentropy(y_pred,y_true):
  epsilon=1e-15
  y_pred=np.clip(y_pred,epsilon,1-epsilon)
  return -np.sum(y_true * np.log(y_pred))/len(y_true)


iris=load_iris()
x=iris.data
y=iris.target

x=(x-x.mean(axis=0))/x.std(axis=0)

x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2,random_state=0)

y_train_one_hot=np.eye(3)[y_train]

np.random.seed(0)
weights_input_hidden=np.random.rand(input_size,hidden_size)
hidden_bias=np.zeros((1,hidden_size))
weights_output_hidden=np.random.rand(hidden_size,output_size)
output_bias=np.zeros((1,output_size))

for epoch in range(25):

  #forwardpass
  hidden_input=x_train.dot(weights_input_hidden)+hidden_bias
  hidden_output = sigmoid(hidden_input)
  output=hidden_output.dot(weights_output_hidden)+output_bias
  predicted_output=softmax(output)

  #error
  error=y_train_one_hot-predicted_output

  #backprop
  delta_output=error
  delta_hidden=delta_output.dot(weights_output_hidden.T) * sigmoidderivative(hidden_output)

  #update
  weights_input_hidden += x_train.T.dot(delta_hidden) * learning_rate
  weights_output_hidden += hidden_output.T.dot(delta_output) * learning_rate
  hidden_bias += np.sum(delta_hidden,axis=0,keepdims=True)*learning_rate
  output_bias += np.sum(delta_output,axis=0,keepdims=True)*learning_rate

  if epoch % 5==0:
    loss=categorical_crossentropy(predicted_output,y_train_one_hot)
    print(f"epoch{epoch} : loss {loss}")
    accuracy=np.mean(np.argmax(predicted_output,axis=1)==y_train)
    print("accuracy: ",accuracy*100)


# # Testing the trained model
# hidden_layer = sigmoid(X_test.dot(weights_input_hidden) + bias_hidden)
# output_layer = softmax(hidden_layer.dot(weights_hidden_output) + bias_output)
# predicted_classes = np.argmax(output_layer, axis=1)
# accuracy = np.mean(predicted_classes == y_test)
# print(f"Testing Accuracy: {accuracy*100}")




epoch0 : loss 1.2870011687778087
accuracy:  30.0
epoch5 : loss 0.7277542563600848
accuracy:  69.16666666666667
epoch10 : loss 0.5585662107192153
accuracy:  78.33333333333333
epoch15 : loss 0.4714591788419439
accuracy:  84.16666666666667
epoch20 : loss 0.4191101346345116
accuracy:  87.5
