In [2]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder

# Fetch the Iris dataset: feature matrix plus integer class labels
iris = load_iris()
X = iris.data
y = iris.target[:, np.newaxis]  # 2-D column, the layout OneHotEncoder expects

# Turn the integer labels into one-hot rows (dense array, not sparse)
encoder = OneHotEncoder(sparse_output=False)
y_onehot = encoder.fit(y).transform(y)

# Hold out 20% of the samples for testing; fixed random_state keeps the split stable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_onehot,
    test_size=0.2,
    random_state=42,
)

# Standardise features using statistics of the training split only,
# then apply the same shift/scale to the test split
mean, std = X_train.mean(axis=0), X_train.std(axis=0)
X_train = (X_train - mean) / std
X_test = (X_test - mean) / std

In [5]:
## Neural Network Implementation ##

class simpleNeuralNetwork:
    """One-hidden-layer classifier: sigmoid hidden units, softmax output.

    Trained with full-batch gradient descent on the mean cross-entropy loss.
    Inputs are 2-D arrays with one sample per row; targets are one-hot rows.
    """

    def __init__(self, input, Hidden, output, seed=None):
        """Allocate and initialise the network parameters.

        Args:
            input: number of input features.
            Hidden: number of hidden units.
            output: number of output classes.
            seed: optional seed for the weight initialisation. When None
                (the default) the weights are drawn from NumPy's global
                random state, so ``np.random.seed`` keeps controlling them.
        """
        # Fall back to the global state so unseeded construction is unchanged.
        rng = np.random if seed is None else np.random.default_rng(seed)
        # Weights: uniform draws scaled into [0, 0.01); biases start at zero.
        self.W1 = rng.random((input, Hidden)) * 0.01
        self.b1 = np.zeros((1, Hidden))
        self.W2 = rng.random((Hidden, output)) * 0.01
        self.b2 = np.zeros((1, output))

    def sigmoid(self, x):
        """Element-wise logistic function 1 / (1 + exp(-x)).

        Only exp(-|x|) is evaluated, which lies in [0, 1], so inputs with a
        large magnitude cannot overflow.
        """
        x = np.asarray(x, dtype=float)
        e = np.exp(-np.abs(x))
        # x >= 0: 1 / (1 + exp(-x));  x < 0: exp(x) / (1 + exp(x)).
        return np.where(x >= 0, 1.0 / (1.0 + e), e / (1.0 + e))

    def sigmoid_derivative(self, x):
        """Sigmoid derivative expressed through the sigmoid *output*.

        ``x`` must already be an activation ``sigmoid(z)``, not the
        pre-activation ``z``; ``backward`` passes ``self.a1``.
        """
        return x * (1 - x)

    def softmax(self, x):
        """Row-wise softmax; each row of the result sums to 1."""
        # Subtracting the row maximum keeps the exponentials bounded.
        exp_x = np.exp(x - np.max(x, axis=1, keepdims=True))
        return exp_x / np.sum(exp_x, axis=1, keepdims=True)

    def forward(self, x):
        """Run the forward pass and return the class probabilities.

        Stores ``z1``, ``a1``, ``z2`` and ``a2`` on the instance; ``backward``
        reads ``a1`` from there.
        """
        # Hidden layer
        self.z1 = np.dot(x, self.W1) + self.b1
        self.a1 = self.sigmoid(self.z1)
        # Output layer
        self.z2 = np.dot(self.a1, self.W2) + self.b2
        self.a2 = self.softmax(self.z2)

        return self.a2

    def CE_loss(self, y_true, y_Pred):
        """Mean cross-entropy between one-hot targets and predicted probabilities."""
        m = y_true.shape[0]
        # The small constant keeps log() finite when a probability is exactly 0.
        loss = -np.sum(y_true * np.log(y_Pred + 1e-15)) / m
        return loss

    def backward(self, X, y_true, y_pred):
        """Compute the gradients of the mean cross-entropy loss.

        Must be called after ``forward`` on the same ``X`` because it uses the
        cached hidden activation ``self.a1``.

        Returns:
            Tuple ``(dW1, db1, dW2, db2)`` shaped like the matching parameters.
        """
        m = y_true.shape[0]

        # Output layer: softmax combined with cross-entropy gives (pred - true).
        dz2 = y_pred - y_true
        dW2 = np.dot(self.a1.T, dz2) / m
        db2 = np.sum(dz2, axis=0, keepdims=True) / m

        # Hidden layer: back-propagate through W2 and the sigmoid.
        dz1 = np.dot(dz2, self.W2.T) * self.sigmoid_derivative(self.a1)
        dW1 = np.dot(X.T, dz1) / m
        db1 = np.sum(dz1, axis=0, keepdims=True) / m

        return dW1, db1, dW2, db2

    def update_weights(self, dW1, db1, dW2, db2, learning_rate):
        """Apply one in-place gradient-descent step to all parameters."""
        self.W1 -= learning_rate * dW1
        self.b1 -= learning_rate * db1
        self.W2 -= learning_rate * dW2
        self.b2 -= learning_rate * db2

    def train(self, X, y, epochs, learning_rate, print_every=100):
        """Train with full-batch gradient descent.

        Args:
            X: training inputs, one sample per row.
            y: one-hot training targets.
            epochs: number of passes over the full batch.
            learning_rate: gradient-descent step size.
            print_every: print the loss every this many epochs; pass 0 or
                None to silence the progress output.

        Returns:
            List with the loss measured before each update, one per epoch.
        """
        losses = []
        for epoch in range(epochs):
            # Forward pass
            y_pred = self.forward(X)

            # Compute loss
            loss = self.CE_loss(y, y_pred)
            losses.append(loss)

            # Backward pass
            dW1, db1, dW2, db2 = self.backward(X, y, y_pred)

            # Update weights
            self.update_weights(dW1, db1, dW2, db2, learning_rate)

            if print_every and epoch % print_every == 0:
                print(f"Epoch {epoch}, Loss: {loss:.4f}")

        return losses

    def predict(self, X):
        """Return the index of the most probable class for every row of ``X``."""
        y_pred = self.forward(X)
        return np.argmax(y_pred, axis=1)



In [8]:
# Create and train the network
input_size = X_train.shape[1]
Hidden_size = 5
output_size = y_train.shape[1]

# Seed NumPy's global random state so the weight initialisation, and with it
# the reported losses and accuracies, are the same on every run.
np.random.seed(42)

nn = simpleNeuralNetwork(input_size, Hidden_size, output_size)
losses = nn.train(X_train, y_train, epochs=1000, learning_rate=0.1)

# Evaluate: compare predicted class indices with the argmax of the one-hot targets
train_preds = nn.predict(X_train)
test_preds = nn.predict(X_test)

train_accuracy = np.mean(np.argmax(y_train, axis=1) == train_preds)
test_accuracy = np.mean(np.argmax(y_test, axis=1) == test_preds)

print(f"Train Accuracy: {train_accuracy:.2f}")
print(f"Test Accuracy: {test_accuracy:.2f}")

Epoch 0, Loss: 1.0987
Epoch 100, Loss: 1.0982
Epoch 200, Loss: 1.0800
Epoch 300, Loss: 0.8113
Epoch 400, Loss: 0.5467
Epoch 500, Loss: 0.4434
Epoch 600, Loss: 0.3816
Epoch 700, Loss: 0.3344
Epoch 800, Loss: 0.2953
Epoch 900, Loss: 0.2617
rain Accuracy: 0.96
Test Accuracy: 0.97
