In [1]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
import warnings
warnings.filterwarnings("ignore")

In [2]:
# Load the iris dataset
iris = load_iris()

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size=0.2, random_state=42)


In [3]:
class NeuralNetwork:
    def __init__(self, input_size, hidden_size, output_size):
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        
        #weights and biases for the hidden layer
        self.W1 = np.random.randn(self.input_size, self.hidden_size)
        self.b1 = np.zeros((1, self.hidden_size))
        
        #weights and biases for the output layer
        self.W2 = np.random.randn(self.hidden_size, self.output_size)
        self.b2 = np.zeros((1, self.output_size))
        
    def relu(self, z):
        return np.maximum(0, z)
    
    def softmax(self, z):
        return np.exp(z) / np.sum(np.exp(z), axis=1, keepdims=True)
    
    def forward(self, X):
        # activation of the hidden layer
        self.hidden_activation = self.relu(np.dot(X, self.W1) + self.b1)
        
        # output of the neural network
        self.output_activation = self.softmax(np.dot(self.hidden_activation, self.W2) + self.b2)
        
        return self.output_activation
    
    def backward(self, X, y, output_activation, learning_rate):
        dW2 = np.dot(self.hidden_activation.T, (output_activation - y))
        db2 = np.sum(output_activation - y, axis=0, keepdims=True)
        
        dW1 = np.dot(X.T, np.dot(output_activation - y, self.W2.T) * (self.hidden_activation > 0))
        db1 = np.sum(np.dot(output_activation - y, self.W2.T) * (self.hidden_activation > 0), axis=0, keepdims=True)
    
        self.W2 -= learning_rate * dW2
        self.b2 -= learning_rate * db2
        self.W1 -= learning_rate * dW1
        self.b1 -= learning_rate * db1
    
    def train(self, X, y, learning_rate, num_epochs):

        for epoch in range(num_epochs):
            # Forward
            output_activation = self.forward(X)
            
            # Backward
            self.backward(X, y, output_activation, learning_rate)
            
            # Print the loss every 10 epochs
            if (epoch+1) % 10 == 0:
                loss = np.mean(-np.sum(y * np.log(output_activation), axis=1))
                print("Epoch {0}/{1} - loss: {2}".format(epoch+1, num_epochs, loss))
    
    def predict(self, X):
        # Compute the output of the neural network for the given input
        output_activation = self.forward(X)
        
        # Convert the output to a one-hot encoded vector
        y_pred = np.zeros_like(output_activation)
        #y_pred[np.arange(len(output_activation)), output_activation
        y_pred[np.arange(len(output_activation)), output_activation.argmax(1)] = 1
    
        return y_pred


In [10]:
input_size = X_train.shape[1]
hidden_size = 100
output_size = 3
learning_rate = 0.1
num_epochs = 100

nn = NeuralNetwork(input_size, hidden_size, output_size)

In [11]:
nn.train(X_train, np.eye(output_size)[y_train], learning_rate, num_epochs)

Epoch 10/100 - loss: nan
Epoch 20/100 - loss: nan
Epoch 30/100 - loss: nan
Epoch 40/100 - loss: nan
Epoch 50/100 - loss: nan
Epoch 60/100 - loss: nan
Epoch 70/100 - loss: nan
Epoch 80/100 - loss: nan
Epoch 90/100 - loss: nan
Epoch 100/100 - loss: nan


In [12]:
y_pred = nn.predict(X_test)
accuracy = np.mean(np.equal(y_pred, np.eye(output_size)[y_test]))
print("Accuracy: {0}".format(accuracy))

Accuracy: 0.5555555555555556
