In [1]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler

# Function to perform softmax
def softmax(z):
    """Return the row-wise softmax of a 2-D array of logits.

    Args:
        z: Array of shape (n_samples, n_classes) holding raw scores.

    Returns:
        Array of the same shape in which every row is non-negative and
        sums to 1.
    """
    # Shift every row by its *own* maximum. Softmax is invariant to a
    # per-row constant, and this guarantees the largest exponent in each
    # row is exp(0) = 1. Shifting by the global maximum instead lets rows
    # that sit far below it underflow to all zeros, producing 0 / 0 = NaN.
    shifted = z - np.max(z, axis=1, keepdims=True)
    exp_z = np.exp(shifted)
    return exp_z / exp_z.sum(axis=1, keepdims=True)

# Function for forward propagation in a single-layer neural network
def forward_prop(X, W, b):
    """Run the forward pass of the single-layer network.

    Args:
        X: Input matrix, one sample per row.
        W: Weight matrix multiplied on the right of ``X``.
        b: Bias added to the product ``X . W``.

    Returns:
        The result of applying ``softmax`` to ``X . W + b``.
    """
    logits = np.dot(X, W)
    logits = logits + b
    return softmax(logits)

# Function to compute the loss using negative log likelihood
def compute_loss(y, y_hat):
    """Return the mean negative log-likelihood of the true classes.

    Args:
        y: Integer class index for each sample, used to index columns.
        y_hat: 2-D array of predicted probabilities, one row per sample.

    Returns:
        The negated mean of ``log(p + 1e-15)``, where ``p`` is the
        probability each row assigns to its true class.
    """
    # Tiny offset so a predicted probability of exactly 0 does not hit log(0).
    tiny = 1e-15
    row_idx = np.arange(len(y_hat))
    true_class_prob = y_hat[row_idx, y]
    log_likelihood = np.log(true_class_prob + tiny)
    return -np.mean(log_likelihood)

# Gradient of the loss with respect to z
def loss_gradient(y, y_hat):
    """Return the gradient of the mean loss with respect to the logits.

    Args:
        y: Integer class index for each sample.
        y_hat: 2-D array of predicted probabilities, one row per sample.

    Returns:
        A new array shaped like ``y_hat``: the predictions with 1 subtracted
        at each sample's true-class entry, divided by the number of samples.
        ``y_hat`` itself is left unmodified.
    """
    n_samples = len(y)
    rows = np.arange(n_samples)
    # Work on a copy so the caller's predictions are not altered.
    dz = y_hat.copy()
    dz[rows, y] = dz[rows, y] - 1
    return dz / n_samples

# Function to predict classes for given inputs
def predict(X, W, b):
    """Return the most probable class index for every row of ``X``.

    Args:
        X: Input matrix, one sample per row.
        W: Weight matrix passed through to ``forward_prop``.
        b: Bias passed through to ``forward_prop``.

    Returns:
        1-D array with the column index of the largest probability per row.
    """
    probabilities = forward_prop(X, W, b)
    return probabilities.argmax(axis=1)

# Training function for the single-layer neural network
def train(X_train, y_train, X_val, y_val, epochs, learning_rate, log_every=10):
    """Fit the single-layer softmax network with full-batch gradient descent.

    Weights are drawn from NumPy's global random generator and biases start
    at zero. Every epoch uses all of ``X_train`` for one parameter update.

    Args:
        X_train: Training inputs, shape (n_samples, n_features).
        y_train: Integer class index for each training sample. The number of
            output units is the number of distinct values found here.
        X_val: Validation inputs; only used for the reported validation loss.
        y_val: Integer class index for each validation sample.
        epochs: Number of update steps to perform.
        learning_rate: Step size applied to the gradients.
        log_every: Print the losses on epochs divisible by this value.
            Defaults to 10; pass 0 or None to disable printing.

    Returns:
        Tuple ``(W, b)`` with the learned weight matrix and bias row.
    """
    n_features = X_train.shape[1]
    n_classes = len(np.unique(y_train))

    # Initialize weights and biases
    W = np.random.randn(n_features, n_classes)
    b = np.zeros((1, n_classes))

    for epoch in range(epochs):
        # Forward propagation on the training set (needed for the gradient).
        y_hat_train = forward_prop(X_train, W, b)

        # The losses are only ever printed, so compute them (and the
        # validation forward pass) on reporting epochs only. This happens
        # before the update below, so the values describe the same
        # parameters that produced y_hat_train.
        if log_every and epoch % log_every == 0:
            loss = compute_loss(y_train, y_hat_train)
            val_loss = compute_loss(y_val, forward_prop(X_val, W, b))
            print(f'Epoch {epoch}, Loss: {loss}, Validation Loss: {val_loss}')

        # Backward propagation
        gradients = loss_gradient(y_train, y_hat_train)
        W -= learning_rate * np.dot(X_train.T, gradients)
        b -= learning_rate * np.sum(gradients, axis=0, keepdims=True)

    return W, b

# End-to-end run: load the Iris data, hold out a validation split, scale the
# features, train the softmax classifier and report validation accuracy.

# Load Iris dataset (feature matrix and integer class targets)
iris = load_iris()
X, y = iris.data, iris.target

# Split dataset into training and validation sets: 20% is held out for
# validation, and random_state pins the split so it is the same on every run.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features by removing the mean and scaling to unit variance.
# The scaler is fitted on the training split only and then re-used for the
# validation split, so validation data does not influence the statistics.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)

# Set hyperparameters
epochs = 100
learning_rate = 0.1

# Train the model.
# NOTE: train() draws its initial weights from NumPy's global RNG, which is
# not seeded in this cell, so the printed losses can differ between runs.
W, b = train(X_train_scaled, y_train, X_val_scaled, y_val, epochs, learning_rate)

# Predict on the validation set (scaled with the training-set statistics)
y_pred = predict(X_val_scaled, W, b)

# Calculate accuracy as a fraction and print it as a percentage
accuracy = accuracy_score(y_val, y_pred)
print(f'Validation accuracy: {accuracy * 100:.2f}%')


Epoch 0, Loss: 3.4255889012618836, Validation Loss: 3.6322504806110802
Epoch 10, Loss: 1.1645221750168773, Validation Loss: 1.1594517615075772
Epoch 20, Loss: 0.6103560517233885, Validation Loss: 0.5713222504802289
Epoch 30, Loss: 0.4908288921997599, Validation Loss: 0.4408921076575615
Epoch 40, Loss: 0.4393105248491456, Validation Loss: 0.3839297998093568
Epoch 50, Loss: 0.408379673081413, Validation Loss: 0.3496043182630058
Epoch 60, Loss: 0.38663484727079184, Validation Loss: 0.32551455633307413
Epoch 70, Loss: 0.3699128518480143, Validation Loss: 0.30711262876999845
Epoch 80, Loss: 0.35628993172112194, Validation Loss: 0.2922857274505119
Epoch 90, Loss: 0.3447402520864707, Validation Loss: 0.27989548128599806
Validation accuracy: 96.67%
