<a href="https://colab.research.google.com/github/Kentucky-Kaneki/Learning-Machine-Learning-/blob/main/9_Perceptron.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler

In [2]:
# Activation functions and derivatives
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_derivative(x):
    return x * (1 - x)

def softmax(x):
    exps = np.exp(x - np.max(x, axis=1, keepdims=True))
    return exps / np.sum(exps, axis=1, keepdims=True)

# Loss function: cross-entropy
def cross_entropy(y_true, y_pred):
    # Add a small epsilon to avoid log(0)
    epsilon = 1e-9

    # Calculate log of predicted probabilities
    log_preds = np.log(y_pred + epsilon)

    # Multiply true labels by log of predictions (selects the log prob of the true class)
    weighted_log_preds = y_true * log_preds

    # Sum over classes for each sample
    sample_losses = np.sum(weighted_log_preds, axis=1)

    # Average the losses across all samples and negate
    loss = -np.mean(sample_losses)

    return loss

In [3]:
iris = load_iris()
X = pd.DataFrame(iris.data, columns=iris.feature_names)
y = pd.DataFrame(iris.target, columns=['target'])

# Preprocess
scaler = StandardScaler()
X = scaler.fit_transform(X)

encoder = OneHotEncoder(sparse_output=False)
y_onehot = encoder.fit_transform(y)  # 150x3

# Split
X_train, X_test, y_train, y_test = train_test_split(X, y_onehot, test_size=0.2, random_state=42)

In [4]:
# MLP architecture
input_size = 4
hidden_size = 6
output_size = 3
lr = 0.1
epochs = 300

# Initialize weights
np.random.seed(0)
W1 = np.random.randn(input_size, hidden_size) # 4x6 matrix
b1 = np.zeros((1, hidden_size)) # bias is 0 initially
W2 = np.random.randn(hidden_size, output_size) # 6x3 matrix
b2 = np.zeros((1, output_size)) # bias coeff is 0 initially

In [5]:
# Training loop
for epoch in range(epochs):
    # Forward pass
    z1 = np.dot(X_train, W1) + b1 # yin for input to hidden layer
    a1 = sigmoid(z1) #
    z2 = np.dot(a1, W2) + b2 # yin for the output layer
    a2 = softmax(z2) # final y at the output layer

    # Loss
    loss = cross_entropy(y_train, a2)

    # Backpropagation
    error_output = a2 - y_train  # shape: (n_samples, output_size)
    dW2 = np.dot(a1.T, error_output)
    db2 = np.sum(error_output, axis=0, keepdims=True)

    error_hidden = np.dot(error_output, W2.T) * sigmoid_derivative(a1)
    dW1 = np.dot(X_train.T, error_hidden)
    db1 = np.sum(error_hidden, axis=0, keepdims=True)

    # Update weights
    W2 -= lr * dW2
    b2 -= lr * db2
    W1 -= lr * dW1
    b1 -= lr * db1

    # print loss every 50 epochs
    if epoch % 50 == 0:
        print(f"Epoch {epoch}, Loss: {loss:.4f}")

Epoch 0, Loss: 1.2202
Epoch 50, Loss: 0.0723
Epoch 100, Loss: 0.0518
Epoch 150, Loss: 0.0474
Epoch 200, Loss: 0.0444
Epoch 250, Loss: 0.0416


In [6]:
# Evaluate
z1 = np.dot(X_test, W1) + b1
a1 = sigmoid(z1)
z2 = np.dot(a1, W2) + b2
a2 = softmax(z2)

predictions = np.argmax(a2, axis=1)
y_true = np.argmax(y_test, axis=1)

accuracy = np.mean(predictions == y_true)
print(f"\nTest Accuracy: {accuracy * 100:.2f}%")


Test Accuracy: 100.00%
