In [13]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
# w this we read and prepare the dataset (iris)
def load_iris_data():
    data = pd.read_csv("iris.csv")
    labels = data["species"].factorize()[0]
    features = data.iloc[:, :4].values
    scaler = StandardScaler()
    features = scaler.fit_transform(features)
    X_train, X_dev, y_train, y_dev = train_test_split(
        features, labels, test_size=0.2, random_state=42, stratify=labels
    )
    return X_train.T, X_dev.T, y_train, y_dev

X_train, X_dev, y_train, y_dev = load_iris_data()
print("Train shape:", X_train.shape)
print("Dev shape:", X_dev.shape)


Train shape: (4, 120)
Dev shape: (4, 30)


In [14]:
# These functions help the model learn patterns
def relu(z):
    return np.maximum(0, z)

def relu_derivative(z):
    return (z > 0).astype(float)

def softmax(z):
    shifted_z = z - np.max(z, axis=0, keepdims=True)
    exp_values = np.exp(shifted_z)
    return exp_values / np.sum(exp_values, axis=0, keepdims=True)

def convert_to_one_hot(y, num_classes=3):
    m = y.shape[0]
    one_hot_matrix = np.zeros((num_classes, m))
    one_hot_matrix[y, np.arange(m)] = 1
    return one_hot_matrix

def compute_accuracy(predictions, labels):
    return np.mean(predictions == labels) * 100


In [15]:
# now We randomly create weights and biases
def initialize_parameters(input_size, hidden_size, output_size):
    np.random.seed(42)
    weights_1 = np.random.uniform(-0.5, 0.5, size=(hidden_size, input_size))
    bias_1 = np.zeros((hidden_size, 1))
    weights_2 = np.random.uniform(-0.5, 0.5, size=(output_size, hidden_size))
    bias_2 = np.zeros((output_size, 1))
    return {
        "W1": weights_1,
        "b1": bias_1,
        "W2": weights_2,
        "b2": bias_2
    }


In [16]:
#forward propagation through the network
def forward_pass(parameters, input_data):
    w1 = parameters["W1"]
    b1 = parameters["b1"]
    w2 = parameters["W2"]
    b2 = parameters["b2"]

    z1 = np.dot(w1, input_data) + b1
    a1 = relu(z1)
    z2 = np.dot(w2, a1) + b2
    a2 = softmax(z2)

    cache = {
        "Z1": z1,
        "A1": a1,
        "Z2": z2,
        "A2": a2
    }
    return a2, cache

def backward_pass(parameters, cache, input_data, true_output):
    m = input_data.shape[1]
    w2 = parameters["W2"]
    a1 = cache["A1"]
    a2 = cache["A2"]

    dz2 = a2 - true_output
    dw2 = np.dot(dz2, a1.T) / m
    db2 = np.sum(dz2, axis=1, keepdims=True) / m

    dz1 = np.dot(w2.T, dz2) * relu_derivative(cache["Z1"])
    dw1 = np.dot(dz1, input_data.T) / m
    db1 = np.sum(dz1, axis=1, keepdims=True) / m

    gradients = {
        "dW1": dw1,
        "db1": db1,
        "dW2": dw2,
        "db2": db2
    }
    return gradients


In [17]:
# We use the gradients to adjust the model
def update_parameters(parameters, gradients, learning_rate):
    parameters["W1"] -= learning_rate * gradients["dW1"]
    parameters["b1"] -= learning_rate * gradients["db1"]
    parameters["W2"] -= learning_rate * gradients["dW2"]
    parameters["b2"] -= learning_rate * gradients["db2"]
    return parameters

def train_model(X_train, y_train, X_dev, y_dev,
                hidden_size=10, iterations=1000, learning_rate=0.05, print_interval=100):

    input_size = X_train.shape[0]
    output_size = len(np.unique(y_train))
    parameters = initialize_parameters(input_size, hidden_size, output_size)

    Y_train = convert_to_one_hot(y_train, output_size)
    Y_dev = convert_to_one_hot(y_dev, output_size)

    for i in range(1, iterations + 1):
        predictions, cache = forward_pass(parameters, X_train)
        gradients = backward_pass(parameters, cache, X_train, Y_train)
        parameters = update_parameters(parameters, gradients, learning_rate)

        if i % print_interval == 0 or i == 1:
            train_predictions = np.argmax(predictions, axis=0)
            dev_predictions = np.argmax(forward_pass(parameters, X_dev)[0], axis=0)
            print("Iteration", i)
            print("Train Accuracy:", compute_accuracy(train_predictions, y_train))
            print("Dev Accuracy:", compute_accuracy(dev_predictions, y_dev))
            print()

    return parameters


In [18]:
# finally we test the model on a single sample
trained_parameters = train_model(X_train, y_train, X_dev, y_dev,
                                 hidden_size=10, iterations=1000, learning_rate=0.05)

sample_index = 0
sample_input = X_dev[:, sample_index:sample_index+1]
true_output = y_dev[sample_index]
predicted_output = np.argmax(forward_pass(trained_parameters, sample_input)[0])

print()
print("Sample", sample_index)
print("Predicted:", predicted_output)
print("Actual:", true_output)


Iteration 1
Train Accuracy: 21.666666666666668
Dev Accuracy: 20.0

Iteration 100
Train Accuracy: 86.66666666666667
Dev Accuracy: 76.66666666666667

Iteration 200
Train Accuracy: 93.33333333333333
Dev Accuracy: 90.0

Iteration 300
Train Accuracy: 95.83333333333334
Dev Accuracy: 93.33333333333333

Iteration 400
Train Accuracy: 96.66666666666667
Dev Accuracy: 96.66666666666667

Iteration 500
Train Accuracy: 96.66666666666667
Dev Accuracy: 96.66666666666667

Iteration 600
Train Accuracy: 97.5
Dev Accuracy: 96.66666666666667

Iteration 700
Train Accuracy: 97.5
Dev Accuracy: 96.66666666666667

Iteration 800
Train Accuracy: 97.5
Dev Accuracy: 96.66666666666667

Iteration 900
Train Accuracy: 97.5
Dev Accuracy: 96.66666666666667

Iteration 1000
Train Accuracy: 97.5
Dev Accuracy: 96.66666666666667


Sample 0
Predicted: 0
Actual: 0
