# Assignment 2


In [None]:
# pip install opencv-python

In [2]:
import cv2
import os
import numpy as np

def one_hot_encoding(img_true_classes, num_of_classes):
    """Turn a 1-D array of integer class indices into a one-hot matrix.

    Args:
        img_true_classes: Integer NumPy array with one class index per image.
        num_of_classes: Total number of classes (number of output columns).

    Returns:
        Float array of shape (num_images, num_of_classes) where row i holds a
        single 1 in column ``img_true_classes[i]`` and 0 everywhere else.
    """
    sample_count = img_true_classes.shape[0]
    row_ids = np.arange(sample_count)

    encoded = np.zeros((sample_count, num_of_classes))
    # Fancy indexing: pair every row with its class column and switch that cell on.
    encoded[row_ids, img_true_classes] = 1
    return encoded

def load_dataset(data_dir, img_size = 64):
    """Load an image-folder dataset into flattened, normalised feature rows.

    Every immediate sub-directory of ``data_dir`` is treated as one class, and
    class indices follow the alphabetical order of the directory names. Files
    that OpenCV cannot decode are skipped; files that raise while being
    processed are reported with ``print`` and skipped.

    Args:
        data_dir: Root directory containing one sub-directory per class.
        img_size: Side length in pixels that every image is resized to.

    Returns:
        Tuple ``(X, y, classes_names)``:
        X is a float array with one flattened image per row, divided by 255;
        y is the one-hot label matrix of shape (num_images, num_classes);
        classes_names is the sorted list of class directory names.

    Raises:
        ValueError: If not a single image could be loaded from ``data_dir``.
    """
    features = []
    labels = []

    # The context manager releases the directory handle as soon as we are done.
    with os.scandir(data_dir) as entries:
        classes_names = sorted(entry.name for entry in entries if entry.is_dir())

    for class_index, class_name in enumerate(classes_names):
        class_path = os.path.join(data_dir, class_name)
        print(f"Loading Class {class_index}: {class_name}")

        # os.listdir returns names in arbitrary order; sorting makes the row
        # order of X / y reproducible between runs and machines.
        for img_name in sorted(os.listdir(class_path)):
            img_path = os.path.join(class_path, img_name)

            try:
                img_np_arr = cv2.imread(img_path)

                # imread signals an unreadable / non-image file with None.
                if img_np_arr is None:
                    continue

                img_resized_arr = cv2.resize(img_np_arr, (img_size, img_size))
                features.append(img_resized_arr.flatten())
                labels.append(class_index)

            except Exception as e:
                # Best effort: report the broken file and keep loading the rest.
                print(f"Error loading {img_name}: {e}")

    if not features:
        # Without this guard the empty (float) label array fails later inside
        # one_hot_encoding with an unrelated-looking indexing error.
        raise ValueError(f"No readable images found under {data_dir!r}")

    X = np.array(features) / 255
    # Explicit integer dtype: the labels are used as array indices.
    y = one_hot_encoding(np.array(labels, dtype=int), len(classes_names))

    return X, y, classes_names



# Root folder holding one sub-directory per class. In a raw string a backslash
# is already literal, so each path separator is written once (the previous
# literal mixed "\\" and "\", which put doubled separators into the path).
DATA_DIR = r"C:\Life\FCAI_Stuff\Third_year_AI\Intro_to_ML\Assignments\Sea_Animals"

# X: one flattened image per row, y: one-hot labels, classes_names: label names.
X, y, classes_names = load_dataset(DATA_DIR)
print(X)
print(y)

    

Loading Class 0: Clams
Loading Class 1: Corals
Loading Class 2: Crabs
Loading Class 3: Dolphin
Loading Class 4: Eel
Loading Class 5: Fish
Loading Class 6: Jelly_Fish
Loading Class 7: Lobster
Loading Class 8: Nudibranchs
Loading Class 9: Octopus
Loading Class 10: Otter
Loading Class 11: Penguin
Loading Class 12: Puffers
Loading Class 13: Sea_Rays
Loading Class 14: Sea_Urchins
Loading Class 15: Seahorse
Loading Class 16: Seal
Loading Class 17: Sharks
Loading Class 18: Shrimp
Loading Class 19: Squid
Loading Class 20: Starfish
Loading Class 21: Turtle_Tortoise
Loading Class 22: Whale
[[0.34117647 0.36078431 0.23137255 ... 0.54901961 0.74901961 0.63529412]
 [0.41568627 0.41176471 0.43529412 ... 0.41568627 0.38823529 0.26666667]
 [0.36078431 0.38431373 0.25490196 ... 0.92156863 0.89019608 0.64705882]
 ...
 [0.74901961 0.50980392 0.37647059 ... 0.7372549  0.4627451  0.31764706]
 [0.6745098  0.50196078 0.36470588 ... 0.25882353 0.23529412 0.14509804]
 [0.93333333 0.78039216 0.65490196 ... 0.85

In [3]:
# Both sizes are wrapped in one-element lists so that build_nueral_network can
# join them with the hidden-layer sizes using list concatenation (+).
input_layer_size = [X.shape[1]] # Number of features per image (columns of X)
output_layer_size = [len(classes_names)]  # One output unit per class

def _prompt_int(prompt, minimum=None, allowed=None):
    """Ask via ``input`` until the user enters an acceptable integer.

    Args:
        prompt: Text shown to the user on every attempt.
        minimum: If given, the smallest accepted value.
        allowed: If given, the collection of accepted values.

    Returns:
        The first entered integer that satisfies the constraints.
    """
    while True:
        raw = input(prompt)
        try:
            value = int(raw)
        except ValueError:
            print(f"'{raw}' is not a whole number, please try again.")
            continue
        if minimum is not None and value < minimum:
            print(f"Please enter a value of at least {minimum}.")
            continue
        if allowed is not None and value not in allowed:
            print(f"Please choose one of {sorted(allowed)}.")
            continue
        return value


def build_nueral_network(in_size, out_size):
    """Interactively collect the MLP architecture from the user.

    Invalid answers (non-numeric text, a negative layer count, a layer with
    fewer than one neuron, an unknown activation code) are rejected and the
    question is asked again instead of crashing or returning a value that the
    forward / backward passes cannot handle.

    Args:
        in_size: One-element sequence holding the input layer size.
        out_size: One-element sequence holding the output layer size.

    Returns:
        Tuple ``(net_sizes, activ_func)``: ``net_sizes`` is the list
        ``in_size + hidden sizes + out_size`` and ``activ_func`` is 1 for
        Sigmoid or 2 for Relu in the hidden layers.
    """
    hid_layers_num = _prompt_int(
        "So, how many hidden layers do you need for your MLP ?\nNumber of hidden layers: ",
        minimum=0,
    )

    hid_sizes = [
        _prompt_int(
            f"Please enter the number of nuerons that you need in layer number {i + 1}: ",
            minimum=1,
        )
        for i in range(hid_layers_num)
    ]

    # Prompt text kept exactly as before; only the answer is now validated.
    activation_prompt = """
                           Lastly, what is the Activation function you want to use in the hidden layers?
                           1) Sigmoid
                           2) Relu
                           Choose (1/2): 
                           """
    activ_func = _prompt_int(activation_prompt, allowed=(1, 2))

    net_sizes = list(in_size) + hid_sizes + list(out_size)
    return net_sizes, activ_func

# Interactive step: build_nueral_network prompts through input(), so this cell
# blocks until the hidden-layer sizes and the activation choice are typed in.
net_sizes, activ_func = build_nueral_network(input_layer_size, output_layer_size)

In [None]:
def intialize_param(net_sizes, scale=0.01, seed=None):
    """Create the initial weights and biases for every layer transition.

    Args:
        net_sizes: Layer sizes from input to output, e.g. ``[12288, 100, 23]``.
        scale: Factor applied to the standard-normal samples (default 0.01,
            the value that used to be hard-coded).
        seed: Optional seed (or ``numpy.random.Generator``) for reproducible
            weights. With the default ``None`` the global NumPy random state
            is used, exactly as before.

    Returns:
        Tuple ``(weights, biases)`` of lists with one entry per transition:
        ``weights[i]`` has shape (net_sizes[i + 1], net_sizes[i]) and
        ``biases[i]`` is a zero column vector of shape (net_sizes[i + 1], 1).
    """
    rng = None if seed is None else np.random.default_rng(seed)

    weights = []
    biases = []

    for input_dim, output_dim in zip(net_sizes[:-1], net_sizes[1:]):
        if rng is None:
            gaussian = np.random.randn(output_dim, input_dim)
        else:
            gaussian = rng.standard_normal((output_dim, input_dim))

        # Small random weights break the symmetry between neurons; biases can
        # safely start at zero.
        weights.append(gaussian * scale)
        biases.append(np.zeros((output_dim, 1)))

    return weights, biases

# Create the starting parameters for the architecture chosen above, then print
# the layer layout and the shapes of the first layer's weight matrix and bias
# vector as a quick sanity check.
weights, biases = intialize_param(net_sizes)
print(f"Structure: {net_sizes}")
print(f"first Weight Matrix shape: {weights[0].shape}")
print(f"first Bias Vector shape: {biases[0].shape}")

Structure: [12288, 100, 90, 80, 70, 60, 50, 40, 30, 20, 10, 23]
Second Weight Matrix shape: (100, 12288)
Second Bias Vector shape: (100, 1)


# Activation functions

In [5]:
def Sigmoid(z):
    """Element-wise logistic function 1 / (1 + e^(-z)), computed stably.

    The naive formula evaluates ``np.exp(-z)``, which overflows (with a
    RuntimeWarning) for large negative ``z``. Here the exponent is always
    non-positive, so ``np.exp`` can never overflow:

        z >= 0:  1 / (1 + e^(-|z|))
        z <  0:  e^(-|z|) / (1 + e^(-|z|))

    Args:
        z: Scalar or array of pre-activations.

    Returns:
        Sigmoid of ``z`` with the same shape as ``z``.
    """
    z = np.asarray(z)
    decay = np.exp(-np.abs(z))
    result = np.where(z >= 0, 1 / (1 + decay), decay / (1 + decay))
    # [()] turns a 0-d result back into a scalar and leaves real arrays as-is.
    return result[()]

def Relu(z):
    """Rectified linear unit: element-wise max(0, z)."""
    floor = 0
    return np.maximum(floor, z)

def SoftMax(z):
    """Column-wise softmax: e^z / sum(e^z), evaluated separately per column.

    Numerical-stability trick: the maximum of each column is subtracted before
    exponentiating, so the largest exponent is 0 (e^0 = 1) and np.exp cannot
    overflow. The result is mathematically unchanged.

    axis=0 reduces down each column (one column per image), and keepdims=True
    keeps the reduced axis so the max / sum broadcast against ``z``.
    """
    column_peak = np.max(z, axis=0, keepdims=True)
    exponentials = np.exp(z - column_peak)
    normaliser = np.sum(exponentials, axis=0, keepdims=True)
    return exponentials / normaliser

# Forward propagation

In [None]:
def forward_propagation(X, weights, biases, activ_func):
    """Run one forward pass and record what the backward pass needs.

    Args:
        X: Input matrix with one sample per row; it is transposed internally
            so that every column is one sample.
        weights: List of weight matrices, one per layer.
        biases: List of bias column vectors, one per layer.
        activ_func: Hidden-layer activation code: 1 = Sigmoid, 2 = Relu.
            The last layer always uses SoftMax.

    Returns:
        Tuple ``(cache_A, cache_Z)``: ``cache_A[0]`` is ``X.T`` and
        ``cache_A[i + 1]`` is the activation of layer ``i``; ``cache_Z[i]`` is
        the pre-activation ``W.A + b`` of layer ``i``. Z is kept because its
        activation derivative is needed during backpropagation.

    Raises:
        ValueError: If hidden layers exist and ``activ_func`` is not 1 or 2.
    """
    last_layer = len(weights) - 1

    # An unknown code used to skip the activation *and* the cache append
    # silently, leaving caches that no longer line up with the layers.
    if last_layer > 0 and activ_func not in (1, 2):
        raise ValueError(
            f"activ_func must be 1 (Sigmoid) or 2 (Relu), got {activ_func!r}"
        )

    A = X.T  # the input layer is the first "activation"
    cache_A = [A]
    cache_Z = []

    for layer_index, (w, b) in enumerate(zip(weights, biases)):
        Z = np.dot(w, A) + b  # net input of the layer: Z = W.A + b
        cache_Z.append(Z)

        if layer_index == last_layer:  # output layer
            A = SoftMax(Z)
        elif activ_func == 1:  # hidden layer, Sigmoid
            A = Sigmoid(Z)
        else:  # hidden layer, Relu
            A = Relu(Z)
        cache_A.append(A)

    return cache_A, cache_Z

# One forward pass over the whole dataset with the freshly initialised
# parameters; a_cache[-1] holds the SoftMax output of the last layer.
a_cache, z_cache = forward_propagation(X,weights,biases,activ_func)

# Backward propagation

In [None]:
def sigmoid_derivative(z):
    """Derivative of the sigmoid at pre-activation ``z``: s(z) * (1 - s(z))."""
    activated = Sigmoid(z)
    complement = 1 - activated
    return activated * complement

def relu_derivative(z):
    """Derivative of ReLU at pre-activation ``z``: 1.0 where z > 0, else 0.0."""
    positive_mask = z > 0
    return positive_mask.astype(float)

def backward_propagation(cache_A, cache_Z, weights, y, activ_func, m):
    """Backpropagate the loss gradient through every layer.

    Args:
        cache_A: Activations from the forward pass; ``cache_A[i]`` is the
            input of layer ``i`` and ``cache_A[-1]`` the network output.
        cache_Z: Pre-activations from the forward pass, one per layer.
        weights: List of weight matrices, one per layer.
        y: One-hot labels with one sample per row (transposed internally to
            match the column-per-sample layout of the activations).
        activ_func: Hidden-layer activation code: 1 = Sigmoid, 2 = Relu.
        m: Number of samples the gradients are averaged over.

    Returns:
        Tuple ``(weight_gradients, bias_gradients)``; entry ``i`` of each list
        has the same shape as the weights / biases of layer ``i``.

    Raises:
        ValueError: If hidden layers exist and ``activ_func`` is not 1 or 2.
    """
    num_layers = len(weights)

    # An unknown code used to leave dZ undefined (or stale) for hidden layers.
    if num_layers > 1 and activ_func not in (1, 2):
        raise ValueError(
            f"activ_func must be 1 (Sigmoid) or 2 (Relu), got {activ_func!r}"
        )

    # Filled by index while walking backwards (avoids repeated insert(0, ...)).
    weight_gradients = [None] * num_layers
    bias_gradients = [None] * num_layers

    # Output layer: dZ = predictions - true labels.
    dZ = cache_A[-1] - y.T

    # Walk from the last layer to the first.
    for layer_index in range(num_layers - 1, -1, -1):
        A_prev = cache_A[layer_index]  # input that fed this layer

        # dW = (1/m) * dZ . A_prev^T
        weight_gradients[layer_index] = (1/m) * np.dot(dZ, A_prev.T)

        # db = (1/m) * sum of dZ over all samples (columns)
        bias_gradients[layer_index] = (1/m) * np.sum(dZ, axis=1, keepdims=True)

        if layer_index > 0:
            # Gradient w.r.t. the previous layer's activation: W^T . dZ ...
            dA_prev = np.dot(weights[layer_index].T, dZ)
            # ... then through that hidden layer's activation function.
            Z_prev = cache_Z[layer_index - 1]
            if activ_func == 1:  # Sigmoid
                dZ = dA_prev * sigmoid_derivative(Z_prev)
            else:  # ReLU
                dZ = dA_prev * relu_derivative(Z_prev)

    return weight_gradients, bias_gradients

# Smoke test: backpropagate once using the caches from the forward pass.
# X.shape[0] (the number of rows of X, i.e. samples) is passed as m.
weight_grads, bias_grads = backward_propagation(a_cache, z_cache, weights, y, activ_func, X.shape[0])
print(f"Number of weight gradients: {len(weight_grads)}")
print(f"First weight gradient shape: {weight_grads[0].shape}")
print(f"First bias gradient shape: {bias_grads[0].shape}")


# Loss Function

In [None]:
def calculate_loss(predictions, y_true, m):
    """Mean categorical cross-entropy between predictions and one-hot labels.

    Args:
        predictions: Predicted probabilities, shape (num_classes, m).
        y_true: One-hot labels, shape (m, num_classes); transposed internally.
        m: Number of samples the summed loss is divided by.

    Returns:
        The scalar loss ``-(1/m) * sum(y_true.T * log(predictions))``.
    """
    tiny = 1e-15
    # Keep probabilities strictly inside (0, 1) so the logarithm never sees 0.
    safe_probs = np.clip(predictions, tiny, 1 - tiny)

    # Element-wise product: only the log-probability of the true class survives.
    picked_log_probs = y_true.T * np.log(safe_probs)
    return -(1/m) * np.sum(picked_log_probs)

# Smoke test: loss of the untrained network, computed from the output of the
# forward pass (a_cache[-1]) against the one-hot labels y.
test_loss = calculate_loss(a_cache[-1], y, X.shape[0])
print(f"Initial Loss: {test_loss}")


# Update Parameters (Gradient Descent)

In [None]:
def update_parameters(weights, biases, weight_gradients, bias_gradients, learning_rate):
    """Apply one gradient-descent step and return the new parameters.

    The inputs are not modified; fresh arrays are returned.

    Args:
        weights: List of weight matrices, one per layer.
        biases: List of bias vectors, one per layer.
        weight_gradients: Gradients matching ``weights`` entry by entry.
        bias_gradients: Gradients matching ``biases`` entry by entry.
        learning_rate: Step size of the update.

    Returns:
        Tuple ``(updated_weights, updated_biases)``.
    """
    layer_ids = range(len(weights))

    # W <- W - learning_rate * dW
    updated_weights = [
        weights[k] - learning_rate * weight_gradients[k] for k in layer_ids
    ]
    # b <- b - learning_rate * db
    updated_biases = [
        biases[k] - learning_rate * bias_gradients[k] for k in layer_ids
    ]

    return updated_weights, updated_biases

# Smoke test: take a single gradient-descent step and compare one weight
# before and after the update.
learning_rate = 0.01
updated_weights, updated_biases = update_parameters(
    weights, biases, weight_grads, bias_grads, learning_rate
)
print("Weights updated successfully!")
print(f"First weight before update: {weights[0][0, 0]:.6f}")
print(f"First weight after update: {updated_weights[0][0, 0]:.6f}")
