In [1]:
import os
import numpy as np # to process the data
import random
import math
import matplotlib.pyplot as plt
from random import shuffle
from PIL import Image
from sklearn.model_selection import train_test_split

In [10]:
class FlowerDataset:
    def __init__(self, directory):
        self.directory = directory
        self.images = []
        self.labels = []

    def load_data(self):
        image_files = os.listdir(self.directory)
        for img in image_files[:20]:
            try:
                img_path = os.path.join(self.directory, img)
                with Image.open(img_path) as img_array:
                    resized_array = img_array.resize((150, 150))
                    self.images.append(np.array(resized_array))
                    label = img.split('_')[0]
                    self.labels.append(label)
            except Exception as e:
                print(f"Error loading image {img}: {e}")

    def preprocess_data(self):
        max_pixel_value = 255.0
        self.images = [image / max_pixel_value for image in self.images]

    def print_dataset_dimensions(self):
        if self.images:
            print(f"Total number of images: {len(self.images)}")
            print(f"Dimensions of each image: {self.images[0].shape}")
            print(f"Total dimensions of the dataset (assuming all images have the same shape): {len(self.images)} x {self.images[0].shape}")
        else:
            print("The dataset is empty or not loaded properly.")

    def one_hot_encode(self):
        unique_labels = sorted(set(self.labels))
        label_to_int = {label: index for index, label in enumerate(unique_labels)}
        self.label_indices = [label_to_int[label] for label in self.labels]
        self.labels = [[int(i == label_index) for i in range(len(unique_labels))] for label_index in self.label_indices]

def split_data(images, labels, train_ratio, validation_ratio, test_ratio):
    combined = list(zip(images, labels))
    shuffle(combined)
    shuffled_images, shuffled_labels = zip(*combined)

    train_end = int(len(shuffled_images) * train_ratio)
    validation_end = train_end + int(len(shuffled_images) * validation_ratio)

    x_train = shuffled_images[:train_end]
    y_train = shuffled_labels[:train_end]
    x_val = shuffled_images[train_end:validation_end]
    y_val = shuffled_labels[train_end:validation_end]
    x_test = shuffled_images[validation_end:]
    y_test = shuffled_labels[validation_end:]

    return list(x_train), list(x_val), list(x_test), list(y_train), list(y_val), list(y_test)

# Usage
dataset = FlowerDataset('flowers')
dataset.load_data()
dataset.preprocess_data()
dataset.one_hot_encode()

x_train, x_val, x_test, y_train, y_val, y_test = split_data(dataset.images, dataset.labels, 0.75, 0.15, 0.10)

# Print the dimensions of the preprocessed images
dataset.print_dataset_dimensions()

# Print the sizes of the datasets
print(f"Train set size: {len(x_train)}, Validation set size: {len(x_val)}, Test set size: {len(x_test)}")

# Print the unique classes in the training set
unique_classes = sorted(set([label.index(1) for label in y_train]))
print(f"Unique classes in the training set: {unique_classes}")

num_classes = len(unique_classes)
print(f"Number of classes for prediction: {num_classes}")


Total number of images: 20
Dimensions of each image: (150, 150, 3)
Total dimensions of the dataset (assuming all images have the same shape): 20 x (150, 150, 3)
Train set size: 15, Validation set size: 3, Test set size: 2
Unique classes in the training set: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Number of classes for prediction: 10


In [11]:
class Limiting:
    @staticmethod
    def clip(value, min_value, max_value):
        return max(min(value, max_value), min_value)

    @staticmethod
    def min_max_scale(data):
        min_val = min(data)
        max_val = max(data)
        range_val = max_val - min_val
        if range_val == 0:
            return [0 for _ in data]  # Return a list of zeros if all values are the same
        scaled_data = [(value - min_val) / range_val for value in data]
        return scaled_data


In [27]:
# Base Layer class
class Layer:
    def forward(self, input_data):
        raise NotImplementedError

    def backward(self, output_error, learning_rate):
        raise NotImplementedError

# Convolutional Layer
class ConvLayer(Layer):
    def __init__(self, num_filters, filter_size, num_channels=3, padding=0, stride=1):
        self.num_filters = num_filters
        self.filter_size = filter_size
        self.filters = [[[random.uniform(-1, 1) for _ in range(num_channels)] for _ in range(filter_size)] for _ in range(num_filters)]
        self.padding = padding
        self.stride = stride

    def pad_input(self, input_data):
        # Initialize the padded data with zeros for the new dimensions
        padded_data = [[[0 for _ in range(len(input_data[0][0]) + 2 * self.padding)]
                        for _ in range(len(input_data[0]) + 2 * self.padding)]
                        for _ in range(len(input_data))]

        # Apply padding to each channel of the input data
        for z in range(len(input_data)):
            for y in range(len(input_data[0])):
                for x in range(len(input_data[0][0])):
                    padded_data[z][y + self.padding][x + self.padding] = input_data[z][y][x]

        return padded_data

    def element_wise_multiplication(self, matrix1, matrix2):
        # Assuming matrix1 and matrix2 are 2D lists of the same size
        return [[matrix1[i][j] * matrix2[i][j] for j in range(len(matrix1[0]))] for i in range(len(matrix1))]

    def sum_matrix(self, matrix):
        # Sum all elements in a 2D list
        return sum(sum(row) for row in matrix)

    def clip(self, value, min_value, max_value):
        # Clip the value to be within the min and max range
        return max(min_value, min(value, max_value))

    def forward(self, input_batch):
        # Assuming input_batch is a 4D list: [batch_size, height, width, depth]
        batch_size = len(input_batch)
        output_batch = []

        for b in range(batch_size):
            input_data = input_batch[b]
            input_padded = self.pad_input(input_data)

            # Calculate output dimensions for each image in the batch
            output_height = int((len(input_padded) - self.filter_size) / self.stride) + 1
            output_width = int((len(input_padded[0]) - self.filter_size) / self.stride) + 1

            # Initialize the output for the current image as a list of lists
            output = [[0 for _ in range(output_width)] for _ in range(output_height)]

            # Perform the convolution operation for the current image
            for i in range(output_height):
                for j in range(output_width):
                    for f in range(self.num_filters):
                        vertical_start = i * self.stride
                        vertical_end = vertical_start + self.filter_size
                        horizontal_start = j * self.stride
                        horizontal_end = horizontal_start + self.filter_size

                        # Extract the patch from the padded input
                        patch = [row[horizontal_start:horizontal_end] for row in input_padded[vertical_start:vertical_end]]

                        # Perform element-wise multiplication and sum the result
                        multiplied_patch = self.element_wise_multiplication(patch, self.filters[f])
                        conv_sum = self.sum_matrix(multiplied_patch)

                        # Clip the values and assign to the output
                        output[i][j] = Limiting.clip(conv_sum, -1e2, 1e2)

            # Append the output for the current image to the output batch
            output_batch.append(output)

        return output_batch

    def backward(self, output_gradient, learning_rate):
        # Initialize gradients with zeros
        input_gradient = [[0 for _ in range(len(self.last_input[0]))] for _ in range(len(self.last_input))]
        filter_gradients = [[[0 for _ in range(len(self.filters[0][0]))] for _ in range(len(self.filters[0]))] for _ in range(len(self.filters))]

        # Loop over the output gradient
        for i in range(len(output_gradient)):
            for j in range(len(output_gradient[0])):
                for f in range(len(self.filters)):
                    # Calculate the position in the input
                    vertical_start = i * self.stride
                    vertical_end = vertical_start + self.filter_size
                    horizontal_start = j * self.stride
                    horizontal_end = horizontal_start + self.filter_size

                    # Extract the patch from the last input
                    input_patch = [row[horizontal_start:horizontal_end] for row in self.last_input[vertical_start:vertical_end]]

                    # Calculate the filter gradient
                    for m in range(self.filter_size):
                        for n in range(self.filter_size):
                            filter_gradients[f][m][n] += input_patch[m][n] * output_gradient[i][j]

                    # Calculate the input gradient
                    for m in range(self.filter_size):
                        for n in range(self.filter_size):
                            input_gradient[vertical_start + m][horizontal_start + n] += self.filters[f][m][n] * output_gradient[i][j]

        # Update the filters
        for f in range(len(self.filters)):
            for m in range(self.filter_size):
                for n in range(self.filter_size):
                    self.filters[f][m][n] -= learning_rate * filter_gradients[f][m][n]

        return input_gradient
        
# MaxPooling Layer
class MaxPoolingLayer:
    def __init__(self, pool_size):
        self.pool_size = pool_size
        self.last_input = None

    def forward(self, input_batch):
        # Assuming input_batch is a 4D list: [batch_size, height, width, depth]
        batch_size = len(input_batch)
        output_batch = []

        for b in range(batch_size):
            input_data = input_batch[b]
            # Calculate output dimensions for each image in the batch
            output_height = len(input_data) // self.pool_size
            output_width = len(input_data[0]) // self.pool_size
            output_depth = len(input_data[0][0])

            # Initialize the output for the current image as a list of lists
            output = [[[0 for _ in range(output_depth)] for _ in range(output_width)] for _ in range(output_height)]

            # Perform the max pooling operation for the current image
            for z in range(output_depth):
                for i in range(output_height):
                    for j in range(output_width):
                        # Find the maximum value in the current pool
                        pool = [input_data[z][i * self.pool_size + di][j * self.pool_size + dj]
                                for di in range(self.pool_size) for dj in range(self.pool_size)]
                        output[i][j][z] = max(pool)

            # Append the output for the current image to the output batch
            output_batch.append(output)

        return output_batch

    def backward(self, output_gradient):
        input_gradient = [[[0 for _ in range(len(self.last_input[0][0]))] for _ in range(len(self.last_input[0]))] for _ in range(len(self.last_input))]

        for z in range(len(self.last_input[0][0])):
            for i in range(len(output_gradient)):
                for j in range(len(output_gradient[0])):
                    patch = [self.last_input[i * self.pool_size + di][j * self.pool_size + dj][z]
                             for di in range(self.pool_size) for dj in range(self.pool_size)]
                    max_value = max(patch)
                    for di in range(self.pool_size):
                        for dj in range(self.pool_size):
                            if self.last_input[i * self.pool_size + di][j * self.pool_size + dj][z] == max_value:
                                input_gradient[i * self.pool_size + di][j * self.pool_size + dj][z] = output_gradient[i][j][z]
                                break
        return input_gradient
        
class FlattenLayer:
    def forward(self, input_data):
        # Save the input shape to rebuild it during the backward pass
        self.last_input_shape = self.get_shape(input_data)
        
        # Flatten the input data
        flattened_data = self.flatten(input_data)
        return flattened_data

    def backward(self, output_error):
        # Reshape the output error to the original input shape
        reshaped_error = self.reshape(output_error, self.last_input_shape)
        return reshaped_error

    def flatten(self, input_data):
        # Recursively flatten the input data
        if isinstance(input_data[0], list):
            return [item for sublist in input_data for item in self.flatten(sublist)]
        else:
            return input_data

    def reshape(self, output_error, shape):
        # Recursively build the nested lists with the given shape
        if len(shape) == 1:
            return output_error
        else:
            step = len(output_error) // shape[0]
            return [self.reshape(output_error[i * step:(i + 1) * step], shape[1:]) for i in range(shape[0])]

    def get_shape(self, lst):
        # Get the shape of nested lists
        shape = []
        while isinstance(lst, list):
            shape.append(len(lst))
            lst = lst[0] if lst else None
        return tuple(shape)
        
# Dropout Layer
class DropoutLayer:
    def __init__(self, dropout_rate):
        self.dropout_rate = dropout_rate
        self.mask = None

    def forward(self, input_data):
        # Create a mask with the same shape as the input data
        self.mask = [[random.uniform(0, 1) > self.dropout_rate for _ in neuron] for neuron in input_data]
        # Apply the mask to the input data
        dropped_out_data = [[neuron[i] * self.mask[neuron_index][i] for i in range(len(neuron))] for neuron_index, neuron in enumerate(input_data)]
        return dropped_out_data

    def backward(self, output_error):
        # Apply the mask to the output error
        if self.mask is not None:
            return [[output_error[neuron_index][i] * self.mask[neuron_index][i] for i in range(len(neuron))] for neuron_index, neuron in enumerate(output_error)]
        else:
            return output_error

# Dense Layer
class DenseLayer:
    def __init__(self, input_size, output_size):
        # Initialize weights and biases
        self.weights = [[random.uniform(-1, 1) for _ in range(output_size)] for _ in range(input_size)]
        self.biases = [0 for _ in range(output_size)]
        self.last_input = None

    def forward(self, input_data):
        self.last_input = input_data
        # Perform matrix multiplication and add biases
        output = [self.vector_dot_product(input_data, col) + self.biases[i] for i, col in enumerate(zip(*self.weights))]
        return output

    def backward(self, output_error, learning_rate):
        # Calculate gradients for weights and biases
        d_weights = [[self.last_input[i] * output_error[j] for j in range(len(output_error))] for i in range(len(self.last_input))]
        d_biases = output_error

        # Update weights and biases
        for i in range(len(self.weights)):
            for j in range(len(self.weights[i])):
                self.weights[i][j] -= learning_rate * d_weights[i][j]
        for i in range(len(self.biases)):
            self.biases[i] -= learning_rate * d_biases[i]

        # Calculate input error
        input_error = [self.vector_dot_product(row, output_error) for row in self.weights]
        return input_error

    def vector_dot_product(self, vec1, vec2):
        # Calculate dot product of two vectors
        return sum(x * y for x, y in zip(vec1, vec2))
        
# Softmax Layer
class SoftmaxLayer:
    def forward(self, input_data):
        # Assuming input_data is a list of lists (2D array)
        # Each inner list represents a single data sample
        
        # Clip values to a reasonable range for numerical stability
        clipped_input = self.clip(input_data, -1e2, 1e2)
        
        # Calculate the softmax for each sample
        softmax_output = [self.compute_softmax(sample) for sample in clipped_input]
        return softmax_output

    def compute_softmax(self, sample):
        # Subtract the max value from each element for numerical stability
        max_val = max(sample)
        shifted_inputs = [x - max_val for x in sample]
        
        # Calculate the exponentials using math.exp
        exps = [math.exp(x) for x in shifted_inputs]
        
        # Sum of the exponentials
        sum_of_exps = sum(exps)
        
        # Normalize each value
        return [exp_val / sum_of_exps for exp_val in exps]

    def clip(self, matrix, min_value, max_value):
        # Clip the values of a 2D list element-wise
        return [[max(min_value, min(value, max_value)) for value in row] for row in matrix]

    def backward(self, output_error, learning_rate):
        # Error is passed straight through to the next layer
        return output_error
        
# ReLU Activation
class ReLULayer:
    def forward(self, input_data):
        # Apply ReLU to each element in the input data recursively
        self.last_input = input_data
        return self.apply_relu(input_data)

    def apply_relu(self, data):
        # Recursively apply ReLU to data of any dimension
        if isinstance(data, list):
            return [self.apply_relu(sub_data) for sub_data in data]
        else:
            return max(0, data)

    def backward(self, output_error):
        # Compute the gradient of ReLU for the last input
        return self.compute_gradient(output_error, self.last_input)

    def compute_gradient(self, error, last_input):
        # Recursively compute the gradient of ReLU for data of any dimension
        if isinstance(error, list):
            return [self.compute_gradient(sub_error, sub_last_input)
                    for sub_error, sub_last_input in zip(error, last_input)]
        else:
            return error * (1 if last_input > 0 else 0)
        
# Cross-Entropy Loss
class CrossEntropyLoss:
    def calculate_loss(self, predicted, actual):
        # Assuming 'predicted' and 'actual' are lists of lists
        # Each inner list represents the probabilities for a single data sample
        # 'actual' is expected to be one-hot encoded
        
        # Calculate the cross-entropy loss for each sample
        losses = [self.compute_sample_loss(p, a) for p, a in zip(predicted, actual)]
        
        # Calculate the average loss over all samples
        return sum(losses) / len(losses)

    def compute_sample_loss(self, predicted_sample, actual_sample):
        # Calculate the cross-entropy loss for a single sample
        # Add a small constant (epsilon) to prevent log(0)
        epsilon = 1e-9
        return -sum([act * self.safe_log(pred + epsilon) for pred, act in zip(predicted_sample, actual_sample)])

    def safe_log(self, x):
        # Compute the natural log safely
        if x <= 0:
            return -float('inf')  # Logarithm of non-positive number is undefined
        else:
            return math.log(x)

    def calculate_gradient(self, predicted, actual):
        # Assuming 'predicted' and 'actual' are lists of lists
        # Each inner list represents the probabilities for a single data sample
        # 'actual' is expected to be one-hot encoded
        
        # Calculate the gradient of the loss with respect to the predictions
        gradients = [self.compute_sample_gradient(p, a) for p, a in zip(predicted, actual)]
        return gradients

    def compute_sample_gradient(self, predicted_sample, actual_sample):
        # Calculate the gradient for a single sample
        # Add a small constant (epsilon) to prevent division by zero
        epsilon = 1e-9
        return [(pred - act) / (pred + epsilon) for pred, act in zip(predicted_sample, actual_sample)]

In [30]:
class CNN:
    def __init__(self, num_classes, epochs=10, learning_rate=0.001, threshold=0.5, early_stopping_patience=5):
        self.layers = [
            ConvLayer(num_filters=32, filter_size=3, num_channels=3),  # num_channels is 3 for RGB images
            ReLULayer(),
            MaxPoolingLayer(pool_size=2),
            ConvLayer(num_filters=64, filter_size=3, num_channels=32),  # num_channels is num_filters of previous ConvLayer
            ReLULayer(),
            MaxPoolingLayer(pool_size=2),
            ConvLayer(num_filters=128, filter_size=3, num_channels=64),  # num_channels is num_filters of previous ConvLayer
            ReLULayer(),
            MaxPoolingLayer(pool_size=2),
            ConvLayer(num_filters=256, filter_size=3, num_channels=128),  # num_channels is num_filters of previous ConvLayer
            ReLULayer(),
            MaxPoolingLayer(pool_size=2),
            ConvLayer(num_filters=512, filter_size=3, num_channels=256),  # num_channels is num_filters of previous ConvLayer
            ReLULayer(),
            MaxPoolingLayer(pool_size=2),
            FlattenLayer(),
            None,  # Placeholder for first DenseLayer, will initialize properly later
            ReLULayer(),
            DropoutLayer(dropout_rate=0.5),
            None,  # Placeholder for second DenseLayer, will initialize properly later
            ReLULayer(),
            DropoutLayer(dropout_rate=0.5),
            None,  # Placeholder for third DenseLayer, will initialize properly later
            SoftmaxLayer()
        ] 
        self.loss = CrossEntropyLoss()
        self.num_classes = num_classes
        self.epochs = epochs
        self.learning_rate = learning_rate
        self.threshold = threshold
        self.early_stopping_patience = early_stopping_patience
        self.best_loss = np.inf
        self.patience_counter = 0

    # Function to replace np.expand_dims()
    def expand_dims(self, lst, axis):
        if axis == 0:
            return [lst]
        else:
            return [[item] for item in lst]
    
    # Function to replace np.array()
    def to_list(self, nested_iterables):
        return [to_list(item) if isinstance(item, (list, tuple)) else item for item in nested_iterables]
    
    # Function to perform element-wise multiplication
    def elementwise_multiply(self, lst1, lst2):
        if isinstance(lst1[0], list):
            return [elementwise_multiply(sub1, sub2) for sub1, sub2 in zip(lst1, lst2)]
        else:
            return [a * b for a, b in zip(lst1, lst2)]
    
    # Function to perform dot product
    def dot_product(self, vec1, vec2):
        return sum(x * y for x, y in zip(vec1, vec2))
    
    def forward(self, X):
        for layer in self.layers:
            if layer is not None:
                # Ensure X is a list of lists
                if not isinstance(X, list):
                    X = self.to_list(X)
                print(f"+ Before forward - {type(layer).__name__}: {self.get_shape(X)}")
                X = layer.forward(X)
                print(f"- After forward - {type(layer).__name__}: {self.get_shape(X)}")
        return X

    def backward(self, output_error, learning_rate):
        for layer in reversed(self.layers):
            if layer is not None:
                print(f"+ Before backward - {type(layer).__name__}: {output_error.shape}")
                output_error = layer.backward(output_error, learning_rate)
                print(f"- After backward - {type(layer).__name__}: {output_error.shape}")

    def reset(self):
        for layer in self.layers:
            if hasattr(layer, 'last_input'):
                layer.last_input = None
    
    def validate(self, X_val, y_val):
        val_losses = []
        val_accuracies = []
        
        # Iterate over each example in the validation set
        for i in range(len(X_val)):
            # Forward pass
            input_data = [X_val[i]]  # Mimic np.expand_dims
            output = self.forward(input_data)
        
            # Calculate loss for the current validation example
            val_loss = self.loss_function(output, y_val[i])
            val_losses.append(val_loss)
            
            # Calculate accuracy for the current validation example
            predicted_label = output.index(max(output))
            val_accuracy = 1 if predicted_label == y_val[i] else 0
            val_accuracies.append(val_accuracy)
        
        # Calculate average loss and accuracy over the entire validation set
        avg_val_loss = sum(val_losses) / len(val_losses)
        avg_val_accuracy = sum(val_accuracies) / len(val_accuracies)
        
        return avg_val_loss, avg_val_accuracy
    
    def calculate_loss(self, X, y):
        predictions = self.forward(X)
        return self.loss_function(predictions, y)
    
    def calculate_accuracy(self, X, y):
        predictions = self.predict(X)
        correct_predictions = sum(1 for pred, actual in zip(predictions, y) if pred == actual)
        return correct_predictions / len(y)
    
    def min_max_scale(self, data):
        min_val = min(data)
        max_val = max(data)
        range_val = max_val - min_val
        if range_val == 0:
            return [0 for _ in data]  # Return a list of zeros if all values are the same
        scaled_data = [(value - min_val) / range_val for value in data]
        return scaled_data

    def get_shape(self, lst):
        shape = []
        while isinstance(lst, list):
            shape.append(len(lst))
            lst = lst[0] if lst else None
        return tuple(shape)
        
    def train(self, X_train, y_train, X_val, y_val):
        print(f"Initial training data shape: {X_train[0].shape}\n")
        
        # Calculate the output size after the convolution and pooling layers
        sample_output = self.forward(np.expand_dims(X_train[0], axis=0))
        flattened_size = sample_output.size
        
        # Initialize the DenseLayers with the correct input size
        self.layers[-6] = DenseLayer(input_size=flattened_size, output_size=128)  # First DenseLayer
        self.layers[-3] = DenseLayer(input_size=128, output_size=64)  # Second DenseLayer
        self.layers[-1] = DenseLayer(input_size=64, output_size=self.num_classes)  # Third DenseLayer

        print("\n  |====== Start training ======| \n")

        train_losses = []
        train_accuracies = []
        val_losses = []
        val_accuracies = []
        
        for epoch in range(self.epochs):
            batch_losses = []
            batch_accuracies = []
            
            for j in range(len(X_train)):
                print(f"\n=== start round {j}/{len(X_train)-1} - epoch {epoch} ===\n")
                
                # forward
                input_data = [X_train[j]]  # Mimic np.expand_dims
                output = self.forward(input_data)

                # print("---------------------------------------")
                
                # backward
                output_error = self.loss.calculate_gradient(output, y_train[j])
                # Clip output error
                threshold = 1e2  # Define an appropriate threshold
                output_error = [[self.clip(value, -threshold, threshold) for value in row] for row in output_error]
                output_error = self.backward(output_error, self.learning_rate)
                                
                # Calculate loss and accuracy after the backward pass
                batch_loss = self.loss.calculate_loss(output, y_train[j])
                batch_losses.append(batch_loss)
                predicted_label = output.index(max(output))
                batch_accuracy = 1 if predicted_label == y_train[j] else 0
                batch_accuracies.append(batch_accuracy)
            
                self.reset()  # Reset the network's state after each forward pass
                print(f"\n=== end round {j} - epoch {epoch} ===\n")

            print(f"\n=== end epoch {epoch} ===\n")
            
            # Calculate the average training loss and accuracy for the epoch
            avg_train_loss = np.mean(batch_losses)
            avg_train_loss = self.min_max_scale(avg_train_loss)
            avg_train_accuracy = np.mean(batch_accuracies)
            avg_train_accuracy = self.min_max_scale(avg_train_accuracy)
            
            # Append the average metrics to the lists that track them across epochs
            train_losses.append(avg_train_loss)
            train_accuracies.append(avg_train_accuracy)

            self.reset()

            # Validation step
            val_loss, val_accuracy = self.validate(X_val, y_val)
            val_loss = self.min_max_scale(val_loss)
            val_accuracy = self.min_max_scale(val_accuracy)
            val_losses.append(val_loss)
            val_accuracies.append(val_accuracy)
            
            print(f"Epoch: {epoch}, Train Loss: {avg_train_loss}, Train Accuracy: {avg_train_accuracy}, Val Loss: {val_loss}, Val Accuracy: {val_accuracy}")
    
            if val_loss < self.best_loss:
                self.best_loss = val_loss
                self.patience_counter = 0
            else:
                self.patience_counter += 1
            if self.patience_counter >= self.early_stopping_patience:
                print("Early stopping due to no improvement in validation loss.")
                break

        print("Training finished.")
        print(f"Best validation accuracy achieved: {self.best_loss}")

        self.plot_results(train_losses, train_accuracies, val_losses, val_accuracies)

    def predict(self, X):
        predictions = []
        for i in range(len(X)):
            # Forward pass without NumPy
            input_data = [X[i]]  # Mimic np.expand_dims
            output = self.forward(input_data)
            predicted_label = output.index(max(output))  # Use pure Python to get the class index
            predictions.append(predicted_label)
        return predictions
    
    def plot_results(self, train_losses, train_accuracies, val_losses, val_accuracies):
        actual_epochs = min(len(train_losses), len(train_accuracies), len(val_losses), len(val_accuracies))
        assert actual_epochs > 0, "No data to plot."
    
        # If 'val_losses' is a nested list, flatten it
        if isinstance(val_losses[0], list):
            val_losses = [loss for sublist in val_losses for loss in sublist]
        
        epochs = range(1, actual_epochs + 1)
    
        plt.figure(figsize=(12, 4))
        plt.subplot(1, 2, 1)
        plt.plot(epochs, train_losses[:actual_epochs], 'g', label='Training loss')
        plt.plot(epochs, val_losses[:actual_epochs], 'b', label='Validation loss')
        plt.title('Training and Validation Loss')
        plt.xlabel('Epochs')
        plt.ylabel('Loss')
        plt.legend()
    
        plt.subplot(1, 2, 2)
        plt.plot(epochs, train_accuracies[:actual_epochs], 'g', label='Training accuracy')
        plt.plot(epochs, val_accuracies[:actual_epochs], 'b', label='Validation accuracy')
        plt.title('Training and Validation Accuracy')
        plt.xlabel('Epochs')
        plt.ylabel('Accuracy')
        plt.legend()
    
        plt.tight_layout()
        plt.show()


In [29]:
# Adjust the values to train
num_classes = num_classes
epochs = 1
learning_rate = 0.1
threshold = 0.5
early_stopping_patience = 5

cnn = CNN(num_classes, epochs, learning_rate, threshold, early_stopping_patience)
cnn.train(x_train, y_train, x_val, y_val)

Initial training data shape: (150, 150, 3)

+ Before forward - ConvLayer: (1,)


TypeError: can't multiply sequence by non-int of type 'float'