In [1]:
import os
import numpy as np # to process the data
import random
import math
from random import shuffle
from PIL import Image
from sklearn.model_selection import train_test_split

In [2]:
class FlowerDataset:
    def __init__(self, directory):
        self.directory = directory
        self.images = []
        self.labels = []

    def load_data(self):
        image_files = os.listdir(self.directory)
        for img in image_files[:50]:
            try:
                img_path = os.path.join(self.directory, img)
                with Image.open(img_path) as img_array:
                    resized_array = img_array.resize((150, 150))
                    self.images.append(np.array(resized_array))
                    label = img.split('_')[0]
                    self.labels.append(label)
            except Exception as e:
                print(f"Error loading image {img}: {e}")

    def preprocess_data(self):
        max_pixel_value = 255.0
        self.images = [image / max_pixel_value for image in self.images]

    def print_dataset_dimensions(self):
        if self.images:
            print(f"Total number of images: {len(self.images)}")
            print(f"Dimensions of each image: {self.images[0].shape}")
            print(f"Total dimensions of the dataset (assuming all images have the same shape): {len(self.images)} x {self.images[0].shape}")
        else:
            print("The dataset is empty or not loaded properly.")

    def one_hot_encode(self):
        unique_labels = sorted(set(self.labels))
        label_to_int = {label: index for index, label in enumerate(unique_labels)}
        self.label_indices = [label_to_int[label] for label in self.labels]
        self.labels = [[int(i == label_index) for i in range(len(unique_labels))] for label_index in self.label_indices]

def split_data(images, labels, train_ratio, validation_ratio, test_ratio):
    combined = list(zip(images, labels))
    shuffle(combined)
    shuffled_images, shuffled_labels = zip(*combined)

    train_end = int(len(shuffled_images) * train_ratio)
    validation_end = train_end + int(len(shuffled_images) * validation_ratio)

    x_train = shuffled_images[:train_end]
    y_train = shuffled_labels[:train_end]
    x_val = shuffled_images[train_end:validation_end]
    y_val = shuffled_labels[train_end:validation_end]
    x_test = shuffled_images[validation_end:]
    y_test = shuffled_labels[validation_end:]

    return list(x_train), list(x_val), list(x_test), list(y_train), list(y_val), list(y_test)

# Usage
dataset = FlowerDataset('flowers')
dataset.load_data()
dataset.preprocess_data()
dataset.one_hot_encode()

x_train, x_val, x_test, y_train, y_val, y_test = split_data(dataset.images, dataset.labels, 0.75, 0.15, 0.10)

# Print the dimensions of the preprocessed images
dataset.print_dataset_dimensions()

# Print the sizes of the datasets
print(f"Train set size: {len(x_train)}, Validation set size: {len(x_val)}, Test set size: {len(x_test)}")

# Print the unique classes in the training set
unique_classes = sorted(set([label.index(1) for label in y_train]))
print(f"Unique classes in the training set: {unique_classes}")

num_classes = len(unique_classes)
print(f"Number of classes for prediction: {num_classes}")


Total number of images: 50
Dimensions of each image: (150, 150, 3)
Total dimensions of the dataset (assuming all images have the same shape): 50 x (150, 150, 3)
Train set size: 37, Validation set size: 7, Test set size: 6
Unique classes in the training set: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Number of classes for prediction: 10


In [3]:
# Base Layer class
class Layer:
    def forward(self, input_data):
        raise NotImplementedError

    def backward(self, output_error, learning_rate):
        raise NotImplementedError

# Convolutional Layer
class ConvLayer(Layer):
    def __init__(self, num_filters, filter_size, num_channels=3, padding=0, stride=1):
        self.num_filters = num_filters
        self.filter_size = filter_size
        self.filters = np.random.randn(num_filters, filter_size, filter_size, num_channels) / 9  # normalize values
        self.padding = padding
        self.stride = stride
        
    def forward(self, input_data):
        # Assert that the input data has the expected shape
        assert len(input_data.shape) == 4, f"Expected input shape to be 4D, got {input_data.shape}"
        # Ensure the input data has four dimensions: batch size, height, width, and depth
        if len(input_data.shape) < 4:
            input_data = np.expand_dims(input_data, axis=0)  # Add a batch dimension
        self.last_input = input_data
        batch_size, h, w, d = input_data.shape

        # Define input_padded here
        input_padded = np.pad(input_data, (
            (0, 0), 
            (self.padding, self.padding), 
            (self.padding, self.padding), 
            (0, 0)
        ), 'constant')

        new_h = int((h + 2 * self.padding - self.filter_size) / self.stride) + 1
        new_w = int((w + 2 * self.padding - self.filter_size) / self.stride) + 1
        output = np.zeros((batch_size, new_h, new_w, self.num_filters))

        for b in range(batch_size):
            for i in range(0, new_h):
                for j in range(0, new_w):
                    for f in range(self.num_filters):
                        output[b, i, j, f] = np.sum(
                            input_padded[b, i*self.stride:i*self.stride+self.filter_size, j*self.stride:j*self.stride+self.filter_size, :] * self.filters[f]
                        )
        return output

    def backward(self, output_error, learning_rate):
        d_filters = np.zeros(self.filters.shape)
        d_input = np.zeros(self.last_input.shape)
        batch_size, h_i, w_i, d_i = self.last_input.shape  # Add 'batch_size' here
    
        new_h = int((h_i + 2 * self.padding - self.filter_size) / self.stride) + 1
        new_w = int((w_i + 2 * self.padding - self.filter_size) / self.stride) + 1
    
        for b in range(batch_size):  # Add a loop over the batch dimension
            for i in range(0, new_h):
                for j in range(0, new_w):
                    for f in range(self.num_filters):
                        gradient_filters = output_error[b, i, j, f] * self.last_input[b, i*self.stride:i*self.stride+self.filter_size, j*self.stride:j*self.stride+self.filter_size]
                        gradient_filters = np.clip(gradient_filters, -1e2, 1e2)  # Clip values to a reasonable range
                        d_filters[f] += gradient_filters
                    
                        gradient_input = self.filters[f] * output_error[b, i, j, f]
                        gradient_input = np.clip(gradient_input, -1e2, 1e2)  # Clip values to a reasonable range
                        d_input[b, i*self.stride:i*self.stride+self.filter_size, j*self.stride:j*self.stride+self.filter_size] += gradient_input
        self.filters -= learning_rate * d_filters
        return d_input

    def reset(self):
        self.last_input = None
        
# MaxPooling Layer
class MaxPoolingLayer(Layer):
    def __init__(self, pool_size):
        self.pool_size = pool_size

    def forward(self, input_data):
        self.last_input = input_data
        batch_size, h, w, num_filters = input_data.shape
        output = np.zeros((batch_size, h // self.pool_size, w // self.pool_size, num_filters))
        for b in range(batch_size):
            for i in range(h // self.pool_size):
                for j in range(w // self.pool_size):
                    for f in range(num_filters):
                        output[b, i, j, f] = np.max(input_data[b, i*self.pool_size:(i+1)*self.pool_size, j*self.pool_size:(j+1)*self.pool_size, f])
        return output

    def backward(self, output_error, learning_rate):
        d_input = np.zeros(self.last_input.shape)
        batch_size, h, w, num_filters = self.last_input.shape

        for b in range(batch_size):
            for i in range(h // self.pool_size):
                for j in range(w // self.pool_size):
                    for f in range(num_filters):
                        h_start = i * self.pool_size
                        w_start = j * self.pool_size
                        # Find the max index within the patch
                        patch = self.last_input[b, h_start:h_start+self.pool_size, w_start:w_start+self.pool_size, f]
                        max_index = np.argmax(patch)
                        # Convert the index to 2D coordinates
                        max_coord = np.unravel_index(max_index, patch.shape)
                        # Propagate the gradient to the max location
                        d_input[b, h_start + max_coord[0], w_start + max_coord[1], f] = output_error[b, i, j, f]

        return d_input
        
class FlattenLayer(Layer):
    def forward(self, input_data):
        self.last_input_shape = input_data.shape
        return input_data.reshape(input_data.shape[0], -1)
        
    def backward(self, output_error, learning_rate):
        return output_error.reshape(self.last_input_shape)
        
# Dropout Layer
class DropoutLayer(Layer):
    def __init__(self, dropout_rate):
        self.dropout_rate = dropout_rate

    def forward(self, input_data):
        self.mask = (np.random.rand(*input_data.shape) > self.dropout_rate) / (1.0 - self.dropout_rate)
        return input_data * self.mask

    def backward(self, output_error, learning_rate):
        return output_error * self.mask

# Dense Layer
class DenseLayer(Layer):
    def __init__(self, input_size, output_size):
        self.weights = np.random.randn(input_size, output_size) * np.sqrt(2. / input_size)
        self.biases = np.zeros(output_size)  # Ensure biases is a 1D array with length 'output_size'
        
    def forward(self, input_data):
        # Assert that the input data is 2D after flattening
        assert len(input_data.shape) == 2, f"Expected input shape to be 2D, got {input_data.shape}"
        self.last_input = input_data
        return np.dot(input_data, self.weights) + self.biases
    
    def backward(self, output_error, learning_rate):
        d_weights = np.outer(self.last_input, output_error)
        d_biases = output_error
        self.weights -= learning_rate * d_weights
        # Ensure 'd_biases' is a 1D array before subtraction
        d_biases = d_biases.flatten() if d_biases.ndim > 1 else d_biases
        self.biases -= learning_rate * d_biases
        return np.dot(output_error, self.weights.T)
        
class SoftmaxLayer(Layer):
    def forward(self, input_data):
        # Reshape input_data to 2D if it's 1D
        if input_data.ndim == 1:
            input_data = input_data.reshape(1, -1)

        input_data = np.clip(input_data, -1e2, 1e2)  # Clip values to a reasonable range
        exp = np.exp(input_data - np.max(input_data, axis=1, keepdims=True))
        return exp / np.sum(exp, axis=1, keepdims=True)

    def backward(self, output_error, learning_rate):
        return output_error  # Error is passed straight through to the next layer
        
# ReLU Activation
class ReLULayer(Layer):
    def forward(self, input_data):
        self.last_input = input_data
        return np.maximum(0, input_data)

    def backward(self, output_error, learning_rate):
        # The derivative of ReLU is 1 for positive inputs and 0 for negative inputs
        return output_error * (self.last_input > 0)

# Cross-Entropy Loss
class CrossEntropyLoss:
    def calculate_loss(self, predicted, actual):
        # Convert 'actual' to a NumPy array if it's a list
        actual = np.array(actual) if isinstance(actual, list) else actual
        return -np.sum(actual * np.log(predicted + 1e-9))  # add small constant to avoid log(0)
        
    def calculate_gradient(self, predicted, actual):
        # Convert 'actual' to a NumPy array if it's a list
        actual = np.array(actual) if isinstance(actual, list) else actual
        # Ensure 'actual' is a 2D array with the same shape as 'predicted'
        if actual.ndim == 1:
            actual = actual.reshape(1, -1)
            # print(actual)
        return predicted - actual

In [None]:
class CNN:
    def __init__(self, num_classes, threshold=0.5, early_stopping_patience=5):
        self.layers = [
            ConvLayer(num_filters=32, filter_size=3),
            ReLULayer(),
            MaxPoolingLayer(pool_size=2),
            FlattenLayer(),
            # Placeholder for DenseLayer, will initialize properly later
            None,
            SoftmaxLayer()
        ] 
        self.loss = CrossEntropyLoss()
        self.threshold = threshold
        self.early_stopping_patience = early_stopping_patience
        self.best_loss = np.inf
        self.patience_counter = 0

    def forward(self, X):
        for layer in self.layers:
            if layer is not None:
                # Ensure X is a NumPy array
                if isinstance(X, list):
                    X = np.array(X)
                print(f"+ Before forward - {type(layer).__name__}: {X.shape}")
                X = layer.forward(X)
                print(f"- After forward - {type(layer).__name__}: {X.shape}")
        return X

    def backward(self, output_error, learning_rate):
        for layer in reversed(self.layers):
            if layer is not None:
                print(f"+ Before backward - {type(layer).__name__}: {output_error.shape}")
                output_error = layer.backward(output_error, learning_rate)
                print(f"- After backward - {type(layer).__name__}: {output_error.shape}")

    def reset(self):
        for layer in self.layers:
            # if hasattr(layer, 'reset'):
            if hasattr(layer, 'last_input'):
                # layer.reset()
                layer.last_input = None

    # def validate(self, X_val, y_val):
    #     # Compute loss and accuracy on the validation set
    #     val_loss = self.calculate_loss(self.forward(X_val), y_val)
    #     val_accuracy = self.calculate_accuracy(self.forward(X_val), y_val)
    #     return val_loss, val_accuracy

    def validate(self, X_val, y_val):
        val_losses = []
        val_accuracies = []
    
        # Iterate over each example in the validation set
        for i in range(len(X_val)):
            # Forward pass
            input_data = np.expand_dims(X_val[i], axis=0)  # Add a batch dimension
            output = self.forward(input_data)
    
            # Calculate loss and accuracy for the current validation example
            val_loss = self.loss.calculate_loss(output, y_val[i])
            val_losses.append(val_loss)
            val_accuracy = (np.argmax(output, axis=1) == y_val[i]).mean()
            val_accuracies.append(val_accuracy)
    
        # Calculate average loss and accuracy over the entire validation set
        avg_val_loss = np.mean(val_losses)
        avg_val_accuracy = np.mean(val_accuracies)
    
        return avg_val_loss, avg_val_accuracy
    
    def calculate_loss(self, X, y):
        return self.loss.calculate_loss(X, y)

    def calculate_accuracy(self, X, y):
        predictions = self.predict(X)
        return np.mean(predictions == y)

    def train(self, X_train, y_train, X_val, y_val, epochs, learning_rate):
        print(f"Initial training data shape: {X_train[0].shape}\n")
        
        # Calculate the output size after the convolution and pooling layers
        sample_output = self.forward(np.expand_dims(X_train[0], axis=0))
        flattened_size = sample_output.size
        
        # Initialize the DenseLayer with the correct input size
        self.layers[-2] = DenseLayer(input_size=flattened_size, output_size=num_classes)

        print("\n\n  |====== Start training ======| \n")
        
        for epoch in range(epochs):
            train_losses = []
            train_accuracies = []

            print(f"Epoch {epoch} - Total number of rounds: {len(X_train)}")
            
            
            for j in range(len(X_train)):
                print(f"\n=== start round {j}/{len(X_train)-1} ===\n")
                
                # forward
                input_data = np.expand_dims(X_train[j], axis=0)  # Add a batch dimension
                output = self.forward(input_data)

                print("---------------------------------------")
                
                # backward
                output_error = self.loss.calculate_gradient(output, y_train[j])
                output_error = self.backward(output_error, learning_rate)
                                
                # Calculate loss and accuracy after the backward pass
                train_loss = self.loss.calculate_loss(output, y_train[j])
                train_losses.append(train_loss)
                train_accuracy = (np.argmax(output, axis=1) == y_train[j]).mean()
                train_accuracies.append(train_accuracy)
            
                self.reset()  # Reset the network's state after each forward pass
                print(f"\n=== end round {j} ===\n")

            print(f"\n=== end epoch {epoch} ===\n")
            avg_train_loss = np.mean(train_losses)
            avg_train_accuracy = np.mean(train_accuracies)

            self.reset()
            print("AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA")

            # Validation step
            val_loss, val_accuracy = self.validate(X_val, y_val)
            
            print(f"Epoch: {epoch}, Train Loss: {avg_train_loss}, Train Accuracy: {avg_train_accuracy}, Val Loss: {val_loss}, Val Accuracy: {val_accuracy}")
    
            if val_loss < self.best_loss:
                self.best_loss = val_loss
                self.patience_counter = 0
            else:
                self.patience_counter += 1
            if self.patience_counter >= self.early_stopping_patience:
                print("Early stopping due to no improvement in validation loss.")
                break

    def predict(self, X):
        predictions = []
        for i in range(len(X)):
            # Ensure the input data has four dimensions: batch size, height, width, and depth
            input_data = np.expand_dims(X[i], axis=0)  # Add a batch dimension
            output = self.forward(input_data)
            predictions.append(np.argmax(output, axis=1))  # Use axis=1 to get the class index
        return np.array(predictions)



cnn = CNN(num_classes)
cnn.train(x_train, y_train, x_val, y_val, epochs=5, learning_rate=1)

Initial training data shape: (150, 150, 3)

+ Before forward - ConvLayer: (1, 150, 150, 3)
- After forward - ConvLayer: (1, 148, 148, 32)
+ Before forward - ReLULayer: (1, 148, 148, 32)
- After forward - ReLULayer: (1, 148, 148, 32)
+ Before forward - MaxPoolingLayer: (1, 148, 148, 32)
- After forward - MaxPoolingLayer: (1, 74, 74, 32)
+ Before forward - FlattenLayer: (1, 74, 74, 32)
- After forward - FlattenLayer: (1, 175232)
+ Before forward - SoftmaxLayer: (1, 175232)
- After forward - SoftmaxLayer: (1, 175232)



Epoch 0 - Total number of rounds: 37

=== start round 0/36 ===

+ Before forward - ConvLayer: (1, 150, 150, 3)
- After forward - ConvLayer: (1, 148, 148, 32)
+ Before forward - ReLULayer: (1, 148, 148, 32)
- After forward - ReLULayer: (1, 148, 148, 32)
+ Before forward - MaxPoolingLayer: (1, 148, 148, 32)
- After forward - MaxPoolingLayer: (1, 74, 74, 32)
+ Before forward - FlattenLayer: (1, 74, 74, 32)
- After forward - FlattenLayer: (1, 175232)
+ Before forward - DenseLa

In [None]:
def train(self, X_train, y_train, X_val, y_val, epochs, learning_rate):
        print(f"Initial training data shape: {X_train[0].shape}")
        
        # Calculate the output size after the convolution and pooling layers
        sample_output = self.forward(np.expand_dims(X_train[0], axis=0))
        flattened_size = sample_output.size
        
        # Initialize the DenseLayer with the correct input size
        self.layers[-2] = DenseLayer(input_size=flattened_size, output_size=num_classes)
        
        for epoch in range(epochs):
            train_losses = []
            train_accuracies = []
            
            for j in range(len(X_train)):
                input_data = np.expand_dims(X_train[j], axis=0)  # Add a batch dimension
                output = self.forward(input_data)

                # # Print the shapes of the output and the labels
                # print(f"\nEpoch: {epoch}, Image: {j}")
                # for layer in self.layers:
                #     print(f"Before layer {type(layer).__name__}: {input_data.shape}")
                #     input_data = layer.forward(input_data)
                #     print(f"After layer {type(layer).__name__}: {input_data.shape}")
                
                # # Print the shapes of the output and the labels
                # y_train_array = np.array(y_train[j])
                # print(f"Label before one-hot encoding: {y_train[j]}")
                # print(f"Label shape after one-hot encoding: {y_train_array.shape}")
                
                # # Ensure the label is one-hot encoded correctly
                # if y_train_array.ndim == 1 and len(y_train_array) == num_classes:
                #     y_train_array = np.eye(num_classes)[y_train_array]
                #     print(f"One-hot encoded label: {y_train_array}")
                                
                output_error = self.loss.calculate_gradient(output, y_train[j])
                
                # Start the backward pass from the layer before the SoftmaxLayer
                for layer in reversed(self.layers[:-1]):
                    output_error = layer.backward(output_error, learning_rate)
                
                train_loss = self.calculate_loss(output, y_train[j])
                train_losses.append(train_loss)
                train_accuracy = self.calculate_accuracy(output, y_train[j])
                train_accuracies.append(train_accuracy)
            
            avg_train_loss = np.mean(train_losses)
            avg_train_accuracy = np.mean(train_accuracies)
            
            val_output = self.forward(X_val)
            val_loss = self.calculate_loss(val_output, y_val)
            val_accuracy = self.calculate_accuracy(val_output, y_val)
            
            print(f"Epoch: {epoch}, Train Loss: {avg_train_loss}, Train Accuracy: {avg_train_accuracy}, Val Loss: {val_loss}, Val Accuracy: {val_accuracy}")
    
            if val_loss < self.best_loss:
                self.best_loss = val_loss
                self.patience_counter = 0
            else:
                self.patience_counter += 1
            if self.patience_counter >= self.early_stopping_patience:
                print("Early stopping due to no improvement in validation loss.")
                break



def backward(self, output_error, learning_rate):
        for layer in reversed(self.layers):
            if layer is not None:
                print(f"Before layer - {type(layer).__name__}: input error shape {output_error.shape}")
                output_error = layer.backward(output_error, learning_rate)
                print(f"After layer - {type(layer).__name__}: output error shape {output_error.shape}")