In [1]:
import os
import numpy as np
import cv2
import matplotlib.pyplot as plt
import sklearn
from PIL import Image

In [2]:
def preprocessImage(imagePath):

    with Image.open(imagePath) as img:
        img = img.convert('RGB')
        img.save(imagePath) 
    # This ensures that the image is saved in a format that can be read by OpenCV
    image = cv2.imread(imagePath) # loads pixel values

    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) # converts from BGR to RGB format
    image = cv2.resize(image, (256, 256)) # resizes to be square shape
    image  = cv2.bilateralFilter(image, 9, 75, 75) # applies a bilateral filter to smooth the image while preserving edges
    image = image / 255.0 # normalizes the light values, so the pixel values range from 0 to 1.

    
    return image

def showImage(image):
    plt.imshow(image)
    plt.axis('off')
    plt.show()

In [3]:
def initializeParameters(n, m):
    """"
    "Initializes the parameters for a neural network layer with n inputs and m outputs.
    """
    w = np.random.randn(n, m) * 0.01  
    b = np.zeros((1, m)) 
    return w, b

In [4]:
def output(x, w, b):
    """
    Computes the output of a linear layer given input x, weights w, and bias b.
    x: input data (1,n)
    w: weights (n,m)
    b: bias (1,m)
    
    Returns the output (1,m) as a numpy array with softmax function applied.
    """
    y = x @ w + b  # matrix multiplication and bias addition

    # Apply softmax to the output
    exp_y = np.exp(y - np.max(y))  # Subtract max for numerical stability
    return exp_y / np.sum(exp_y, axis = 1, keepdims=True)

In [5]:
def crossEntropyLoss(y_out, y_true):
    m = y_true.shape[0]  # number of samples in the batch
    log_probs = np.log(y_out)
    loss = -np.sum(y_true * log_probs) / m  # Average loss over all samples in the batch
    return loss


def gradient(y_out, y_true):
    """
    Computes the gradient of the output using logistic regression.

    """
    gradient = y_out - y_true
    return gradient

def updateParameters(w, b, gradient, learning_rate=0.01):
    """
    Updates the weights and bias using the gradient and learning rate.
    """
    
    w -= learning_rate * gradient
    b -= learning_rate * np.mean(gradient, axis=0) 
    return w, b

In [6]:
def load_images(folder):
    images = []
    labels = []
    animalTypes = os.listdir(folder)

    count = 0

    for index, animal in enumerate(animalTypes):
        if animal.startswith('.'):
            continue
        animalPath = os.path.join(folder, animal)

        if os.path.isdir(animalPath):
            for filename in os.listdir(animalPath):
                if filename.endswith('.jpg') or filename.endswith('.png'):
                    imagePath = os.path.join(animalPath, filename)
                    image = preprocessImage(imagePath)
                    images.append(image)

                    # Create and append one-hot encoded label
                    label_one_hot = np.zeros(len(animalTypes))
                    label_one_hot[index] = 1
                    labels.append(label_one_hot)
                    count += 1
            print(f"Processed {count} images from {animal}")
    return np.array(images), np.array(labels)

In [7]:
def train_with_minibatches(images, labels, batch_size=32, num_epochs=10, learning_rate=0.01):
    input_size = images.shape[1] * images.shape[2] * images.shape[3]
    num_classes = labels.shape[1]
    w, b = initializeParameters(input_size, num_classes)

    images = images.reshape(images.shape[0], -1)  # Flatten images

    num_samples = images.shape[0]
    for epoch in range(num_epochs):
        # Shuffle data for each epoch (important for good generalization)
        indices = np.random.permutation(num_samples)
        images = images[indices]
        labels = labels[indices]

        # Mini-batch processing
        for i in range(0, num_samples, batch_size):
            # Create a mini-batch
            batch_images = images[i:i + batch_size]
            batch_labels = labels[i:i + batch_size]

            # Forward pass
            y_out = output(batch_images, w, b)

            # Compute the loss
            loss = crossEntropyLoss(y_out, batch_labels)

            # Backpropagation and gradient computation
            grad = gradient(y_out, batch_labels)

            # Update parameters
            w, b = updateParameters(w, b, grad, learning_rate)

        # Print loss for each epoch
        print(f"Epoch {epoch}/{num_epochs}, Loss: {loss:.4f}")

    return w, b


In [8]:
# Load images and labels
images, labels = load_images("images")
print("Images shape:", images.shape)  # Should be (num_images, 256, 256, 3)
print("Labels shape:", labels.shape)  # Should be (num_images, num_classes)

# Train the model using mini-batches
w, b = train_with_minibatches(images, labels, batch_size=32, num_epochs=10, learning_rate=0.01)


Processed 441 images from cat




Processed 903 images from butterfly
Processed 903 images from dog
Processed 1279 images from sheep
Processed 1603 images from spider
Processed 1603 images from chicken
Processed 1603 images from horse
Processed 1603 images from squirrel
Processed 1603 images from cow




Processed 1970 images from elephant
Images shape: (1970, 256, 256, 3)
Labels shape: (1970, 11)


ValueError: operands could not be broadcast together with shapes (196608,11) (32,11) (196608,11) 