In [6]:
import numpy as np
from tqdm import tqdm
import pandas as pd
import random
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix
import time
import os
from scipy.signal import correlate2d
import cv2

In [22]:
class CNN:
    def __init__(self, input_size=(28, 28), num_filters=8, filter_size=3, pool_size=2, num_classes=10, learning_rate=0.01):
        self.input_size = input_size
        self.num_filters = num_filters
        self.filter_size = filter_size
        self.pool_size = pool_size
        self.num_classes = num_classes
        self.lr = learning_rate

        # Initialize filters and weights
        self.filters = np.random.randn(num_filters, filter_size, filter_size) * 0.1
        self.fc_weights = np.random.randn(num_filters * ((input_size[0] - filter_size + 1) // pool_size) * 
                                          ((input_size[1] - filter_size + 1) // pool_size), num_classes) * 0.1
        self.fc_bias = np.zeros((1, num_classes))

    def convolve2D(self, image, kernel):
        return correlate2d(image, kernel, mode='valid')

        # kernel_size = kernel.shape[0]
        # output_size = (image.shape[0] - kernel_size + 1, image.shape[1] - kernel_size + 1)
        # output = np.zeros(output_size)

        # for i in range(output_size[0]):
        #     for j in range(output_size[1]):
        #         region = image[i:i + kernel_size, j:j + kernel_size]
        #         output[i, j] = np.sum(region * kernel)

        # return output

    def max_pooling(self, feature_map, size):
        h, w = feature_map.shape
        output_size = (h // size, w // size)
        output = np.zeros(output_size)
        self.pool_cache = {}

        for i in range(output_size[0]):
            for j in range(output_size[1]):
                region = feature_map[i * size:(i + 1) * size, j * size:(j + 1) * size]
                max_val = np.max(region)
                max_pos = np.unravel_index(np.argmax(region), region.shape)
                output[i, j] = max_val
                self.pool_cache[(i, j)] = (i * size + max_pos[0], j * size + max_pos[1])  # Store relative positions

        return output

    def sigmoid(self, x):
        x = np.clip(x, -500, 500)  # Prevent overflow
        return 1 / (1 + np.exp(-x))

    def sigmoid_derivative(self, x):
        return x * (1 - x)

    def softmax(self, x):
        exps = np.exp(x - np.max(x))
        return exps / np.sum(exps, axis=1, keepdims=True)

    def cross_entropy_loss(self, probs, label):
        probs = np.clip(probs, 1e-10, 1)  # Prevent log(0)
        return -np.log(probs[0, label])  # Corrected label indexing

    def forward(self, image):
        self.input_image = image
        self.feature_maps = np.array([self.convolve2D(image, kernel) for kernel in self.filters])
        self.sigmoid_maps = np.array([self.sigmoid(x) for x in self.feature_maps])
        self.pooled_maps = np.array([self.max_pooling(fm, self.pool_size) for fm in self.sigmoid_maps])

        self.flattened = self.pooled_maps.flatten().reshape(1, -1)
        self.scores = np.dot(self.flattened, self.fc_weights) + self.fc_bias
        self.probs = self.softmax(self.scores)

        return self.probs

    def backward(self, label):
        d_scores = self.probs.copy()  # Don't modify self.probs
        d_scores[0, label] -= 1  # Compute gradient of loss w.r.t. scores

        d_fc_weights = np.dot(self.flattened.T, d_scores)
        d_fc_bias = np.sum(d_scores, axis=0, keepdims=True)
        d_flattened = np.dot(d_scores, self.fc_weights.T)

        d_pooled = d_flattened.reshape(self.pooled_maps.shape)

        # Backprop through pooling
        d_sigmoid_maps = np.zeros_like(self.sigmoid_maps)
        for i in range(self.num_filters):
            for (py, px), (y, x) in self.pool_cache.items():
                d_sigmoid_maps[i, y, x] = d_pooled[i, py, px]  # Use stored pooling indices

        # Backprop through sigmoid
        d_feature_maps = d_sigmoid_maps * self.sigmoid_derivative(self.sigmoid_maps)

        # Backprop through convolution
        d_filters = np.zeros_like(self.filters)
        for i in range(self.num_filters):
            for y in range(self.filter_size):
                for x in range(self.filter_size):
                    region = self.input_image[y:y + d_feature_maps.shape[1], x:x + d_feature_maps.shape[2]]
                    d_filters[i, y, x] = np.sum(region * d_feature_maps[i])

        # Gradient clipping to prevent instability
        d_fc_weights = np.clip(d_fc_weights, -1, 1)
        d_fc_bias = np.clip(d_fc_bias, -1, 1)
        d_filters = np.clip(d_filters, -1, 1)

        # Update weights
        self.fc_weights -= self.lr * d_fc_weights
        self.fc_bias -= self.lr * d_fc_bias
        self.filters -= self.lr * d_filters


    def predict(self, image):
        """ Returns predicted class label """
        output = self.forward(image)
        return np.argmax(output)

    def train(self, dataset, labels, epochs=10, batch_size=32):
        num_samples = dataset.shape[0]
        history = {'loss': [], 'f1_score': [], 'training_time': []}
        
        for epoch in range(epochs):
            start_time = time.time()
            loss = 0
            num_batches = num_samples//batch_size
            all_preds = []
            all_labels = []
            
            with tqdm(total=num_batches, desc=f"Epoch {epoch + 1}/{epochs}") as pbar:
                for i in range(0, num_samples, batch_size):
                    X_batch = dataset[i:i+batch_size]
                    y_batch = labels[i:i+batch_size]
                    batch_loss = 0
            
                    for image, label in zip(X_batch, y_batch):
                        #image, label = dataset[i], labels[i]
                        output = self.forward(image)
                        batch_loss += self.cross_entropy_loss(output, label)
                        self.backward(label)

                        pred_label = np.argmax(output)
                        all_preds.append(pred_label)
                        all_labels.append(label)
                    loss+= batch_loss/len(X_batch)
                    pbar.update(1)

            avg_loss = loss / len(dataset)
            f1 = f1_score(all_labels, all_preds, average="weighted")
            epoch_time = time.time() - start_time

            history['loss'].append(avg_loss)
            history['f1_score'].append(f1)
            history['training_time'].append(epoch_time)

            print(f"Epoch {epoch + 1}/{epochs}, Loss: {avg_loss:.4f}")
            print("f1 score: ", f1)
        return history


    def evaluate(self, dataset, labels):
        preds = np.array([self.predict(image) for image in dataset])
        labels = np.array(labels)
        
        accuracy = np.mean(preds == labels)
        f1 = f1_score(labels, preds, average="weighted")
        conf_matrix = confusion_matrix(labels, preds)

        print(f"Accuracy: {accuracy * 100:.2f}%")
        print(f"F1 Score: {f1:.4f}")
        print(f"Confusion Matrix:\n{conf_matrix}")

        return {"accuracy": accuracy, "f1_score": f1, "confusion_matrix": conf_matrix}



In [23]:

# Load CIFAR-10 dataset
train_path = "/kaggle/input/cifar10-pngs-in-folders/cifar10/train"
test_path = "/kaggle/input/cifar10-pngs-in-folders/cifar10/test"

def load_cifar10(dataset_path):
    categories = os.listdir(dataset_path)
    label_map = {category: idx for idx, category in enumerate(categories)}
    
    images, labels = [], []
    for category in categories:
        category_path = os.path.join(dataset_path, category)
        for img_file in os.listdir(category_path):
            img_path = os.path.join(category_path, img_file)
            img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
            img = cv2.resize(img, (32, 32)) / 255.0  # Normalize
            images.append(img)
            labels.append(label_map[category])
    
    return np.array(images), np.array(labels)

# Load dataset
X_train, y_train = load_cifar10(train_path)
X_test, y_test = load_cifar10(test_path)
X_train = X_train.reshape(X_train.shape[0], 32, 32)
X_test = X_test.reshape(X_test.shape[0], 32, 32)


In [25]:

# Paths to dataset
train_path = "/kaggle/input/cifar10-pngs-in-folders/cifar10/train"
test_path = "/kaggle/input/cifar10-pngs-in-folders/cifar10/test"

def load_cifar10(dataset_path, num_samples=10000):
    categories = os.listdir(dataset_path)
    label_map = {category: idx for idx, category in enumerate(categories)}
    
    images, labels = [], []
    
    for category in categories:
        category_path = os.path.join(dataset_path, category)
        img_files = os.listdir(category_path)

        # Ensure we don't sample more images than available
        num_to_sample = min(num_samples // len(categories), len(img_files))
        sampled_files = np.random.choice(img_files, size=num_to_sample, replace=False)

        for img_file in sampled_files:
            img_path = os.path.join(category_path, img_file)
            img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
            if img is None:
                print(f"Warning: Unable to read {img_path}")
                continue  # Skip unreadable images
            
            img = cv2.resize(img, (32, 32)) / 255.0  # Normalize
            images.append(img)
            labels.append(label_map[category])
    
    return np.array(images), np.array(labels)

# Load dataset
X_train, y_train = load_cifar10(train_path, num_samples=10000)
X_test, y_test = load_cifar10(test_path, num_samples=10000)

# Shuffle data
indices = np.arange(X_train.shape[0])
np.random.shuffle(indices)
X_train, y_train = X_train[indices], y_train[indices]

indices = np.arange(X_test.shape[0])
np.random.shuffle(indices)
X_test, y_test = X_test[indices], y_test[indices]

print("Train data shape:", X_train.shape, y_train.shape)
print("Test data shape:", X_test.shape, y_test.shape)


Train data shape: (10000, 32, 32) (10000,)
Test data shape: (10000, 32, 32) (10000,)


In [26]:

# CNN Class (same as provided, with input size updated)
cnn = CNN(input_size=(32, 32), num_filters=8, filter_size=3, pool_size=2, num_classes=10, learning_rate=0.01)

# Train the model
history = cnn.train(X_train, y_train, epochs=10, batch_size=32)


Epoch 1/10: 313it [02:40,  1.95it/s]                         


Epoch 1/10, Loss: 0.0700
f1 score:  0.2179732898236508


Epoch 2/10: 313it [02:40,  1.96it/s]                         


Epoch 2/10, Loss: 0.0660
f1 score:  0.2537552259641321


Epoch 3/10: 313it [02:40,  1.95it/s]                         


Epoch 3/10, Loss: 0.0651
f1 score:  0.26652397658945004


Epoch 4/10: 313it [02:40,  1.95it/s]                         


Epoch 4/10, Loss: 0.0643
f1 score:  0.27742704206830815


Epoch 5/10: 313it [02:40,  1.95it/s]                         


Epoch 5/10, Loss: 0.0636
f1 score:  0.2840992408021809


Epoch 6/10: 313it [02:40,  1.94it/s]                         


Epoch 6/10, Loss: 0.0630
f1 score:  0.29247083148036


Epoch 7/10: 313it [02:40,  1.95it/s]                         


Epoch 7/10, Loss: 0.0625
f1 score:  0.29761558552793005


Epoch 8/10: 313it [02:40,  1.95it/s]                         


Epoch 8/10, Loss: 0.0620
f1 score:  0.30187695577416296


Epoch 9/10: 313it [02:40,  1.95it/s]                         


Epoch 9/10, Loss: 0.0613
f1 score:  0.3096304773881331


Epoch 10/10: 313it [02:41,  1.94it/s]                         

Epoch 10/10, Loss: 0.0600
f1 score:  0.3243521103946366





In [29]:

# Evaluate on test set
eval_results = cnn.evaluate(X_test, y_test)
print("Evaluation Results:", eval_results)

Accuracy: 30.74%
F1 Score: 0.2783
Confusion Matrix:
[[417 103  81  33  66  82 110 101   1   6]
 [ 80 406  59  26  20 180  84 126   7  12]
 [ 72  60 467 173  51  40  26 108   2   1]
 [ 55  53 264 347  34  81  12 150   3   1]
 [216  72 187  66 212 105  60  82   0   0]
 [ 59 159  23  11  25 455 118 135   9   6]
 [126 114  33  21  20 213 291 166  11   5]
 [ 69  88  67  41  15 186  82 448   3   1]
 [ 87 135  49  27  15 371 106 197  12   1]
 [ 88 143  24  16  18 296 176 219   1  19]]
Evaluation Results: {'accuracy': 0.3074, 'f1_score': 0.2782575822167615, 'confusion_matrix': array([[417, 103,  81,  33,  66,  82, 110, 101,   1,   6],
       [ 80, 406,  59,  26,  20, 180,  84, 126,   7,  12],
       [ 72,  60, 467, 173,  51,  40,  26, 108,   2,   1],
       [ 55,  53, 264, 347,  34,  81,  12, 150,   3,   1],
       [216,  72, 187,  66, 212, 105,  60,  82,   0,   0],
       [ 59, 159,  23,  11,  25, 455, 118, 135,   9,   6],
       [126, 114,  33,  21,  20, 213, 291, 166,  11,   5],
       [ 69