# 2: Dataset Setup

In [None]:
import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import Subset
from torchvision import models
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
import numpy as np
from sklearn.decomposition import PCA
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score, accuracy_score, precision_recall_fscore_support
from joblib import dump, load
import pickle
import os
import pandas as pd

## Resizing images and normalizing them

In [None]:
# Preprocessing pipeline applied to every CIFAR-10 image: resize (smaller edge
# to 224 px), convert to a tensor, then normalize each channel with the given
# mean/std (presumably the ImageNet statistics the pretrained ResNet expects —
# TODO confirm).
# NOTE(review): this stores the pipeline as a new attribute `resnet` on the
# imported `torchvision.transforms` module itself instead of a notebook
# variable; the dataset-loading cell reads it back as `transforms.resnet`.
transforms.resnet = transforms.Compose([
    transforms.Resize(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

### Loading dataset (CIFAR-10)

In [None]:
# Download (if not already present under ./data) the CIFAR-10 train and test
# splits; every image is passed through the `transforms.resnet` pipeline on access.
trainset_full = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transforms.resnet)
testset_full = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transforms.resnet)

100%|██████████| 170M/170M [00:14<00:00, 11.8MB/s]


Selecting 500 training images and 100 test images per class


In [None]:
def get_subset(dataset, indices, num_classes=10):
    """Return a class-balanced ``Subset`` holding the first samples of each class.

    Args:
        dataset: Dataset exposing one integer class label per sample through
            a ``targets`` attribute.
        indices: Maximum number of samples to keep per class. Despite its
            name this is a count, not a list of indices; the name is kept so
            existing callers keep working.
        num_classes: Number of classes to draw from; labels ``0`` through
            ``num_classes - 1`` are considered. Defaults to 10.

    Returns:
        A ``torch.utils.data.Subset`` whose indices are grouped by class in
        ascending label order, preserving dataset order within each class.
    """
    targets = np.asarray(dataset.targets)
    selected_indices = []
    for class_id in range(num_classes):
        # Positions of this class in dataset order, truncated to the quota.
        class_positions = np.flatnonzero(targets == class_id)[:indices]
        # Store plain Python ints so the indices work with any dataset
        # ``__getitem__``, not only NumPy-aware ones.
        selected_indices.extend(class_positions.tolist())
    return Subset(dataset, selected_indices)

# Keep the first 500 training and first 100 test images of each class.
trainset = get_subset(trainset_full, 500)
testset = get_subset(testset_full, 100)

### Loading pretrained ResNet-18 and removing the last layer

In [None]:
# Load ResNet-18 with its ImageNet-pretrained weights. The `weights=` enum
# replaces the deprecated `pretrained=True` flag and selects the same
# checkpoint (IMAGENET1K_V1).
resnet18 = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
# Drop the last child module (the classification layer) so the network
# outputs features instead of class scores.
feature_extractor = torch.nn.Sequential(*list(resnet18.children())[:-1])
# Switch to inference mode (affects BatchNorm); as the last expression of the
# cell this also displays the module structure.
feature_extractor.eval()



Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth


100%|██████████| 44.7M/44.7M [00:00<00:00, 126MB/s]


Sequential(
  (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): ReLU(inplace=True)
  (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (4): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Con

### Extract a 512-dimensional feature vector for each image

In [None]:
def extract_features(dataset, model, batch_size=64):
    """Run ``model`` over ``dataset`` and collect its flattened outputs.

    Args:
        dataset: Dataset yielding ``(image, label)`` pairs.
        model: Callable applied to each image batch.
        batch_size: Number of samples per forward pass.

    Returns:
        Tuple ``(features, labels)`` of NumPy arrays in dataset order;
        ``features`` has one row per sample with all remaining output
        dimensions flattened together.
    """
    loader = torch.utils.data.DataLoader(dataset, shuffle=False, batch_size=batch_size)
    feature_batches, label_batches = [], []
    # Gradients are not needed for feature extraction.
    with torch.no_grad():
        for images, batch_labels in loader:
            batch_out = model(images)
            # Collapse everything after the batch dimension into one axis.
            feature_batches.append(batch_out.view(batch_out.size(0), -1))
            label_batches.append(batch_labels)
    return torch.cat(feature_batches).numpy(), torch.cat(label_batches).numpy()

# Extract ResNet features for both subsets (runs the full network on CPU over
# every image, so this is the slow step of the notebook).
train_features, train_labels = extract_features(trainset, feature_extractor)
test_features, test_labels = extract_features(testset, feature_extractor)

# Sanity check: one row per image.
print(train_features.shape, test_features.shape)

(5000, 512) (1000, 512)


## Using PCA to reduce the size of feature vector from 512x1 to 50x1

In [None]:
# Fit PCA on the training features only and reuse that projection for the
# test features, so nothing from the test set influences the components.
# random_state is pinned because scikit-learn may pick a randomized SVD solver
# for this problem size, which would otherwise make the projection (and every
# downstream metric) vary between runs.
pca = PCA(n_components=50, random_state=42)
train_features_pca = pca.fit_transform(train_features)
test_features_pca = pca.transform(test_features)

print(train_features_pca.shape, test_features_pca.shape)

(5000, 50) (1000, 50)


Helper function to extract metrics

In [None]:
import pandas as pd

def metrics_row(y_true, y_pred, model_name):
    """Summarize one model's predictions as a single results-table row.

    Args:
        y_true: Ground-truth class labels.
        y_pred: Predicted class labels, aligned with ``y_true``.
        model_name: Label stored under the ``"Model"`` key.

    Returns:
        Dict with the model name, accuracy, and macro-averaged precision,
        recall and F1 (classes with no predictions count as 0).
    """
    accuracy = accuracy_score(y_true, y_pred)
    # The fourth return value (support) is not needed for the summary.
    macro_precision, macro_recall, macro_f1, _support = precision_recall_fscore_support(
        y_true, y_pred, average='macro', zero_division=0
    )

    row = {"Model": model_name}
    row["Accuracy"] = accuracy
    row["Macro Precision"] = macro_precision
    row["Macro Recall"] = macro_recall
    row["Macro F1"] = macro_f1
    return row

# 3: Naive Bayes

Part 3.1.1 — Build confusion matrix (rows=true labels, cols=predictions)


In [None]:
import numpy as np

def confusion_matrix_np(y_true, y_pred, num_classes=10):
    """Count (true, predicted) label pairs into a square matrix.

    Args:
        y_true: Ground-truth integer labels.
        y_pred: Predicted integer labels, aligned with ``y_true``.
        num_classes: Side length of the returned matrix.

    Returns:
        ``(num_classes, num_classes)`` int64 array where entry ``[t, p]`` is
        the number of samples with true label ``t`` predicted as ``p``.
    """
    counts = np.zeros((num_classes, num_classes), dtype=np.int64)
    for actual, predicted in zip(np.asarray(y_true), np.asarray(y_pred)):
        counts[actual, predicted] += 1
    return counts


Part 3.1.2 — Compute precision, recall, F1, and accuracy (macro + per-class)

In [None]:
def precision_recall_f1_from_cm(cm, eps=1e-12):
    """Derive per-class and macro metrics from a confusion matrix.

    Args:
        cm: Square NumPy array with true labels on rows and predictions on
            columns.
        eps: Small constant added to every denominator so classes with no
            predictions or no samples yield 0 instead of a division by zero.

    Returns:
        Dict with per-class precision/recall/F1 arrays, their unweighted
        (macro) means, and overall accuracy.
    """
    true_pos = np.diag(cm).astype(np.float64)
    # Column sums = times each class was predicted; row sums = true counts.
    false_pos = cm.sum(axis=0) - true_pos
    false_neg = cm.sum(axis=1) - true_pos

    precision = true_pos / (true_pos + false_pos + eps)
    recall = true_pos / (true_pos + false_neg + eps)
    f1 = 2 * precision * recall / (precision + recall + eps)

    return dict(
        per_class_precision=precision,
        per_class_recall=recall,
        per_class_f1=f1,
        macro_precision=precision.mean(),
        macro_recall=recall.mean(),
        macro_f1=f1.mean(),
        accuracy=true_pos.sum() / cm.sum(),
    )


Part 3.1.3 — Pretty-print evaluation report with metrics and confusion matrix

In [None]:
def print_eval_report(name, y_true, y_pred, class_names=None, num_classes=10):
    """Print the confusion matrix plus overall and per-class metrics.

    Args:
        name: Title shown in the report header.
        y_true: Ground-truth integer labels.
        y_pred: Predicted integer labels, aligned with ``y_true``.
        class_names: Optional display names, one per class index. When
            omitted the class indices themselves are used as names.
        num_classes: Number of classes in the confusion matrix. Defaults to
            10 so existing calls behave exactly as before.

    Returns:
        None. The report is written to standard output.
    """
    cm = confusion_matrix_np(y_true, y_pred, num_classes=num_classes)
    m = precision_recall_f1_from_cm(cm)

    print(f"\n===== {name} =====")
    print("Confusion Matrix (rows=true, cols=pred):")
    print(cm)
    print("\nOverall:")
    print(f"- Accuracy       : {m['accuracy']:.4f}")
    print(f"- Macro Precision: {m['macro_precision']:.4f}")
    print(f"- Macro Recall   : {m['macro_recall']:.4f}")
    print(f"- Macro F1       : {m['macro_f1']:.4f}")

    if class_names is None:
        # Fall back to the numeric class indices as display names.
        class_names = [str(i) for i in range(num_classes)]
    for i, cname in enumerate(class_names):
        p = m["per_class_precision"][i]
        r = m["per_class_recall"][i]
        f = m["per_class_f1"][i]
        print(f"  class {i} ({cname}): P={p:.4f} R={r:.4f} F1={f:.4f}")

# Display names passed as `class_names` to print_eval_report; position i is
# printed next to class index i.
cifar10_classes = [
    "airplane","automobile","bird","cat","deer",
    "dog","frog","horse","ship","truck"
]


# Part 3.2.1 — Define Gaussian Naive Bayes class (NumPy only implementation)

In [None]:
class GaussianNaiveBayes:
    """Gaussian Naive Bayes classifier implemented with NumPy only.

    Training estimates, for every class, a prior probability and an
    independent per-feature mean and variance. Prediction picks the class
    with the highest log-likelihood plus log-prior.
    """

    def __init__(self, var_smoothing=1e-9):
        """Store the variance floor; all fitted attributes start as ``None``."""
        self.var_smoothing = var_smoothing
        self.classes_ = None
        self.class_priors_ = None
        self.means_ = None
        self.vars_ = None

    def fit(self, X, y):
        """Estimate class priors and per-class feature means/variances.

        Args:
            X: 2-D array of samples (rows) by features (columns).
            y: Class label for each row of ``X``.

        Returns:
            ``self``, so calls can be chained.
        """
        samples = np.asarray(X)
        targets = np.asarray(y)

        self.classes_ = np.unique(targets)
        n_classes = len(self.classes_)
        n_samples, n_features = samples.shape

        self.means_ = np.zeros((n_classes, n_features), dtype=np.float64)
        self.vars_ = np.zeros((n_classes, n_features), dtype=np.float64)
        self.class_priors_ = np.zeros(n_classes, dtype=np.float64)

        for row, label in enumerate(self.classes_):
            members = samples[targets == label]
            self.means_[row] = members.mean(axis=0)
            # var_smoothing keeps every variance strictly positive.
            self.vars_[row] = members.var(axis=0) + self.var_smoothing
            self.class_priors_[row] = len(members) / float(n_samples)
        return self

    def _log_gaussian_likelihood(self, X):
        """Return the log-likelihood of every sample under every class.

        Broadcasting pairs each sample with each class, so the result has
        one row per sample and one column per class.
        """
        samples = np.asarray(X)[:, None, :]      # (N, 1, D)
        class_means = self.means_[None, :, :]    # (1, C, D)
        class_vars = self.vars_[None, :, :]      # (1, C, D)

        normalizer = -0.5 * (np.log(2.0 * np.pi * class_vars)).sum(axis=2)
        distance = -0.5 * (((samples - class_means) ** 2) / class_vars).sum(axis=2)
        return normalizer + distance

    def predict(self, X):
        """Return the most probable class label for each row of ``X``."""
        joint = self._log_gaussian_likelihood(X) + np.log(self.class_priors_)[None, :]
        return self.classes_[np.argmax(joint, axis=1)]

Part 3.2.2 — Fit method: estimate means, variances, and priors for each class

In [None]:
# NOTE(review): this is a module-level copy of the `fit` method already defined
# inside `GaussianNaiveBayes`; defining it here creates a free function named
# `fit` and does not attach it to the class.
def fit(self, X, y):
        """Estimate class priors and per-class feature means/variances.

        Writes `classes_`, `means_`, `vars_` and `class_priors_` on `self`
        and returns `self`.
        """
        X = np.asarray(X)
        y = np.asarray(y)

        self.classes_ = np.unique(y)
        C = len(self.classes_)
        N, D = X.shape

        self.means_ = np.zeros((C, D), dtype=np.float64)
        self.vars_  = np.zeros((C, D), dtype=np.float64)
        self.class_priors_ = np.zeros(C, dtype=np.float64)

        for idx, c in enumerate(self.classes_):
            # Rows belonging to class c.
            Xc = X[y == c]
            self.means_[idx] = Xc.mean(axis=0)
            # var_smoothing keeps every variance strictly positive.
            self.vars_[idx]  = Xc.var(axis=0) + self.var_smoothing
            # Prior = fraction of training samples in this class.
            self.class_priors_[idx] = len(Xc) / float(N)
        return self

Part 3.2.3 — Log-likelihood computation and prediction using argmax of scores

In [None]:
# NOTE(review): module-level copy of the `_log_gaussian_likelihood` method
# already defined inside `GaussianNaiveBayes`; it is not bound to the class.
def _log_gaussian_likelihood(self, X):
        """Return the Gaussian log-likelihood of each sample under each class.

        The three arrays below broadcast to (N, C, D); summing over the last
        axis leaves one value per (sample, class) pair.
        """
        X = np.asarray(X)
        means = self.means_[None, :, :]   # (1, C, D)
        vars_ = self.vars_[None, :, :]    # (1, C, D)
        X_    = X[:, None, :]             # (N, 1, D)

        # Normalization term of the Gaussian log-density, summed over features.
        log_term  = -0.5 * (np.log(2.0 * np.pi * vars_)).sum(axis=2)
        # Variance-scaled squared distance to each class mean.
        quad_term = -0.5 * (((X_ - means) ** 2) / vars_).sum(axis=2)
        return log_term + quad_term

# NOTE(review): module-level copy of `GaussianNaiveBayes.predict`; it is not
# bound to the class, and the name `predict` is redefined later in the
# notebook by the decision-tree `predict(node, X)` function.
def predict(self, X):
        """Return the class with the highest log-likelihood + log-prior per sample."""
        log_like = self._log_gaussian_likelihood(X)
        log_prior = np.log(self.class_priors_)[None, :]
        scores = log_like + log_prior
        # Column index of the best score maps back to the original label.
        idx = np.argmax(scores, axis=1)
        return self.classes_[idx]

Part 3.3.1 — Fit (train) the scratch GNB

In [None]:
# Train the NumPy implementation on the PCA-reduced training features.
gnb_scratch = GaussianNaiveBayes(var_smoothing=1e-9)
gnb_scratch.fit(train_features_pca, train_labels)
print("Scratch GNB fitted on PCA-50 features.")



Scratch GNB fitted on PCA-50 features.


Part 3.3.2 — Predict & print evaluation report

In [None]:
# Predict the test set with the scratch model and print its full report.
pred_scratch = gnb_scratch.predict(test_features_pca)
print_eval_report(
    "Gaussian Naive Bayes (Scratch, PCA-50)",
    test_labels, pred_scratch,
    class_names=cifar10_classes
)


===== Gaussian Naive Bayes (Scratch, PCA-50) =====
Confusion Matrix (rows=true, cols=pred):
[[80  1  0  2  0  0  1  0 12  4]
 [ 3 89  0  2  1  0  0  0  0  5]
 [ 6  0 63  8  7  4 11  0  1  0]
 [ 1  0  3 74  4 10  7  1  0  0]
 [ 2  0  4  7 74  3  2  8  0  0]
 [ 0  1  6 13  3 74  2  1  0  0]
 [ 2  0  3  7  6  2 79  1  0  0]
 [ 2  1  0  4  8  5  0 79  1  0]
 [ 7  0  1  0  1  0  0  0 88  3]
 [ 5  3  0  1  0  0  0  1  1 89]]

Overall:
- Accuracy       : 0.7890
- Macro Precision: 0.7937
- Macro Recall   : 0.7890
- Macro F1       : 0.7896
  class 0 (airplane): P=0.7407 R=0.8000 F1=0.7692
  class 1 (automobile): P=0.9368 R=0.8900 F1=0.9128
  class 2 (bird): P=0.7875 R=0.6300 F1=0.7000
  class 3 (cat): P=0.6271 R=0.7400 F1=0.6789
  class 4 (deer): P=0.7115 R=0.7400 F1=0.7255
  class 5 (dog): P=0.7551 R=0.7400 F1=0.7475
  class 6 (frog): P=0.7745 R=0.7900 F1=0.7822
  class 7 (horse): P=0.8681 R=0.7900 F1=0.8272
  class 8 (ship): P=0.8544 R=0.8800 F1=0.8670
  class 9 (truck): P=0.8812 R=0.8900 F1

Collecting metric

In [None]:
# Summary row (accuracy + macro metrics) for the scratch Naive Bayes model.
row_gnb_scratch = metrics_row(test_labels, pred_scratch, "Naive Bayes (Scratch)")

## Part 3.4.1 — Gaussian Naive Bayes (scikit-learn)

In [None]:
from sklearn.naive_bayes import GaussianNB

# Reference implementation from scikit-learn, trained on the same PCA features
# with the same var_smoothing value as the scratch model.
gnb_sklearn = GaussianNB(var_smoothing=1e-9)
gnb_sklearn.fit(train_features_pca, train_labels)
print("sklearn GNB fitted on PCA-50 features.")

sklearn GNB fitted on PCA-50 features.


Part 3.4.2 — Predict & print evaluation report

In [None]:
# Predict the test set with the scikit-learn model and print the same report
# format used for the scratch model, so the two can be compared line by line.
pred_sklearn = gnb_sklearn.predict(test_features_pca)
print_eval_report(
    "Gaussian Naive Bayes (scikit-learn, PCA-50)",
    test_labels, pred_sklearn,
    class_names=cifar10_classes
)


===== Gaussian Naive Bayes (scikit-learn, PCA-50) =====
Confusion Matrix (rows=true, cols=pred):
[[80  1  0  2  0  0  1  0 12  4]
 [ 3 89  0  2  1  0  0  0  0  5]
 [ 6  0 63  8  7  4 11  0  1  0]
 [ 1  0  3 74  4 10  7  1  0  0]
 [ 2  0  4  7 74  3  2  8  0  0]
 [ 0  1  6 13  3 74  2  1  0  0]
 [ 2  0  3  7  6  2 79  1  0  0]
 [ 2  1  0  4  8  5  0 79  1  0]
 [ 7  0  1  0  1  0  0  0 88  3]
 [ 5  3  0  1  0  0  0  1  1 89]]

Overall:
- Accuracy       : 0.7890
- Macro Precision: 0.7937
- Macro Recall   : 0.7890
- Macro F1       : 0.7896
  class 0 (airplane): P=0.7407 R=0.8000 F1=0.7692
  class 1 (automobile): P=0.9368 R=0.8900 F1=0.9128
  class 2 (bird): P=0.7875 R=0.6300 F1=0.7000
  class 3 (cat): P=0.6271 R=0.7400 F1=0.6789
  class 4 (deer): P=0.7115 R=0.7400 F1=0.7255
  class 5 (dog): P=0.7551 R=0.7400 F1=0.7475
  class 6 (frog): P=0.7745 R=0.7900 F1=0.7822
  class 7 (horse): P=0.8681 R=0.7900 F1=0.8272
  class 8 (ship): P=0.8544 R=0.8800 F1=0.8670
  class 9 (truck): P=0.8812 R=0.89

Collecting metric

In [None]:
# Summary row for the scikit-learn Naive Bayes model (the stray leading space
# in the model label has been removed so it aligns with the other rows).
row_gnb_sklearn = metrics_row(test_labels, pred_sklearn, "Scikit’s Gaussian Naive Bayes")

# 4: Decision Tree Implementing Gini Impurity

In [None]:
def gini_impurity(labels):
    """Return the Gini impurity of a label collection.

    Computed as one minus the sum of squared class frequencies: 0 for a pure
    set, approaching 1 as labels spread over more classes.
    """
    _, class_counts = np.unique(labels, return_counts=True)
    fractions = class_counts / len(labels)
    return 1 - np.sum(np.square(fractions))

Splitting the dataset

In [None]:
def split_dataset(X, y, feature_idx, threshold):
    """Partition samples by comparing one feature against a threshold.

    Args:
        X: 2-D feature array (samples by features).
        y: Labels aligned with the rows of ``X``.
        feature_idx: Column of ``X`` used for the comparison.
        threshold: Rows with a value ``<= threshold`` go left, ``>`` go right.

    Returns:
        Tuple ``(X_left, y_left, X_right, y_right)``.
    """
    column = X[:, feature_idx]
    goes_left = column <= threshold
    goes_right = column > threshold
    return X[goes_left], y[goes_left], X[goes_right], y[goes_right]

Finding the best split

In [None]:
def best_split(X, y):
    """Search every feature/threshold pair for the lowest weighted Gini impurity.

    Candidate thresholds are the distinct values of each feature; a sample
    goes left when its value is ``<= threshold`` and right otherwise. Splits
    that leave either side empty are ignored.

    Args:
        X: 2-D feature array (samples by features).
        y: Labels aligned with the rows of ``X``.

    Returns:
        Tuple ``(feature_idx, threshold)`` of the best split, or
        ``(None, None)`` when no split separates the samples.
    """
    best_gini = 1
    best_feature_idx = None
    best_threshold = None

    n_samples = len(y)
    n_features = X.shape[1]

    for feature_idx in range(n_features):
        # Hoisted out of the threshold loop: the column does not change.
        column = X[:, feature_idx]
        for threshold in np.unique(column):
            # Only the labels are needed to score a split, so mask `y`
            # directly instead of also copying both halves of `X`.
            y_left = y[column <= threshold]
            y_right = y[column > threshold]
            if len(y_left) == 0 or len(y_right) == 0:
                continue

            # Impurity of each side weighted by its share of the samples.
            gini_weight = (
                len(y_left) * gini_impurity(y_left)
                + len(y_right) * gini_impurity(y_right)
            ) / n_samples

            # Strict comparison keeps the first of equally good splits.
            if gini_weight < best_gini:
                best_gini = gini_weight
                best_feature_idx = feature_idx
                best_threshold = threshold

    return best_feature_idx, best_threshold

Building the decision tree

In [None]:
class DecisionTree:
    """One node of a binary decision tree.

    An internal node carries a split rule (``feature_idx``, ``threshold``)
    and two children; a leaf carries a predicted class in ``value``. All of
    those start as ``None`` and are filled in by the tree builder.
    """

    def __init__(self, depth=0, max_depth=50):
        self.depth, self.max_depth = depth, max_depth
        # Split rule: feature column and the threshold it is compared with.
        self.feature_idx = self.threshold = None
        # Child nodes for the "<= threshold" and "> threshold" branches.
        self.left = self.right = None
        # Predicted class label; set only on leaves.
        self.value = None

# Recursively grows the decision tree, returning the node for this subset.
def buildTree(X, y, depth=0, max_depth=50):
    """Build a (sub)tree for the samples ``X`` with labels ``y``.

    Args:
        X: 2-D feature array for the samples reaching this node.
        y: Non-negative integer labels aligned with ``X``.
        depth: Depth of the node being created (root is 0).
        max_depth: Depth at which growth stops and a leaf is forced.

    Returns:
        A ``DecisionTree`` node: a leaf holding the majority label, or an
        internal node holding the split rule and both child subtrees.
    """
    node = DecisionTree(depth, max_depth)

    # Stop condition: the node is pure or the depth limit has been reached.
    is_pure = len(np.unique(y)) == 1
    if is_pure or depth >= max_depth:
        node.value = np.bincount(y).argmax()
        return node

    # Pick the feature/threshold pair with the lowest weighted Gini impurity.
    feature_idx, threshold = best_split(X, y)
    if feature_idx is None:
        # No split separates the samples, so fall back to a majority leaf.
        node.value = np.bincount(y).argmax()
        return node

    node.feature_idx, node.threshold = feature_idx, threshold

    X_left, y_left, X_right, y_right = split_dataset(X, y, feature_idx, threshold)
    child_depth = depth + 1
    node.left = buildTree(X_left, y_left, child_depth, max_depth)
    node.right = buildTree(X_right, y_right, child_depth, max_depth)

    return node

Prediction of Decision Tree

In [None]:
def predict(node, X):
    """Classify every row of ``X`` by walking the tree rooted at ``node``.

    Args:
        node: Root node; internal nodes have ``value`` set to ``None`` and
            carry ``feature_idx``, ``threshold``, ``left`` and ``right``.
        X: Iterable of feature vectors.

    Returns:
        NumPy array with one predicted label per row of ``X``.
    """
    def _classify(sample):
        # Descend from the root until a leaf (a node with a value) is reached.
        current = node
        while current.value is None:
            go_left = sample[current.feature_idx] <= current.threshold
            current = current.left if go_left else current.right
        return current.value

    return np.array([_classify(sample) for sample in X])

Evaluation Metrics for the Decision Tree

In [None]:
def confusionMatrix(y_true, y_prediction, num_classes=10):
    """Count (true, predicted) label pairs into a square integer matrix.

    Rows index the actual labels and columns the predicted labels.
    """
    matrix = np.zeros(shape=(num_classes, num_classes), dtype=int)
    for actual, predicted in zip(y_true, y_prediction):
        matrix[actual, predicted] += 1
    return matrix

def computeMetrics(cm):
    """Compute accuracy and per-class precision, recall and F1.

    Args:
        cm: Square confusion matrix (rows = true labels, columns = predictions).

    Returns:
        Tuple ``(accuracy, precision, recall, f1)``; the last three are
        arrays with one entry per class. Denominators are clamped so classes
        with no predictions or no samples score 0 instead of dividing by zero.
    """
    correct = np.diag(cm)
    predicted_totals = np.maximum(cm.sum(axis=0), 1)
    actual_totals = np.maximum(cm.sum(axis=1), 1)

    precision = correct / predicted_totals
    recall = correct / actual_totals
    f1 = 2 * precision * recall / np.maximum(precision + recall, 1e-6)
    accuracy = np.trace(cm) / np.sum(cm)
    return accuracy, precision, recall, f1

Training and testing

In [None]:
# File used to cache the trained scratch tree between runs.
model_path = "decision_tree_scratch.pkl"

#Train if not saved (Saving model)
# NOTE(review): the cache is keyed on the file name only. A tree pickled by an
# earlier run is reused even if the PCA features or max_depth have changed
# since; delete the file to force retraining.
if not os.path.exists(model_path):
  tree = buildTree(train_features_pca, train_labels, max_depth=10)
  with open(model_path, "wb") as f:
    pickle.dump(tree, f)
else:
  # NOTE(review): pickle.load can execute arbitrary code; only load files this
  # notebook wrote itself.
  with open(model_path, "rb") as f:
    tree = pickle.load(f)

#Evaluation
# Predict the test set with the trained (or loaded) scratch tree.
y_prediction = predict(tree, test_features_pca)

# Confusion matrix with true labels on rows and predictions on columns.
cm = confusionMatrix(test_labels, y_prediction)

# Overall accuracy plus per-class precision/recall/F1 arrays.
accuracy, precision, recall, f1 = computeMetrics(cm)

# NOTE(review): same ten names as `cifar10_classes` defined earlier.
class_names = [
    "airplane", "automobile", "bird", "cat", "deer",
    "dog", "frog", "horse", "ship", "truck"
]

print("\n===== Decision Tree (Scratch, PCA-50) =====")
print("Confusion Matrix (rows=true, cols=pred):")
print(cm)

# Macro averages: unweighted mean of the per-class values.
macro_precision = np.mean(precision)
macro_recall = np.mean(recall)
macro_f1 = np.mean(f1)

print("\nOverall:")
print(f"- Accuracy       : {accuracy:.4f}")
print(f"- Macro Precision: {macro_precision:.4f}")
print(f"- Macro Recall   : {macro_recall:.4f}")
print(f"- Macro F1       : {macro_f1:.4f}")

# One line per class, in class-index order.
for i, name in enumerate(class_names):
    print(f"  class {i} ({name}): "
          f"P={precision[i]:.4f} R={recall[i]:.4f} F1={f1[i]:.4f}")


===== Decision Tree (Scratch, PCA-50) =====
Confusion Matrix (rows=true, cols=pred):
[[53  5 10  6  1  1  0  1 17  6]
 [ 8 71  2  1  0  1  2  0  4 11]
 [ 3  0 45 16  9 10 10  6  1  0]
 [ 2  1  9 54  3 20  7  4  0  0]
 [ 7  0  8  6 53  9  3 13  0  1]
 [ 0  0  5 22  4 62  1  4  2  0]
 [ 5  0  5 10  2  3 73  1  1  0]
 [ 1  1  8 13  7 13  0 55  1  1]
 [18  5  2  1  0  0  0  1 66  7]
 [ 4  9  0  2  1  0  0  1 10 73]]

Overall:
- Accuracy       : 0.6050
- Macro Precision: 0.6155
- Macro Recall   : 0.6050
- Macro F1       : 0.6077
  class 0 (airplane): P=0.5248 R=0.5300 F1=0.5274
  class 1 (automobile): P=0.7717 R=0.7100 F1=0.7396
  class 2 (bird): P=0.4787 R=0.4500 F1=0.4639
  class 3 (cat): P=0.4122 R=0.5400 F1=0.4675
  class 4 (deer): P=0.6625 R=0.5300 F1=0.5889
  class 5 (dog): P=0.5210 R=0.6200 F1=0.5662
  class 6 (frog): P=0.7604 R=0.7300 F1=0.7449
  class 7 (horse): P=0.6395 R=0.5500 F1=0.5914
  class 8 (ship): P=0.6471 R=0.6600 F1=0.6535
  class 9 (truck): P=0.7374 R=0.7300 F1=0.7337

Collecting metrics

In [None]:
# Summary row (accuracy + macro metrics) for the scratch decision tree.
row_dt_scratch = metrics_row(test_labels, y_prediction, "Decision Tree (Scratch)")

Scikit-Learn Decision Tree

In [None]:
# File used to cache the fitted scikit-learn tree between runs.
# NOTE(review): this rebinds `model_path`, previously the scratch-tree path.
model_path = "decision_tree_sklearn.pkl"

#Train if not saved (Saving model)
# NOTE(review): as with the scratch tree, an existing file is reused even if
# the PCA features or hyper-parameters changed; delete it to retrain.
if not os.path.exists(model_path):
  clf = DecisionTreeClassifier(criterion = 'gini', max_depth=10, random_state=42)
  clf.fit(train_features_pca, train_labels)
  dump(clf, model_path)
else:
  clf = load(model_path)

# Predict the test set with the scikit-learn tree.
y_prediction_sklearn = clf.predict(test_features_pca)

#computing matrix from sklearn
# average=None returns one value per class; zero_division=0 scores a class
# with no predictions as 0 instead of emitting a warning.
cm_sklearn = confusion_matrix(test_labels, y_prediction_sklearn)
accuracy_sklearn = accuracy_score(test_labels, y_prediction_sklearn)
precision_sklearn = precision_score(test_labels, y_prediction_sklearn, average=None, zero_division=0)
recall_sklearn = recall_score(test_labels, y_prediction_sklearn, average=None, zero_division=0)
f1_sklearn = f1_score(test_labels, y_prediction_sklearn, average=None, zero_division=0)

#printing information
# NOTE(review): same ten names as `cifar10_classes` defined earlier.
class_names = [
    "airplane", "automobile", "bird", "cat", "deer",
    "dog", "frog", "horse", "ship", "truck"
]

print("\n===== Decision Tree (Scikit-learn, PCA-50) =====")
print("Confusion Matrix (rows=true, cols=pred):")
print(cm_sklearn)

# Macro averages: unweighted mean of the per-class values. These names
# overwrite the ones computed for the scratch tree.
macro_precision = np.mean(precision_sklearn)
macro_recall = np.mean(recall_sklearn)
macro_f1 = np.mean(f1_sklearn)

print("\nOverall:")
print(f"- Accuracy       : {accuracy_sklearn:.4f}")
print(f"- Macro Precision: {macro_precision:.4f}")
print(f"- Macro Recall   : {macro_recall:.4f}")
print(f"- Macro F1       : {macro_f1:.4f}")

# One line per class, in class-index order.
for i, name in enumerate(class_names):
    print(f"  class {i} ({name}): "
          f"P={precision_sklearn[i]:.4f} R={recall_sklearn[i]:.4f} F1={f1_sklearn[i]:.4f}")


===== Decision Tree (Scikit-learn, PCA-50) =====
Confusion Matrix (rows=true, cols=pred):
[[52  5  9  5  3  1  0  3 17  5]
 [ 6 72  2  1  0  2  2  0  6  9]
 [ 3  0 45 15  6 10 12  8  1  0]
 [ 3  1  9 54  2 21  6  3  0  1]
 [ 6  0  8  6 54  8  3 14  0  1]
 [ 0  0  6 22  3 62  2  5  0  0]
 [ 4  0  6 10  5  3 70  2  0  0]
 [ 1  1  6 13  8 10  0 59  1  1]
 [20  4  3  1  0  0  1  1 65  5]
 [ 5 10  0  2  1  0  0  1  7 74]]

Overall:
- Accuracy       : 0.6070
- Macro Precision: 0.6165
- Macro Recall   : 0.6070
- Macro F1       : 0.6098
  class 0 (airplane): P=0.5200 R=0.5200 F1=0.5200
  class 1 (automobile): P=0.7742 R=0.7200 F1=0.7461
  class 2 (bird): P=0.4787 R=0.4500 F1=0.4639
  class 3 (cat): P=0.4186 R=0.5400 F1=0.4716
  class 4 (deer): P=0.6585 R=0.5400 F1=0.5934
  class 5 (dog): P=0.5299 R=0.6200 F1=0.5714
  class 6 (frog): P=0.7292 R=0.7000 F1=0.7143
  class 7 (horse): P=0.6146 R=0.5900 F1=0.6020
  class 8 (ship): P=0.6701 R=0.6500 F1=0.6599
  class 9 (truck): P=0.7708 R=0.7400 F1=0

Collecting metrics

In [None]:
# Summary row (accuracy + macro metrics) for the scikit-learn decision tree.
row_dt_sklearn = metrics_row(test_labels, y_prediction_sklearn, "Scikit’s implementation of a Decision Tree")

# 5: Multi-Layer Perceptron (MLP)

## 5.1: Define the MLP architecture

In [None]:
import torch.nn as nn
import torch.optim as optim

class MLP(nn.Module):
    """Three-layer perceptron: two 512-unit hidden layers and a linear output.

    Batch normalization is applied to the second hidden layer before its
    ReLU. The output layer returns raw class scores (logits).
    """

    def __init__(self, input_size, num_classes):
        """Create the layers for ``input_size`` features and ``num_classes`` outputs."""
        super().__init__()
        self.layer_1 = nn.Linear(input_size, 512)
        self.relu_1 = nn.ReLU()
        self.layer_2 = nn.Linear(512, 512)
        self.bn_2 = nn.BatchNorm1d(512)
        self.relu_2 = nn.ReLU()
        self.layer_3 = nn.Linear(512, num_classes)

    def forward(self, x):
        """Map a ``(batch, input_size)`` tensor to ``(batch, num_classes)`` logits."""
        hidden = self.relu_1(self.layer_1(x))
        hidden = self.relu_2(self.bn_2(self.layer_2(hidden)))
        return self.layer_3(hidden)

Define Loss Function and Optimizer

In [None]:
# Base MLP sized to the PCA feature dimension, with one output per class.
model = MLP(input_size=train_features_pca.shape[1], num_classes=10)
print("Multilayer Perceptron (MLP) created")

# Cross-entropy over the raw class scores returned by the model.
criterion = nn.CrossEntropyLoss()
print("Training with PyTorch's CrossEntropyLoss")

# NOTE(review): no random seed is set, so the initial weights (and therefore
# the results) differ between runs.
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
print("Using SGD optimizer with momentum of 0.9\n")

Multilayer Perceptron (MLP) created
Training with PyTorch's CrossEntropyLoss
Using SGD optimizer with momentum of 0.9



Generate confusion matrix for initial MLP

In [None]:
import torch

# Convert train and test data to PyTorch tensors
train_features_pca_tensor = torch.tensor(train_features_pca, dtype=torch.float32)
train_labels_tensor = torch.tensor(train_labels, dtype=torch.long)
test_features_pca_tensor = torch.tensor(test_features_pca, dtype=torch.float32)
test_labels_tensor = torch.tensor(test_labels, dtype=torch.long)

# Train the base MLP before evaluating it. Previously this cell evaluated the
# freshly initialized network, so the reported metrics reflected random
# weights (about chance level). The schedule mirrors the other MLP variants
# in this notebook: 10 epochs, batches of 64, reshuffled every epoch.
num_epochs = 10
batch_size = 64

train_dataset = torch.utils.data.TensorDataset(train_features_pca_tensor, train_labels_tensor)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

print("Starting training for MLP...")
for epoch in range(num_epochs):
    # Training mode so BatchNorm uses batch statistics and updates its
    # running averages.
    model.train()
    running_loss = 0.0
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {running_loss/len(train_loader):.4f}")

print("Training finished.")

# Set the model to evaluation mode (BatchNorm switches to running statistics)
model.eval()

# Make predictions: the index of the highest score is the predicted class
with torch.no_grad():
    outputs = model(test_features_pca_tensor)
    _, predicted = torch.max(outputs.data, 1)

# Convert predictions to numpy array
pred_mlp_initial = predicted.numpy()

# Lines to display the results (Confusion Matrix and metrics)
print_eval_report(
    "MLP (Initial, PCA-50)",
    test_labels, pred_mlp_initial,
    class_names=cifar10_classes
)

# Lines to append to the general table (commented out)
# mlp_row = metrics_row(test_labels, pred_mlp_initial, "MLP (Initial, PCA-50)")
# rows.append(mlp_row)
# df_results_mlp = pd.DataFrame(rows)
# display(df_results_mlp)


===== MLP (Initial, PCA-50) =====
Confusion Matrix (rows=true, cols=pred):
[[ 7  6  3  0 33  1  0 34  4 12]
 [ 2 10 39  0 29  0  0  2  0 18]
 [ 0 24  5  0  2  1  1 60  5  2]
 [ 0  9 13  0  0  0  1 62  4 11]
 [ 0 11 41  0  3  0  1 29  7  8]
 [ 1 10 23  0  3  0  1 44  5 13]
 [ 0 18 14  0  5  0  9 41  6  7]
 [ 0 23 21  0  2  2  1 34  5 12]
 [ 2 11 24  1 31  1  0  9  2 19]
 [ 0  4 46  0 30  3  0 10  2  5]]

Overall:
- Accuracy       : 0.0750
- Macro Precision: 0.1550
- Macro Recall   : 0.0750
- Macro F1       : 0.0664
  class 0 (airplane): P=0.5833 R=0.0700 F1=0.1250
  class 1 (automobile): P=0.0794 R=0.1000 F1=0.0885
  class 2 (bird): P=0.0218 R=0.0500 F1=0.0304
  class 3 (cat): P=0.0000 R=0.0000 F1=0.0000
  class 4 (deer): P=0.0217 R=0.0300 F1=0.0252
  class 5 (dog): P=0.0000 R=0.0000 F1=0.0000
  class 6 (frog): P=0.6429 R=0.0900 F1=0.1579
  class 7 (horse): P=0.1046 R=0.3400 F1=0.1600
  class 8 (ship): P=0.0500 R=0.0200 F1=0.0286
  class 9 (truck): P=0.0467 R=0.0500 F1=0.0483


Collecting metric

In [None]:
# Summary row (accuracy + macro metrics) for the base MLP.
row_mlp_initial = metrics_row(test_labels, pred_mlp_initial, "MLP")

## 5.2.1: Adding layers

In [None]:
# Define an MLP with more layers
class DeeperMLP(nn.Module):
    """MLP variant with an extra 256-unit hidden layer before the output.

    Layout: input -> 512 -> 512 (batch norm) -> 256 -> ``num_classes``, with
    a ReLU after every hidden layer. The output is raw class scores.
    """

    def __init__(self, input_size, num_classes):
        """Create the layers for ``input_size`` features and ``num_classes`` outputs."""
        super().__init__()
        self.layer_1 = nn.Linear(input_size, 512)
        self.relu_1 = nn.ReLU()
        self.layer_2 = nn.Linear(512, 512)
        self.bn_2 = nn.BatchNorm1d(512)
        self.relu_2 = nn.ReLU()
        # Additional hidden layer compared with the base MLP.
        self.layer_3 = nn.Linear(512, 256)
        self.relu_3 = nn.ReLU()
        # Output layer now reads from the 256-unit layer.
        self.layer_4 = nn.Linear(256, num_classes)

    def forward(self, x):
        """Map a ``(batch, input_size)`` tensor to ``(batch, num_classes)`` logits."""
        hidden = self.relu_1(self.layer_1(x))
        hidden = self.relu_2(self.bn_2(self.layer_2(hidden)))
        hidden = self.relu_3(self.layer_3(hidden))
        return self.layer_4(hidden)

print("Deeper Multilayer Perceptron (MLP) created")

# Instantiate the deeper MLP model
deeper_model = DeeperMLP(input_size=train_features_pca.shape[1], num_classes=10)

# Define Loss Function and Optimizer for the deeper model
criterion = nn.CrossEntropyLoss()
optimizer_deeper = optim.SGD(deeper_model.parameters(), lr=0.001, momentum=0.9)

# Convert data to PyTorch tensors
train_features_pca_tensor = torch.tensor(train_features_pca, dtype=torch.float32)
train_labels_tensor = torch.tensor(train_labels, dtype=torch.long)
test_features_pca_tensor = torch.tensor(test_features_pca, dtype=torch.float32)
test_labels_tensor = torch.tensor(test_labels, dtype=torch.long)

# Training schedule: 10 passes over the data in mini-batches of 64.
# NOTE(review): this tensor conversion and training loop are repeated verbatim
# for each MLP variant; a shared helper would remove the duplication. No
# random seed is set, so results vary between runs.
num_epochs = 10
batch_size = 64

# Reshuffle the training samples every epoch.
train_dataset = torch.utils.data.TensorDataset(train_features_pca_tensor, train_labels_tensor)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

print("Starting training for Deeper MLP...")
for epoch in range(num_epochs):
    # Training mode so BatchNorm uses batch statistics.
    deeper_model.train()
    running_loss = 0.0
    for inputs, labels in train_loader:
        optimizer_deeper.zero_grad()
        outputs = deeper_model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer_deeper.step()
        running_loss += loss.item()
    # Report the mean batch loss for the epoch.
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {running_loss/len(train_loader):.4f}")

print("Training finished.")

# Evaluate the deeper model
deeper_model.eval()
with torch.no_grad():
    outputs = deeper_model(test_features_pca_tensor)
    # The index of the highest score is the predicted class.
    _, predicted_deeper = torch.max(outputs.data, 1)

# Generate and print evaluation report for the deeper MLP
pred_deeper_mlp = predicted_deeper.numpy()
print_eval_report(
    "MLP (Deeper, PCA-50)",
    test_labels, pred_deeper_mlp,
    class_names=cifar10_classes
)

# Add results to combined table (commented for now)
# rows.append(metrics_row(test_labels, pred_deeper_mlp, "MLP (Deeper, PCA-50)"))

Deeper Multilayer Perceptron (MLP) created
Starting training for Deeper MLP...
Epoch 1/10, Loss: 2.0487
Epoch 2/10, Loss: 1.3802
Epoch 3/10, Loss: 0.9311
Epoch 4/10, Loss: 0.7263
Epoch 5/10, Loss: 0.6252
Epoch 6/10, Loss: 0.5619
Epoch 7/10, Loss: 0.5177
Epoch 8/10, Loss: 0.4807
Epoch 9/10, Loss: 0.4475
Epoch 10/10, Loss: 0.4267
Training finished.

===== MLP (Deeper, PCA-50) =====
Confusion Matrix (rows=true, cols=pred):
[[80  1  2  2  0  0  0  0 12  3]
 [ 2 94  0  1  0  0  0  0  1  2]
 [ 4  0 70  6  4  7  8  0  1  0]
 [ 1  0  2 76  1 11  8  1  0  0]
 [ 2  0  2  8 79  1  1  5  2  0]
 [ 0  0  4 15  1 75  3  1  1  0]
 [ 1  0  1  4  3  2 89  0  0  0]
 [ 1  0  0  5 10  4  0 79  1  0]
 [ 4  0  1  0  0  0  0  0 93  2]
 [ 2  6  0  0  0  0  1  0  2 89]]

Overall:
- Accuracy       : 0.8240
- Macro Precision: 0.8293
- Macro Recall   : 0.8240
- Macro F1       : 0.8244
  class 0 (airplane): P=0.8247 R=0.8000 F1=0.8122
  class 1 (automobile): P=0.9307 R=0.9400 F1=0.9353
  class 2 (bird): P=0.8537 R=

Collecting metric

In [None]:
# Summary row (accuracy + macro metrics) for the deeper MLP.
row_mlp_deeper = metrics_row(test_labels, pred_deeper_mlp, "MLP (Deeper, PCA-50)")

## 5.2.2: Removing layers

In [None]:
# Define an MLP with fewer layers (e.g., one hidden layer)
class ShallowerMLP(nn.Module):
    """MLP variant with a single 256-unit hidden layer.

    Layout: input -> 256 (ReLU) -> ``num_classes``. The output is raw class
    scores.
    """

    def __init__(self, input_size, num_classes):
        """Create the layers for ``input_size`` features and ``num_classes`` outputs."""
        super().__init__()
        # Only hidden layer, smaller than the base MLP's 512 units.
        self.layer_1 = nn.Linear(input_size, 256)
        self.relu_1 = nn.ReLU()
        # Output layer.
        self.layer_2 = nn.Linear(256, num_classes)

    def forward(self, x):
        """Map a ``(batch, input_size)`` tensor to ``(batch, num_classes)`` logits."""
        return self.layer_2(self.relu_1(self.layer_1(x)))

print("Shallower Multilayer Perceptron (MLP) created")

# Instantiate the shallower MLP model
shallower_model = ShallowerMLP(input_size=train_features_pca.shape[1], num_classes=10)

# Define Loss Function and Optimizer for the shallower model
criterion = nn.CrossEntropyLoss()
optimizer_shallower = optim.SGD(shallower_model.parameters(), lr=0.001, momentum=0.9)

# Convert data to PyTorch tensors
train_features_pca_tensor = torch.tensor(train_features_pca, dtype=torch.float32)
train_labels_tensor = torch.tensor(train_labels, dtype=torch.long)
test_features_pca_tensor = torch.tensor(test_features_pca, dtype=torch.float32)
test_labels_tensor = torch.tensor(test_labels, dtype=torch.long)

# Training schedule: 10 passes over the data in mini-batches of 64.
# NOTE(review): this tensor conversion and training loop are repeated verbatim
# for each MLP variant; a shared helper would remove the duplication. No
# random seed is set, so results vary between runs.
num_epochs = 10
batch_size = 64

# Reshuffle the training samples every epoch.
train_dataset = torch.utils.data.TensorDataset(train_features_pca_tensor, train_labels_tensor)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

print("Starting training for Shallower MLP...")
for epoch in range(num_epochs):
    shallower_model.train()
    running_loss = 0.0
    for inputs, labels in train_loader:
        optimizer_shallower.zero_grad()
        outputs = shallower_model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer_shallower.step()
        running_loss += loss.item()
    # Report the mean batch loss for the epoch.
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {running_loss/len(train_loader):.4f}")

print("Training finished.")

# Evaluate the shallower model
shallower_model.eval()
with torch.no_grad():
    outputs = shallower_model(test_features_pca_tensor)
    # The index of the highest score is the predicted class.
    _, predicted_shallower = torch.max(outputs.data, 1)

# Generate and print evaluation report for the shallower MLP
pred_shallower_mlp = predicted_shallower.numpy()
print_eval_report(
    "MLP (Shallower, PCA-50)",
    test_labels, pred_shallower_mlp,
    class_names=cifar10_classes
)

# Add results to combined table (commented for now)
# rows.append(metrics_row(test_labels, pred_shallower_mlp, "MLP (Shallower, PCA-50)"))

Shallower Multilayer Perceptron (MLP) created
Starting training for Shallower MLP...
Epoch 1/10, Loss: 1.8510
Epoch 2/10, Loss: 1.1006
Epoch 3/10, Loss: 0.8526
Epoch 4/10, Loss: 0.7276
Epoch 5/10, Loss: 0.6612
Epoch 6/10, Loss: 0.6116
Epoch 7/10, Loss: 0.5805
Epoch 8/10, Loss: 0.5510
Epoch 9/10, Loss: 0.5303
Epoch 10/10, Loss: 0.5160
Training finished.

===== MLP (Shallower, PCA-50) =====
Confusion Matrix (rows=true, cols=pred):
[[81  2  3  0  0  0  1  0  8  5]
 [ 2 86  0  1  0  0  1  0  2  8]
 [ 6  0 73  5  3  7  5  0  1  0]
 [ 1  0  3 72  1 16  7  0  0  0]
 [ 2  0  2  5 77  4  1  8  1  0]
 [ 0  0  4 16  3 72  2  2  1  0]
 [ 1  0  4  2  1  2 89  0  1  0]
 [ 1  1  1  4  9  5  0 78  1  0]
 [ 9  1  1  0  0  0  0  0 86  3]
 [ 2  4  0  1  0  0  0  1  4 88]]

Overall:
- Accuracy       : 0.8020
- Macro Precision: 0.8047
- Macro Recall   : 0.8020
- Macro F1       : 0.8024
  class 0 (airplane): P=0.7714 R=0.8100 F1=0.7902
  class 1 (automobile): P=0.9149 R=0.8600 F1=0.8866
  class 2 (bird): P=

Collecting metric

In [None]:
# Summary row (accuracy + macro metrics) for the shallower MLP.
row_mlp_shallower = metrics_row(test_labels, pred_shallower_mlp, "MLP (Shallower, PCA-50)")

## 5.3.1: Larger Hidden Layer

In [None]:
# Define an MLP with larger hidden layers (e.g., 1024 units)
class WiderMLP(nn.Module):
    """Three-layer MLP classifier whose two hidden layers are 1024 units wide.

    Layout: Linear -> ReLU -> Linear -> BatchNorm1d -> ReLU -> Linear.

    Args:
        input_size: Number of input features per sample.
        num_classes: Number of output logits.
    """

    def __init__(self, input_size, num_classes):
        super().__init__()
        hidden_units = 1024  # width shared by both hidden layers
        # Submodules are created in the order they are applied in forward().
        self.layer_1 = nn.Linear(input_size, hidden_units)
        self.relu_1 = nn.ReLU()
        self.layer_2 = nn.Linear(hidden_units, hidden_units)
        self.bn_2 = nn.BatchNorm1d(hidden_units)
        self.relu_2 = nn.ReLU()
        self.layer_3 = nn.Linear(hidden_units, num_classes)

    def forward(self, x):
        """Return unnormalised class scores (logits) for a batch ``x``."""
        hidden = self.relu_1(self.layer_1(x))
        hidden = self.relu_2(self.bn_2(self.layer_2(hidden)))
        return self.layer_3(hidden)

# Train and evaluate the WiderMLP defined above on the PCA-reduced features.
# This cell (re)binds several globals (criterion, num_epochs, batch_size,
# train_loader, the *_tensor variables) that other MLP cells also assign.
print("Wider Multilayer Perceptron (MLP) created")

# Instantiate the wider MLP model; the input width is the number of feature columns
wider_model = WiderMLP(input_size=train_features_pca.shape[1], num_classes=10)

# Define Loss Function and Optimizer for the wider model
criterion = nn.CrossEntropyLoss()
optimizer_wider = optim.SGD(wider_model.parameters(), lr=0.001, momentum=0.9)

# Convert data to PyTorch tensors (float32 features, int64 labels)
train_features_pca_tensor = torch.tensor(train_features_pca, dtype=torch.float32)
train_labels_tensor = torch.tensor(train_labels, dtype=torch.long)
test_features_pca_tensor = torch.tensor(test_features_pca, dtype=torch.float32)
test_labels_tensor = torch.tensor(test_labels, dtype=torch.long)

# Simple training loop
num_epochs = 10
batch_size = 64

# Mini-batches are reshuffled every epoch
train_dataset = torch.utils.data.TensorDataset(train_features_pca_tensor, train_labels_tensor)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

print("Starting training for Wider MLP...")
for epoch in range(num_epochs):
    wider_model.train()
    running_loss = 0.0
    for inputs, labels in train_loader:
        optimizer_wider.zero_grad()
        outputs = wider_model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer_wider.step()
        running_loss += loss.item()
    # Reported value is the sum of per-batch losses divided by the number of batches
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {running_loss/len(train_loader):.4f}")

print("Training finished.")

# Evaluate the wider model on the whole test set in a single forward pass
wider_model.eval()
with torch.no_grad():
    outputs = wider_model(test_features_pca_tensor)
    # torch.max over dim 1 returns (values, indices); the indices are the predicted class ids
    _, predicted_wider = torch.max(outputs.data, 1)

# Generate and print evaluation report for the wider MLP
pred_wider_mlp = predicted_wider.numpy()
print_eval_report(
    "MLP (Wider, PCA-50)",
    test_labels, pred_wider_mlp,
    class_names=cifar10_classes
)

# Add results to combined table (commented for now)
# rows.append(metrics_row(test_labels, pred_wider_mlp, "MLP (Wider, PCA-50)"))

Wider Multilayer Perceptron (MLP) created
Starting training for Wider MLP...
Epoch 1/10, Loss: 1.1657
Epoch 2/10, Loss: 0.5947
Epoch 3/10, Loss: 0.4998
Epoch 4/10, Loss: 0.4503
Epoch 5/10, Loss: 0.4059
Epoch 6/10, Loss: 0.3846
Epoch 7/10, Loss: 0.3572
Epoch 8/10, Loss: 0.3322
Epoch 9/10, Loss: 0.3176
Epoch 10/10, Loss: 0.3102
Training finished.

===== MLP (Wider, PCA-50) =====
Confusion Matrix (rows=true, cols=pred):
[[86  0  3  0  0  0  2  0  6  3]
 [ 2 93  0  1  0  0  0  0  0  4]
 [ 5  0 78  5  2  4  6  0  0  0]
 [ 1  0  2 77  2  8  8  2  0  0]
 [ 2  0  2  6 79  1  1  8  1  0]
 [ 0  0  6 16  2 70  2  3  1  0]
 [ 1  0  2  4  2  1 89  0  1  0]
 [ 1  0  0  4  7  4  0 84  0  0]
 [ 4  0  1  1  0  0  0  0 93  1]
 [ 2  2  0  2  0  0  0  0  1 93]]

Overall:
- Accuracy       : 0.8420
- Macro Precision: 0.8449
- Macro Recall   : 0.8420
- Macro F1       : 0.8423
  class 0 (airplane): P=0.8269 R=0.8600 F1=0.8431
  class 1 (automobile): P=0.9789 R=0.9300 F1=0.9538
  class 2 (bird): P=0.8298 R=0.7

Collecting metric

In [None]:
# Record the wider MLP's test-set metrics under its display label.
# NOTE(review): metrics_row is defined outside this section; presumably it returns
# one row for the combined results table — confirm against its definition.
row_mlp_wider = metrics_row(test_labels, pred_wider_mlp, "MLP (Wider, PCA-50)")

## 5.3.2: Smaller Hidden Layer

In [None]:
# Define an MLP with smaller hidden layers (e.g., 128 units)
class NarrowerMLP(nn.Module):
    """Three-layer MLP classifier whose two hidden layers are 128 units wide.

    Layout: Linear -> ReLU -> Linear -> BatchNorm1d -> ReLU -> Linear.

    Args:
        input_size: Number of input features per sample.
        num_classes: Number of output logits.
    """

    def __init__(self, input_size, num_classes):
        super().__init__()
        hidden_units = 128  # width shared by both hidden layers
        # Submodules are created in the order they are applied in forward().
        self.layer_1 = nn.Linear(input_size, hidden_units)
        self.relu_1 = nn.ReLU()
        self.layer_2 = nn.Linear(hidden_units, hidden_units)
        self.bn_2 = nn.BatchNorm1d(hidden_units)
        self.relu_2 = nn.ReLU()
        self.layer_3 = nn.Linear(hidden_units, num_classes)

    def forward(self, x):
        """Return unnormalised class scores (logits) for a batch ``x``."""
        hidden = self.relu_1(self.layer_1(x))
        hidden = self.relu_2(self.bn_2(self.layer_2(hidden)))
        return self.layer_3(hidden)

# Train and evaluate the NarrowerMLP defined above on the PCA-reduced features.
# This cell (re)binds several globals (criterion, num_epochs, batch_size,
# train_loader, the *_tensor variables) that other MLP cells also assign.
print("Narrower Multilayer Perceptron (MLP) created")

# Instantiate the narrower MLP model; the input width is the number of feature columns
narrower_model = NarrowerMLP(input_size=train_features_pca.shape[1], num_classes=10)

# Define Loss Function and Optimizer for the narrower model
criterion = nn.CrossEntropyLoss()
optimizer_narrower = optim.SGD(narrower_model.parameters(), lr=0.001, momentum=0.9)

# Convert data to PyTorch tensors (rebuilt here so the cell does not depend on earlier MLP cells)
train_features_pca_tensor = torch.tensor(train_features_pca, dtype=torch.float32)
train_labels_tensor = torch.tensor(train_labels, dtype=torch.long)
test_features_pca_tensor = torch.tensor(test_features_pca, dtype=torch.float32)
test_labels_tensor = torch.tensor(test_labels, dtype=torch.long)

# Simple training loop
num_epochs = 10
batch_size = 64

# Mini-batches are reshuffled every epoch
train_dataset = torch.utils.data.TensorDataset(train_features_pca_tensor, train_labels_tensor)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

print("Starting training for Narrower MLP...")
for epoch in range(num_epochs):
    narrower_model.train()
    running_loss = 0.0
    for inputs, labels in train_loader:
        optimizer_narrower.zero_grad()
        outputs = narrower_model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer_narrower.step()
        running_loss += loss.item()
    # Reported value is the sum of per-batch losses divided by the number of batches
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {running_loss/len(train_loader):.4f}")

print("Training finished.")

# Evaluate the narrower model on the whole test set in a single forward pass
narrower_model.eval()
with torch.no_grad():
    outputs = narrower_model(test_features_pca_tensor)
    # torch.max over dim 1 returns (values, indices); the indices are the predicted class ids
    _, predicted_narrower = torch.max(outputs.data, 1)

# Generate and print evaluation report for the narrower MLP
pred_narrower_mlp = predicted_narrower.numpy()
print_eval_report(
    "MLP (Narrower, PCA-50)",
    test_labels, pred_narrower_mlp,
    class_names=cifar10_classes
)

# Add results to combined table (commented for now)
# rows.append(metrics_row(test_labels, pred_narrower_mlp, "MLP (Narrower, PCA-50)"))

Narrower Multilayer Perceptron (MLP) created
Starting training for Narrower MLP...
Epoch 1/10, Loss: 1.9557
Epoch 2/10, Loss: 1.2973
Epoch 3/10, Loss: 1.0012
Epoch 4/10, Loss: 0.8427
Epoch 5/10, Loss: 0.7418
Epoch 6/10, Loss: 0.6750
Epoch 7/10, Loss: 0.6266
Epoch 8/10, Loss: 0.5856
Epoch 9/10, Loss: 0.5543
Epoch 10/10, Loss: 0.5261
Training finished.

===== MLP (Narrower, PCA-50) =====
Confusion Matrix (rows=true, cols=pred):
[[80  0  1  2  1  0  1  0 11  4]
 [ 3 91  0  1  0  0  0  0  1  4]
 [ 8  1 64  6  4  8  8  1  0  0]
 [ 1  0  2 73  3 13  7  1  0  0]
 [ 2  0  1  4 80  3  1  8  1  0]
 [ 0  0  3 10  5 78  2  1  1  0]
 [ 1  0  3  4  3  2 86  0  1  0]
 [ 0  1  1  3  6  4  0 84  0  1]
 [ 6  1  1  0  0  0  0  0 90  2]
 [ 3  3  0  2  0  0  1  0  2 89]]

Overall:
- Accuracy       : 0.8150
- Macro Precision: 0.8178
- Macro Recall   : 0.8150
- Macro F1       : 0.8146
  class 0 (airplane): P=0.7692 R=0.8000 F1=0.7843
  class 1 (automobile): P=0.9381 R=0.9100 F1=0.9239
  class 2 (bird): P=0.8

Collecting metrics

In [None]:
# Record the narrower MLP's test-set metrics under its display label.
# NOTE(review): metrics_row is defined outside this section; presumably it returns
# one row for the combined results table — confirm against its definition.
row_mlp_narrower = metrics_row(test_labels, pred_narrower_mlp, "MLP (Narrower, PCA-50)")

# 6: Convolutional Neural Network (CNN)

## 6.1: Implementing and training a VGG11 net

Using `torch.nn.CrossEntropyLoss`, and optimize using SGD optimizer with `momentum=0.9`

Implementing VGG11 - according to assignment description

In [None]:
class VGG11(nn.Module):
  """VGG11-style CNN with batch normalisation.

  ``features`` is a stack of eight Conv(3x3, stride 1, padding 1) -> BatchNorm
  -> ReLU stages, five of which are followed by a 2x2 max-pool. ``classifier``
  is a three-layer fully connected head with dropout whose first layer expects
  512 flattened features.

  Args:
      num_classes: Number of output logits.
  """

  def __init__(self, num_classes=10):
    super().__init__()

    # (output channels, whether a 2x2 max-pool follows) for each conv stage.
    stage_plan = [
        (64, True), (128, True),
        (256, False), (256, True),
        (512, False), (512, True),
        (512, False), (512, True),
    ]

    # Build the feature extractor stage by stage; modules are appended in the
    # exact order they run, so Sequential indices match the layer order.
    layers = []
    channels_in = 3
    for channels_out, pool_after in stage_plan:
      layers.append(nn.Conv2d(channels_in, channels_out, 3, 1, 1))
      layers.append(nn.BatchNorm2d(channels_out))
      layers.append(nn.ReLU())
      if pool_after:
        layers.append(nn.MaxPool2d(2, 2))
      channels_in = channels_out
    self.features = nn.Sequential(*layers)

    self.classifier = nn.Sequential(
        nn.Linear(512, 4096), nn.ReLU(), nn.Dropout(0.5),
        nn.Linear(4096, 4096), nn.ReLU(), nn.Dropout(0.5),
        nn.Linear(4096, num_classes),
    )

  def forward(self, x):
    """Return class logits for an image batch ``x``."""
    flat = torch.flatten(self.features(x), 1)
    return self.classifier(flat)

Preparing data for the CNN (CIFAR-10 images are already 32x32, so no resizing is applied)

In [None]:
# CNN input pipeline: tensor conversion followed by per-channel normalisation
# with mean 0.5 / std 0.5. No resize step is applied.
transform_cnn = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

# Class-balanced subsets: get_subset keeps the first 500 (train) / 100 (test)
# images of each class from the full CIFAR-10 splits.
trainset_cnn = get_subset(
    torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_cnn),
    500,
)
testset_cnn = get_subset(
    torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_cnn),
    100,
)

# Only the training loader shuffles; the test order stays fixed.
trainloader = DataLoader(dataset=trainset_cnn, batch_size=64, shuffle=True)
testloader = DataLoader(dataset=testset_cnn, batch_size=64, shuffle=False)

Training the CNN

In [None]:
# Train the baseline VGG11, or reuse a saved checkpoint if one exists.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = VGG11(num_classes=10).to(device)

#where to store the model
model_path = "vgg11_base.pth"

#checking if model was saved and load it
# The existence check goes through model_path (not a repeated literal) so the
# file that is tested is always the file that is loaded and saved.
if os.path.exists(model_path):
  print("Loading saved model...")
  model.load_state_dict(torch.load(model_path, map_location=device))
  model.eval()
#Train from scratch
else:
  criterion = nn.CrossEntropyLoss()
  optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

  num_epochs = 10
  for epoch in range(num_epochs):
      model.train()
      running_loss = 0.0
      for images, labels in trainloader:
          images, labels = images.to(device), labels.to(device)

          optimizer.zero_grad()
          outputs = model(images)
          loss = criterion(outputs, labels)
          loss.backward()
          optimizer.step()

          running_loss += loss.item()

      #printing the loss to see that the model is training
      # (sum of per-batch losses divided by the number of batches)
      print(f"Epoch [{epoch+1}/{num_epochs}] - Loss: {running_loss/len(trainloader):.4f}")
  # Persist the trained weights so later runs can skip training
  torch.save(model.state_dict(), model_path)

Epoch [1/10] - Loss: 2.1291
Epoch [2/10] - Loss: 1.6786
Epoch [3/10] - Loss: 1.3879
Epoch [4/10] - Loss: 1.1219
Epoch [5/10] - Loss: 0.8563
Epoch [6/10] - Loss: 0.6547
Epoch [7/10] - Loss: 0.3824
Epoch [8/10] - Loss: 0.2169
Epoch [9/10] - Loss: 0.1597
Epoch [10/10] - Loss: 0.1243


Evaluation of CNN

In [None]:
def evaluate_model(model, testloader, class_names, title="CNN (VGG11, CIFAR-10)"):
    """Run ``model`` over ``testloader`` and print a classification report.

    Prints the confusion matrix, accuracy, macro precision/recall/F1 and the
    per-class precision/recall/F1.

    Args:
        model: Network to evaluate; it is switched to eval mode and inputs are
            moved to the device of its first parameter.
        testloader: Iterable yielding ``(images, labels)`` batches.
        class_names: Class display names; entry ``i`` names class id ``i``.
            Metrics are computed for class ids ``0 .. len(class_names) - 1``.
        title: Text shown in the report banner. The default reproduces the
            banner this function printed before the parameter existed.

    Returns:
        Tuple ``(all_labels, all_preds)`` of 1-D NumPy arrays holding the true
        and predicted class ids in loader order.
    """
    model.eval()
    device = next(model.parameters()).device
    all_preds, all_labels = [], []

    with torch.no_grad():
        for images, labels in testloader:
            # Only the inputs need to be on the model's device; labels are
            # collected on the CPU anyway.
            outputs = model(images.to(device))
            _, preds = torch.max(outputs, 1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    all_preds = np.array(all_preds)
    all_labels = np.array(all_labels)

    # Pin the label set so every metric array has one entry per class name.
    # Without this, a class missing from both labels and predictions shortens
    # the arrays and the per-class loop below raises IndexError.
    label_ids = list(range(len(class_names)))

    # Confusion matrix
    cm = confusion_matrix(all_labels, all_preds, labels=label_ids)
    acc = accuracy_score(all_labels, all_preds)
    prec, rec, f1, _ = precision_recall_fscore_support(
        all_labels, all_preds, labels=label_ids, average=None, zero_division=0)
    macro_prec, macro_rec, macro_f1, _ = precision_recall_fscore_support(
        all_labels, all_preds, labels=label_ids, average='macro', zero_division=0)

    print(f"===== {title} =====")
    print("Confusion Matrix (rows=true, cols=pred):")
    print(cm)
    print("\nOverall:")
    print(f"- Accuracy       : {acc:.4f}")
    print(f"- Macro Precision: {macro_prec:.4f}")
    print(f"- Macro Recall   : {macro_rec:.4f}")
    print(f"- Macro F1       : {macro_f1:.4f}")

    for i, name in enumerate(class_names):
        print(f"  class {i} ({name}): P={prec[i]:.4f} R={rec[i]:.4f} F1={f1[i]:.4f}")
    return all_labels, all_preds

# CIFAR-10 class labels; position i is the display name for class id i
class_names = ['airplane','automobile','bird','cat','deer','dog','frog','horse','ship','truck']

# Evaluate the global `model` (the baseline VGG11 from the training cell) on the test loader
labels_cnn, preds_cnn = evaluate_model(model, testloader, class_names)

#collecting metric
# NOTE(review): metrics_row is defined outside this section; presumably it
# returns one row for the combined results table — confirm against its definition.
row_cnn = metrics_row(labels_cnn, preds_cnn, "CNN (VGG11)")


===== CNN (VGG11, CIFAR-10) =====
Confusion Matrix (rows=true, cols=pred):
[[71  4  4  0  2  0  1  1 17  0]
 [ 3 93  0  0  0  0  0  0  3  1]
 [18  1 42  4 15 10  5  2  3  0]
 [ 9  2 10 27 18 15 13  4  2  0]
 [10  1 10  3 59  4  4  8  1  0]
 [ 4  3 14 16  9 44  1  6  3  0]
 [ 2  6  7  2 12  1 68  0  2  0]
 [11  0  1  4 23  5  0 54  1  1]
 [18 10  1  1  0  0  1  0 69  0]
 [ 8 47  1  1  3  1  1  5 16 17]]

Overall:
- Accuracy       : 0.5440
- Macro Precision: 0.5801
- Macro Recall   : 0.5440
- Macro F1       : 0.5241
  class 0 (airplane): P=0.4610 R=0.7100 F1=0.5591
  class 1 (automobile): P=0.5569 R=0.9300 F1=0.6966
  class 2 (bird): P=0.4667 R=0.4200 F1=0.4421
  class 3 (cat): P=0.4655 R=0.2700 F1=0.3418
  class 4 (deer): P=0.4184 R=0.5900 F1=0.4896
  class 5 (dog): P=0.5500 R=0.4400 F1=0.4889
  class 6 (frog): P=0.7234 R=0.6800 F1=0.7010
  class 7 (horse): P=0.6750 R=0.5400 F1=0.6000
  class 8 (ship): P=0.5897 R=0.6900 F1=0.6359
  class 9 (truck): P=0.8947 R=0.1700 F1=0.2857


## 6.2: Adding convolutional layers

In [None]:
class VGG11_Add(nn.Module):
    """VGG11-style CNN with one additional 512-channel convolution stage.

    ``features`` holds nine Conv(3x3, stride 1, padding 1) -> BatchNorm -> ReLU
    stages with four 2x2 max-pools; no pooling follows the last three stages.
    ``classifier`` is a three-layer fully connected head with dropout whose
    first layer expects 512 * 2 * 2 = 2048 flattened features.

    Args:
        num_classes: Number of output logits.
    """

    def __init__(self, num_classes=10):
        super(VGG11_Add, self).__init__()

        # (output channels, whether a 2x2 max-pool follows) for each conv stage.
        stage_plan = [
            (64, True), (128, True),
            (256, False), (256, True),
            (512, False), (512, True),
            (512, False), (512, False),
            (512, False),  # the added extra layer
        ]

        # Modules are appended in execution order so Sequential indices match
        # the layer order.
        layers = []
        channels_in = 3
        for channels_out, pool_after in stage_plan:
            layers.append(nn.Conv2d(channels_in, channels_out, 3, 1, 1))
            layers.append(nn.BatchNorm2d(channels_out))
            layers.append(nn.ReLU())
            if pool_after:
                layers.append(nn.MaxPool2d(2, 2))
            channels_in = channels_out
        self.features = nn.Sequential(*layers)

        # final feature map = 512×2×2 = 2048
        flat_features = 512 * 2 * 2
        self.classifier = nn.Sequential(
            nn.Linear(flat_features, 4096), nn.ReLU(), nn.Dropout(0.5),
            nn.Linear(4096, 4096), nn.ReLU(), nn.Dropout(0.5),
            nn.Linear(4096, num_classes),
        )

    def forward(self, x):
        """Return class logits for an image batch ``x``."""
        flat = torch.flatten(self.features(x), 1)
        return self.classifier(flat)


Preparing data after adding a layer

In [None]:
# CNN input pipeline: tensor conversion followed by per-channel normalisation
# with mean 0.5 / std 0.5. No resize step is applied.
transform_cnn = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

# Class-balanced subsets: get_subset keeps the first 500 (train) / 100 (test)
# images of each class from the full CIFAR-10 splits.
trainset_cnn = get_subset(
    torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_cnn),
    500,
)
testset_cnn = get_subset(
    torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_cnn),
    100,
)

# Only the training loader shuffles; the test order stays fixed.
trainloader = DataLoader(dataset=trainset_cnn, batch_size=64, shuffle=True)
testloader = DataLoader(dataset=testset_cnn, batch_size=64, shuffle=False)

Training CNN after adding layer

In [None]:
# Train the VGG11 variant with an added conv layer, or reuse a saved checkpoint.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = VGG11_Add(num_classes=10).to(device)

# Checkpoint file for this variant's weights
model_path = "vgg11_add.pth"

#checking if model was saved and load it
# Fix: the check previously tested the misspelled literal 'vvgg11_add.pth',
# which never exists, so the checkpoint written below was never reused.
# Testing model_path keeps the checked, loaded and saved file identical.
if os.path.exists(model_path):
  print("Loading saved model...")
  model.load_state_dict(torch.load(model_path, map_location=device))
  model.eval()
else:
  criterion = nn.CrossEntropyLoss()
  optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

  num_epochs = 10
  for epoch in range(num_epochs):
      model.train()
      running_loss = 0.0
      for images, labels in trainloader:
          images, labels = images.to(device), labels.to(device)

          optimizer.zero_grad()
          outputs = model(images)
          loss = criterion(outputs, labels)
          loss.backward()
          optimizer.step()

          running_loss += loss.item()

      #printing the loss to see that the model is training
      # (sum of per-batch losses divided by the number of batches)
      print(f"Epoch [{epoch+1}/{num_epochs}] - Loss: {running_loss/len(trainloader):.4f}")
  # Persist the trained weights so later runs can skip training
  torch.save(model.state_dict(), model_path)

Epoch [1/10] - Loss: 2.1743
Epoch [2/10] - Loss: 1.7558
Epoch [3/10] - Loss: 1.4336
Epoch [4/10] - Loss: 1.1342
Epoch [5/10] - Loss: 0.8868
Epoch [6/10] - Loss: 0.6371
Epoch [7/10] - Loss: 0.4843
Epoch [8/10] - Loss: 0.4207
Epoch [9/10] - Loss: 0.3262
Epoch [10/10] - Loss: 0.1797


Evaluation CNN after adding layer

In [None]:
def evaluate_model(model, testloader, class_names, title="CNN (VGG11-Added, CIFAR-10)"):
    """Run ``model`` over ``testloader`` and print a classification report.

    Prints the confusion matrix, accuracy, macro precision/recall/F1 and the
    per-class precision/recall/F1.

    Args:
        model: Network to evaluate; it is switched to eval mode and inputs are
            moved to the device of its first parameter.
        testloader: Iterable yielding ``(images, labels)`` batches.
        class_names: Class display names; entry ``i`` names class id ``i``.
            Metrics are computed for class ids ``0 .. len(class_names) - 1``.
        title: Text shown in the report banner. The default reproduces the
            banner this function printed before the parameter existed.

    Returns:
        Tuple ``(all_labels, all_preds)`` of 1-D NumPy arrays holding the true
        and predicted class ids in loader order.
    """
    model.eval()
    device = next(model.parameters()).device
    all_preds, all_labels = [], []

    with torch.no_grad():
        for images, labels in testloader:
            # Only the inputs need to be on the model's device; labels are
            # collected on the CPU anyway.
            outputs = model(images.to(device))
            _, preds = torch.max(outputs, 1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    all_preds = np.array(all_preds)
    all_labels = np.array(all_labels)

    # Pin the label set so every metric array has one entry per class name.
    # Without this, a class missing from both labels and predictions shortens
    # the arrays and the per-class loop below raises IndexError.
    label_ids = list(range(len(class_names)))

    # Confusion matrix
    cm = confusion_matrix(all_labels, all_preds, labels=label_ids)
    acc = accuracy_score(all_labels, all_preds)
    prec, rec, f1, _ = precision_recall_fscore_support(
        all_labels, all_preds, labels=label_ids, average=None, zero_division=0)
    macro_prec, macro_rec, macro_f1, _ = precision_recall_fscore_support(
        all_labels, all_preds, labels=label_ids, average='macro', zero_division=0)

    print(f"===== {title} =====")
    print("Confusion Matrix (rows=true, cols=pred):")
    print(cm)
    print("\nOverall:")
    print(f"- Accuracy       : {acc:.4f}")
    print(f"- Macro Precision: {macro_prec:.4f}")
    print(f"- Macro Recall   : {macro_rec:.4f}")
    print(f"- Macro F1       : {macro_f1:.4f}")

    for i, name in enumerate(class_names):
        print(f"  class {i} ({name}): P={prec[i]:.4f} R={rec[i]:.4f} F1={f1[i]:.4f}")
    return all_labels, all_preds

# CIFAR-10 class labels; position i is the display name for class id i
class_names = ['airplane','automobile','bird','cat','deer','dog','frog','horse','ship','truck']

# Evaluate the global `model` (the VGG11_Add instance from the training cell) on the test loader
labels_cnn_add, preds_cnn_add = evaluate_model(model, testloader, class_names)

# NOTE(review): metrics_row is defined outside this section; presumably it
# returns one row for the combined results table — confirm against its definition.
row_cnn_add    = metrics_row(labels_cnn_add, preds_cnn_add, "CNN (add)")


===== CNN (VGG11-Added, CIFAR-10) =====
Confusion Matrix (rows=true, cols=pred):
[[62  5  4  3  1  2  2  9  5  7]
 [ 2 87  0  1  0  0  0  1  2  7]
 [13  4 21  5  8 36  5  5  2  1]
 [ 4  7  3 31  1 30 11 10  0  3]
 [ 6  2  8  7 28 28  5 16  0  0]
 [ 1  4  6 11  0 61  3 11  2  1]
 [ 0  3  2 12  3 11 66  2  0  1]
 [ 1  0  1  2  1 23  0 69  1  2]
 [25 15  0  3  0  3  1  0 42 11]
 [ 5 16  0  1  0  3  2  3  1 69]]

Overall:
- Accuracy       : 0.5360
- Macro Precision: 0.5663
- Macro Recall   : 0.5360
- Macro F1       : 0.5242
  class 0 (airplane): P=0.5210 R=0.6200 F1=0.5662
  class 1 (automobile): P=0.6084 R=0.8700 F1=0.7160
  class 2 (bird): P=0.4667 R=0.2100 F1=0.2897
  class 3 (cat): P=0.4079 R=0.3100 F1=0.3523
  class 4 (deer): P=0.6667 R=0.2800 F1=0.3944
  class 5 (dog): P=0.3096 R=0.6100 F1=0.4108
  class 6 (frog): P=0.6947 R=0.6600 F1=0.6769
  class 7 (horse): P=0.5476 R=0.6900 F1=0.6106
  class 8 (ship): P=0.7636 R=0.4200 F1=0.5419
  class 9 (truck): P=0.6765 R=0.6900 F1=0.6832


## 6.3: Removing convolutional layers

In [None]:
class VGG11_Remove(nn.Module):
  """VGG11-style CNN truncated to its first five convolution stages.

  ``features`` holds five Conv(3x3, stride 1, padding 1) -> BatchNorm -> ReLU
  stages with three 2x2 max-pools; no pooling follows the final stage.
  ``classifier`` is a three-layer fully connected head with dropout whose
  first layer expects 512 * 4 * 4 = 8192 flattened features.

  Args:
      num_classes: Number of output logits.
  """

  def __init__(self, num_classes=10):
    super().__init__()

    # (output channels, whether a 2x2 max-pool follows) for each conv stage.
    # The three 512->512 stages of the baseline are removed.
    stage_plan = [
        (64, True), (128, True),
        (256, False), (256, True),
        (512, False),
    ]

    # Modules are appended in execution order so Sequential indices match the
    # layer order.
    layers = []
    channels_in = 3
    for channels_out, pool_after in stage_plan:
      layers.append(nn.Conv2d(channels_in, channels_out, 3, 1, 1))
      layers.append(nn.BatchNorm2d(channels_out))
      layers.append(nn.ReLU())
      if pool_after:
        layers.append(nn.MaxPool2d(2, 2))
      channels_in = channels_out
    self.features = nn.Sequential(*layers)

    flat_features = 512*4*4
    self.classifier = nn.Sequential(
        nn.Linear(flat_features, 4096), nn.ReLU(), nn.Dropout(0.5),
        nn.Linear(4096, 4096), nn.ReLU(), nn.Dropout(0.5),
        nn.Linear(4096, num_classes),
    )

  def forward(self, x):
    """Return class logits for an image batch ``x``."""
    flat = torch.flatten(self.features(x), 1)
    return self.classifier(flat)

Preparing data after removing layer

In [None]:
# CNN input pipeline: tensor conversion followed by per-channel normalisation
# with mean 0.5 / std 0.5. No resize step is applied.
transform_cnn = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

# Class-balanced subsets: get_subset keeps the first 500 (train) / 100 (test)
# images of each class from the full CIFAR-10 splits.
trainset_cnn = get_subset(
    torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_cnn),
    500,
)
testset_cnn = get_subset(
    torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_cnn),
    100,
)

# Only the training loader shuffles; the test order stays fixed.
trainloader = DataLoader(dataset=trainset_cnn, batch_size=64, shuffle=True)
testloader = DataLoader(dataset=testset_cnn, batch_size=64, shuffle=False)

Training CNN after removing layer

In [None]:
# Train the VGG11 variant with conv layers removed, or reuse a saved checkpoint.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = VGG11_Remove(num_classes=10).to(device)

# Checkpoint file for this variant's weights
model_path = "vgg11_remove.pth"

#checking if model was saved and load it
# Fix: the check previously tested the misspelled literal 'vvgg11_remove.pth',
# which never exists, so the checkpoint written below was never reused.
# Testing model_path keeps the checked, loaded and saved file identical.
if os.path.exists(model_path):
  print("Loading saved model...")
  model.load_state_dict(torch.load(model_path, map_location=device))
  model.eval()
else:
  criterion = nn.CrossEntropyLoss()
  optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

  num_epochs = 10
  for epoch in range(num_epochs):
      model.train()
      running_loss = 0.0
      for images, labels in trainloader:
          images, labels = images.to(device), labels.to(device)

          optimizer.zero_grad()
          outputs = model(images)
          loss = criterion(outputs, labels)
          loss.backward()
          optimizer.step()

          running_loss += loss.item()

      #printing the loss to see that the model is training
      # (sum of per-batch losses divided by the number of batches)
      print(f"Epoch [{epoch+1}/{num_epochs}] - Loss: {running_loss/len(trainloader):.4f}")
  # Persist the trained weights so later runs can skip training
  torch.save(model.state_dict(), model_path)

Epoch [1/10] - Loss: 2.1968
Epoch [2/10] - Loss: 1.8405
Epoch [3/10] - Loss: 1.5912
Epoch [4/10] - Loss: 1.4139
Epoch [5/10] - Loss: 1.2917
Epoch [6/10] - Loss: 1.1850
Epoch [7/10] - Loss: 1.0598
Epoch [8/10] - Loss: 0.9210
Epoch [9/10] - Loss: 0.8102
Epoch [10/10] - Loss: 0.6758


Evaluating CNN after removing layer

In [None]:
def evaluate_model(model, testloader, class_names, title="CNN (VGG11-Remove, CIFAR-10)"):
    """Run ``model`` over ``testloader`` and print a classification report.

    Prints the confusion matrix, accuracy, macro precision/recall/F1 and the
    per-class precision/recall/F1.

    Args:
        model: Network to evaluate; it is switched to eval mode and inputs are
            moved to the device of its first parameter.
        testloader: Iterable yielding ``(images, labels)`` batches.
        class_names: Class display names; entry ``i`` names class id ``i``.
            Metrics are computed for class ids ``0 .. len(class_names) - 1``.
        title: Text shown in the report banner. The default reproduces the
            banner this function printed before the parameter existed.

    Returns:
        Tuple ``(all_labels, all_preds)`` of 1-D NumPy arrays holding the true
        and predicted class ids in loader order.
    """
    model.eval()
    device = next(model.parameters()).device
    all_preds, all_labels = [], []

    with torch.no_grad():
        for images, labels in testloader:
            # Only the inputs need to be on the model's device; labels are
            # collected on the CPU anyway.
            outputs = model(images.to(device))
            _, preds = torch.max(outputs, 1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    all_preds = np.array(all_preds)
    all_labels = np.array(all_labels)

    # Pin the label set so every metric array has one entry per class name.
    # Without this, a class missing from both labels and predictions shortens
    # the arrays and the per-class loop below raises IndexError.
    label_ids = list(range(len(class_names)))

    # Confusion matrix
    cm = confusion_matrix(all_labels, all_preds, labels=label_ids)
    acc = accuracy_score(all_labels, all_preds)
    prec, rec, f1, _ = precision_recall_fscore_support(
        all_labels, all_preds, labels=label_ids, average=None, zero_division=0)
    macro_prec, macro_rec, macro_f1, _ = precision_recall_fscore_support(
        all_labels, all_preds, labels=label_ids, average='macro', zero_division=0)

    print(f"===== {title} =====")
    print("Confusion Matrix (rows=true, cols=pred):")
    print(cm)
    print("\nOverall:")
    print(f"- Accuracy       : {acc:.4f}")
    print(f"- Macro Precision: {macro_prec:.4f}")
    print(f"- Macro Recall   : {macro_rec:.4f}")
    print(f"- Macro F1       : {macro_f1:.4f}")

    for i, name in enumerate(class_names):
        print(f"  class {i} ({name}): P={prec[i]:.4f} R={rec[i]:.4f} F1={f1[i]:.4f}")
    return all_labels, all_preds

# CIFAR-10 class labels; position i is the display name for class id i
class_names = ['airplane','automobile','bird','cat','deer','dog','frog','horse','ship','truck']

# Evaluate the global `model` (the VGG11_Remove instance from the training cell) on the test loader
labels_cnn_remove, preds_cnn_remove = evaluate_model(model, testloader, class_names)

# NOTE(review): metrics_row is defined outside this section; presumably it
# returns one row for the combined results table — confirm against its definition.
row_cnn_remove = metrics_row(labels_cnn_remove, preds_cnn_remove, "CNN (remove)")


===== CNN (VGG11-Remove, CIFAR-10) =====
Confusion Matrix (rows=true, cols=pred):
[[59  5  4  0  0  5  0  3 14 10]
 [ 1 82  1  1  0  1  1  1  4  8]
 [10  4 43  4  8 23  3  3  2  0]
 [ 5  5 10 24  3 33  6 10  0  4]
 [ 7  2 21  1 27 22  2 15  1  2]
 [ 0  1  9  7  2 70  1  8  2  0]
 [ 0  5 16  4  1  6 66  1  1  0]
 [ 1  2  2  4  2 16  0 67  3  3]
 [12  9  0  2  0  2  0  0 64 11]
 [ 1 13  1  0  0  1  0  2  7 75]]

Overall:
- Accuracy       : 0.5770
- Macro Precision: 0.5948
- Macro Recall   : 0.5770
- Macro F1       : 0.5669
  class 0 (airplane): P=0.6146 R=0.5900 F1=0.6020
  class 1 (automobile): P=0.6406 R=0.8200 F1=0.7193
  class 2 (bird): P=0.4019 R=0.4300 F1=0.4155
  class 3 (cat): P=0.5106 R=0.2400 F1=0.3265
  class 4 (deer): P=0.6279 R=0.2700 F1=0.3776
  class 5 (dog): P=0.3911 R=0.7000 F1=0.5018
  class 6 (frog): P=0.8354 R=0.6600 F1=0.7374
  class 7 (horse): P=0.6091 R=0.6700 F1=0.6381
  class 8 (ship): P=0.6531 R=0.6400 F1=0.6465
  class 9 (truck): P=0.6637 R=0.7500 F1=0.7042


## 6.4: Larger kernel size

Kernel size of 5x5

In [None]:
class VGG11_kernel5(nn.Module):
  """VGG11-style CNN whose convolutions use 5x5 kernels.

  ``features`` is a stack of eight Conv(5x5, stride 1, padding 2) -> BatchNorm
  -> ReLU stages, five of which are followed by a 2x2 max-pool. ``classifier``
  is a three-layer fully connected head with dropout whose first layer expects
  512 flattened features.

  Args:
      num_classes: Number of output logits.
  """

  def __init__(self, num_classes=10):
    super().__init__()

    # Changing the kernel size to 5x5; padding 2 keeps the spatial size unchanged.
    kernel, stride, pad = 5, 1, 2

    # (output channels, whether a 2x2 max-pool follows) for each conv stage.
    stage_plan = [
        (64, True), (128, True),
        (256, False), (256, True),
        (512, False), (512, True),
        (512, False), (512, True),
    ]

    # Modules are appended in execution order so Sequential indices match the
    # layer order.
    layers = []
    channels_in = 3
    for channels_out, pool_after in stage_plan:
      layers.append(nn.Conv2d(channels_in, channels_out, kernel, stride, pad))
      layers.append(nn.BatchNorm2d(channels_out))
      layers.append(nn.ReLU())
      if pool_after:
        layers.append(nn.MaxPool2d(2, 2))
      channels_in = channels_out
    self.features = nn.Sequential(*layers)

    self.classifier = nn.Sequential(
        nn.Linear(512, 4096), nn.ReLU(), nn.Dropout(0.5),
        nn.Linear(4096, 4096), nn.ReLU(), nn.Dropout(0.5),
        nn.Linear(4096, num_classes),
    )

  def forward(self, x):
    """Return class logits for an image batch ``x``."""
    flat = torch.flatten(self.features(x), 1)
    return self.classifier(flat)

Prepare data for kernel 5x5

In [None]:
# CNN input pipeline: tensor conversion followed by per-channel normalisation
# with mean 0.5 / std 0.5. No resize step is applied.
transform_cnn = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

# Class-balanced subsets: get_subset keeps the first 500 (train) / 100 (test)
# images of each class from the full CIFAR-10 splits.
trainset_cnn = get_subset(
    torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_cnn),
    500,
)
testset_cnn = get_subset(
    torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_cnn),
    100,
)

# Only the training loader shuffles; the test order stays fixed.
trainloader = DataLoader(dataset=trainset_cnn, batch_size=64, shuffle=True)
testloader = DataLoader(dataset=testset_cnn, batch_size=64, shuffle=False)

Training CNN with 5x5 kernel size

In [None]:
# Train the 5x5-kernel VGG11 variant, or reuse a saved checkpoint.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = VGG11_kernel5(num_classes=10).to(device)

# Checkpoint file for this variant's weights
model_path = "vgg11_k5.pth"

#checking if model was saved and load it
# Fix: the check previously tested the misspelled literal 'vvgg11_k5.pth',
# which never exists, so the checkpoint written below was never reused.
# Testing model_path keeps the checked, loaded and saved file identical.
if os.path.exists(model_path):
  print("Loading saved model...")
  model.load_state_dict(torch.load(model_path, map_location=device))
  model.eval()
else:
  criterion = nn.CrossEntropyLoss()
  optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

  num_epochs = 10
  for epoch in range(num_epochs):
      model.train()
      running_loss = 0.0
      for images, labels in trainloader:
          images, labels = images.to(device), labels.to(device)

          optimizer.zero_grad()
          outputs = model(images)
          loss = criterion(outputs, labels)
          loss.backward()
          optimizer.step()

          running_loss += loss.item()

      #printing the loss to see that the model is training
      # (sum of per-batch losses divided by the number of batches)
      print(f"Epoch [{epoch+1}/{num_epochs}] - Loss: {running_loss/len(trainloader):.4f}")
  # Persist the trained weights so later runs can skip training
  torch.save(model.state_dict(), model_path)

Epoch [1/10] - Loss: 2.0645
Epoch [2/10] - Loss: 1.6382
Epoch [3/10] - Loss: 1.3605
Epoch [4/10] - Loss: 1.1716
Epoch [5/10] - Loss: 0.9356
Epoch [6/10] - Loss: 0.7532
Epoch [7/10] - Loss: 0.5631
Epoch [8/10] - Loss: 0.4055
Epoch [9/10] - Loss: 0.4562
Epoch [10/10] - Loss: 0.3220


Evaluating the CNN with kernel size 5x5

In [None]:
def evaluate_model(model, testloader, class_names, title="CNN (VGG11_Kernel5, CIFAR-10)"):
    """Evaluate a classifier on a data loader and print a metrics report.

    Args:
        model: Module whose forward pass returns one score per class for each
            input; it is switched to eval mode and run on the device its
            parameters live on.
        testloader: Iterable yielding ``(images, labels)`` batches.
        class_names: Class names ordered by integer label, i.e.
            ``class_names[i]`` names label ``i``.
        title: Text shown in the report header. The default reproduces the
            header this function always printed before.

    Returns:
        Tuple ``(all_labels, all_preds)`` of NumPy arrays with the true and
        predicted label of every evaluated sample.
    """
    model.eval()
    device = next(model.parameters()).device
    all_preds, all_labels = [], []

    with torch.no_grad():
        for images, labels in testloader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, preds = torch.max(outputs, 1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    all_preds = np.array(all_preds)
    all_labels = np.array(all_labels)

    # Pin the label set to the indices of class_names so that the confusion
    # matrix and the per-class arrays always have one entry per listed class,
    # even when a class is absent from this evaluation set.
    label_ids = np.arange(len(class_names))

    # Confusion matrix and aggregate metrics
    cm = confusion_matrix(all_labels, all_preds, labels=label_ids)
    acc = accuracy_score(all_labels, all_preds)
    # zero_division=0 reports 0 for a class with no predicted/true samples
    # instead of emitting a warning.
    prec, rec, f1, _ = precision_recall_fscore_support(
        all_labels, all_preds, labels=label_ids, average=None, zero_division=0
    )
    macro_prec, macro_rec, macro_f1, _ = precision_recall_fscore_support(
        all_labels, all_preds, labels=label_ids, average='macro', zero_division=0
    )

    print(f"===== {title} =====")
    print("Confusion Matrix (rows=true, cols=pred):")
    print(cm)
    print("\nOverall:")
    print(f"- Accuracy       : {acc:.4f}")
    print(f"- Macro Precision: {macro_prec:.4f}")
    print(f"- Macro Recall   : {macro_rec:.4f}")
    print(f"- Macro F1       : {macro_f1:.4f}")

    for i, name in enumerate(class_names):
        print(f"  class {i} ({name}): P={prec[i]:.4f} R={rec[i]:.4f} F1={f1[i]:.4f}")
    return all_labels, all_preds

# CIFAR-10 class names; position i in the list names integer label i.
class_names = [
    'airplane', 'automobile', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
]

# Print the report for the 5x5-kernel CNN and keep its labels/predictions.
labels_cnn_k5, preds_cnn_k5 = evaluate_model(
    model=model,
    testloader=testloader,
    class_names=class_names,
)

# Summary row for this model; it is collected into the evaluation table later.
row_cnn_k5 = metrics_row(labels_cnn_k5, preds_cnn_k5, "CNN (Kernel 5x5)")

===== CNN (VGG11_Kernel5, CIFAR-10) =====
Confusion Matrix (rows=true, cols=pred):
[[51 22  0  2  2  2  2 10  5  4]
 [ 0 93  0  0  1  2  1  2  0  1]
 [12  7  5  5 13 34  8 13  3  0]
 [ 3  8  0 17  8 43 10  9  0  2]
 [ 2  4  0  6 46 10 10 19  1  2]
 [ 0  2  1  2  2 72  4 16  1  0]
 [ 0  5  0  7 13  6 66  2  1  0]
 [ 0  1  0  3  2 11  1 80  0  2]
 [10 34  0  3  1  1  1  1 48  1]
 [ 2 40  0  0  0  2  0  8  2 46]]

Overall:
- Accuracy       : 0.5240
- Macro Precision: 0.5916
- Macro Recall   : 0.5240
- Macro F1       : 0.4926
  class 0 (airplane): P=0.6375 R=0.5100 F1=0.5667
  class 1 (automobile): P=0.4306 R=0.9300 F1=0.5886
  class 2 (bird): P=0.8333 R=0.0500 F1=0.0943
  class 3 (cat): P=0.3778 R=0.1700 F1=0.2345
  class 4 (deer): P=0.5227 R=0.4600 F1=0.4894
  class 5 (dog): P=0.3934 R=0.7200 F1=0.5088
  class 6 (frog): P=0.6408 R=0.6600 F1=0.6502
  class 7 (horse): P=0.5000 R=0.8000 F1=0.6154
  class 8 (ship): P=0.7869 R=0.4800 F1=0.5963
  class 9 (truck): P=0.7931 R=0.4600 F1=0.5823


## 6.5: Smaller kernel size

The 3x3 kernel is kept as the smaller kernel size; it is already implemented in the CNN above.

# 7: Evaluation Table

Building Evaluation Table

In [None]:
# One summary row per evaluated model, grouped by model family.
rows = [
    # Gaussian Naive Bayes
    row_gnb_scratch, row_gnb_sklearn,
    # Decision trees
    row_dt_scratch, row_dt_sklearn,
    # MLP variants
    row_mlp_initial, row_mlp_deeper, row_mlp_shallower, row_mlp_wider, row_mlp_narrower,
    # CNN variants
    row_cnn, row_cnn_add, row_cnn_remove, row_cnn_k5,
]

# Assemble the comparison table and display it as the cell output.
df_results = pd.DataFrame(rows)
df_results

Unnamed: 0,Model,Accuracy,Macro Precision,Macro Recall,Macro F1
0,Naive Bayes (Scratch),0.789,0.793704,0.789,0.789589
1,Scikit’s Gaussian Naive Bayes,0.789,0.793704,0.789,0.789589
2,Decision Tree (Scratch),0.605,0.615532,0.605,0.607692
3,Scikit’s implementation of a Decision Tree,0.607,0.616466,0.607,0.609781
4,MLP,0.075,0.155047,0.075,0.066388
5,"MLP (Deeper, PCA-50)",0.824,0.829258,0.824,0.824366
6,"MLP (Shallower, PCA-50)",0.802,0.804739,0.802,0.802436
7,"MLP (Wider, PCA-50)",0.842,0.844909,0.842,0.842337
8,"MLP (Narrower, PCA-50)",0.815,0.817791,0.815,0.814577
9,CNN (VGG11),0.544,0.580143,0.544,0.524077
