# SVM Vs ResNet DS675 Project
This project makes use of the CIFAR-10 Image classification datset to compare the performance of SVM and CNN.

In [13]:
# accessing the data batches and combining to from train and test set
import pickle
import numpy as np

def unpickle(file):
    with open(file, 'rb') as fo:
        dict = pickle.load(fo, encoding='bytes')
    return dict


In [14]:
X_train = []
y_train = []

for i in range(1, 6):
    batch = unpickle(f'cifar-10-batches-py/data_batch_{i}') # batches are of shape (10000, 3072)
    X_train.append(batch[b'data'])
    y_train.extend(batch[b'labels'])

X_train = np.vstack(X_train)   # shape (50000, 3072), after vertically stacking
y_train = np.array(y_train)    # shape (50000, )

# Test set
test_batch = unpickle('cifar-10-batches-py/test_batch')
X_test = test_batch[b'data']   # shape (10000, 3072)
y_test = np.array(test_batch[b'labels'])  #shape (10000, )

The SVM uses flat image data(1D) which is what the current X_train and X_test shapes are, but NN requires it to be in the true image format of 32 x 32 i.e 2D (3 channel)

In [15]:
# reshape the image data to be compliant with CNN
X_train_img = X_train.reshape(-1, 3, 32, 32).transpose(0, 2, 3, 1)
X_test_img = X_test.reshape(-1, 3, 32, 32).transpose(0, 2, 3, 1)


# Preprocessing for each model

In [16]:
# SVM - Standardize
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
X_train_svm = scaler.fit_transform(X_train.astype("float32"))
X_test_svm = scaler.transform(X_test.astype("float32"))


In [17]:
# CNN - Normalize
X_train_cnn = X_train_img.astype('float32') / 255.0
X_test_cnn = X_test_img.astype('float32') / 255.0

In [9]:
print(X_train.shape, X_test.shape)
print(X_train_img.shape, X_test_img.shape)
print(y_train.shape, y_test.shape)
print(X_train.dtype, X_train_img.dtype)


(50000, 3072) (10000, 3072)
(50000, 32, 32, 3) (10000, 32, 32, 3)
(50000,) (10000,)
uint8 uint8


# Moving onto SVM 5 fold CV + grid on C, record runtime + inference

In [18]:
from sklearn.svm import LinearSVC
from sklearn.model_selection import GridSearchCV, StratifiedKFold
import time, pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

seed = 42
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=seed)

param_grid = {'C':[0.1, 0.01, 0.001]}

clf = LinearSVC(dual=False, max_iter=10000, random_state=seed)

In [19]:
grid = GridSearchCV(clf, param_grid, cv=cv, scoring='accuracy', return_train_score=True, n_jobs=2, verbose=2)

t0 = time.perf_counter()
grid.fit(X_train_svm, y_train)
t1 = time.perf_counter()
print(f"GridSearch total time: {t1-t0:.1f} s")

Fitting 5 folds for each of 3 candidates, totalling 15 fits
GridSearch total time: 21674.4 s


In [20]:
# summary results
res = pd.DataFrame(grid.cv_results_)
print(res[['params','mean_test_score','std_test_score','mean_fit_time','mean_score_time']])

best = grid.best_estimator_
print("Best params:", grid.best_params_, "CV acc:", grid.best_score_)

         params  mean_test_score  std_test_score  mean_fit_time  \
0    {'C': 0.1}          0.36012        0.002851    4792.973716   
1   {'C': 0.01}          0.37592        0.005380    2044.011279   
2  {'C': 0.001}          0.39440        0.004984     944.685522   

   mean_score_time  
0         0.178174  
1         0.128166  
2         0.108715  
Best params: {'C': 0.001} CV acc: 0.39440000000000003


In [21]:
# Inference time on final test set
t0 = time.perf_counter()
y_pred_test = best.predict(X_test_svm)
t1 = time.perf_counter()
inf_time_per_sample = (t1-t0)/len(X_test_svm)
print("Test accuracy:", accuracy_score(y_test, y_pred_test))
print("Inference time (avg ms/sample):", inf_time_per_sample*1000)
print("Confusion matrix:\n", confusion_matrix(y_test, y_pred_test))
print(classification_report(y_test, y_pred_test))

Test accuracy: 0.3975
Inference time (avg ms/sample): 0.034849630000826436
Confusion matrix:
 [[496  49  34  21  24  21  27  53 194  81]
 [ 68 490  18  21  18  29  44  58  78 176]
 [110  52 227  69 104  88 168  90  62  30]
 [ 63  74  79 179  44 196 178  52  57  78]
 [ 64  39 105  51 261  89 194 125  32  40]
 [ 50  70  78 122  73 324 104  78  58  43]
 [ 26  57  62  83  84  73 512  41  24  38]
 [ 62  58  52  39  69  78  50 452  44  96]
 [135  77  12  18   7  36  19  22 563 111]
 [ 62 183  18  21  16  25  54  59  91 471]]
              precision    recall  f1-score   support

           0       0.44      0.50      0.46      1000
           1       0.43      0.49      0.46      1000
           2       0.33      0.23      0.27      1000
           3       0.29      0.18      0.22      1000
           4       0.37      0.26      0.31      1000
           5       0.34      0.32      0.33      1000
           6       0.38      0.51      0.44      1000
           7       0.44      0.45      0.4

# Moving onto ResNet18 for CIFAR10 dataset


In [24]:
# using pytorch for ResNet18
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Subset, TensorDataset
import torchvision
import torchvision.transforms as transforms
from torchvision.models import resnet18
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import time


In [25]:
# converting the previously NHWC format(batch, height, width, channels) to NCHW format (batch, channels, height, width)
X_train_tensor = torch.tensor(X_train_cnn).permute(0, 3, 1, 2)  # (N, 3, 32, 32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)

X_test_tensor = torch.tensor(X_test_cnn).permute(0, 3, 1, 2)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)

# using resnet without pretrained weights, i.e training from scratch

In [26]:
device = "cuda" if torch.cuda.is_available() else "cpu"

def train_resnet(train_loader, val_loader=None, num_epochs=10, lr=0.001):
    model = resnet18(weights=None)
    model.fc = nn.Linear(model.fc.in_features, 10)
    model = model.to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    best_val_acc = 0
    start_time = time.perf_counter()

    for epoch in range(num_epochs):
        model.train()
        running_loss, correct, total = 0, 0, 0

        for xb, yb in train_loader:
            xb, yb = xb.to(device), yb.to(device)

            optimizer.zero_grad()
            preds = model(xb)
            loss = criterion(preds, yb)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            _, predicted = preds.max(1)
            total += yb.size(0)
            correct += (predicted == yb).sum().item()

        train_acc = correct / total

        if val_loader:
            model.eval()
            correct, total = 0, 0
            with torch.no_grad():
                for xb, yb in val_loader:
                    xb, yb = xb.to(device), yb.to(device)
                    preds = model(xb)
                    _, predicted = preds.max(1)
                    total += yb.size(0)
                    correct += (predicted == yb).sum().item()
            val_acc = correct / total
            print(f"Epoch {epoch+1}: "
                  f"Train Loss={running_loss/len(train_loader):.4f}, "
                  f"Train Acc={train_acc:.4f}, Val Acc={val_acc:.4f}")
            best_val_acc = max(best_val_acc, val_acc)

    total_time = time.perf_counter() - start_time
    return best_val_acc, total_time, model

In [27]:
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
cv_results = []

for fold, (train_idx, val_idx) in enumerate(skf.split(X_train_tensor, y_train_tensor)):
    print(f"\n===== Fold {fold+1} =====")

    train_ds = TensorDataset(X_train_tensor[train_idx], y_train_tensor[train_idx])
    val_ds   = TensorDataset(X_train_tensor[val_idx], y_train_tensor[val_idx])

    train_loader = DataLoader(train_ds, batch_size=64, shuffle=True)
    val_loader   = DataLoader(val_ds, batch_size=64, shuffle=False)

    best_val_acc, runtime, _ = train_resnet(train_loader, val_loader, num_epochs=10)

    cv_results.append((fold+1, best_val_acc, runtime))

print("\nCross-validation results:")
for fold, acc, rt in cv_results:
    print(f"Fold {fold}: Best Val Acc={acc:.4f}, Runtime={rt:.2f}s")


===== Fold 1 =====
Epoch 1: Train Loss=1.4404, Train Acc=0.4830, Val Acc=0.5310
Epoch 2: Train Loss=1.0581, Train Acc=0.6264, Val Acc=0.5467
Epoch 3: Train Loss=0.8890, Train Acc=0.6901, Val Acc=0.6265
Epoch 4: Train Loss=0.7574, Train Acc=0.7367, Val Acc=0.6625
Epoch 5: Train Loss=0.6483, Train Acc=0.7722, Val Acc=0.6769
Epoch 6: Train Loss=0.5586, Train Acc=0.8052, Val Acc=0.6820
Epoch 7: Train Loss=0.4718, Train Acc=0.8357, Val Acc=0.6808
Epoch 8: Train Loss=0.3880, Train Acc=0.8652, Val Acc=0.7426
Epoch 9: Train Loss=0.3144, Train Acc=0.8917, Val Acc=0.7340
Epoch 10: Train Loss=0.2592, Train Acc=0.9093, Val Acc=0.7323

===== Fold 2 =====
Epoch 1: Train Loss=1.4513, Train Acc=0.4784, Val Acc=0.5086
Epoch 2: Train Loss=1.0602, Train Acc=0.6282, Val Acc=0.6251
Epoch 3: Train Loss=0.8797, Train Acc=0.6913, Val Acc=0.6413
Epoch 4: Train Loss=0.7510, Train Acc=0.7371, Val Acc=0.6713
Epoch 5: Train Loss=0.6467, Train Acc=0.7750, Val Acc=0.6899
Epoch 6: Train Loss=0.5536, Train Acc=0.8079

In [28]:
# Train on full dataset
full_train_ds = TensorDataset(X_train_tensor, y_train_tensor)
full_train_loader = DataLoader(full_train_ds, batch_size=64, shuffle=True)

_, train_time, final_model = train_resnet(full_train_loader, num_epochs=15)  # train longer

print(f"\nFinal training time on full dataset: {train_time:.2f}s")


Final training time on full dataset: 241.61s


In [29]:
# Inference on test set
test_loader = DataLoader(TensorDataset(X_test_tensor, y_test_tensor), batch_size=64, shuffle=False)

t0 = time.perf_counter()
y_true, y_pred = [], []

final_model.eval()
with torch.no_grad():
    for xb, yb in test_loader:
        xb = xb.to(device)
        preds = final_model(xb)
        _, predicted = preds.max(1)
        y_true.extend(yb.tolist())
        y_pred.extend(predicted.cpu().tolist())
t1 = time.perf_counter()

inf_time_per_sample = (t1 - t0) / len(X_test_tensor)

# Metrics
print("Test accuracy:", accuracy_score(y_true, y_pred))
print("Inference time (avg ms/sample):", inf_time_per_sample*1000)
print("Confusion matrix:\n", confusion_matrix(y_true, y_pred))
print(classification_report(y_true, y_pred))

Test accuracy: 0.7354
Inference time (avg ms/sample): 0.06238467000075616
Confusion matrix:
 [[832   9  20  12  32   4  16   3  35  37]
 [ 18 772   6   3   3   2   8   3  29 156]
 [ 66   6 548  56 156  31  79  25  17  16]
 [ 25   6  67 492 138  94  82  42  24  30]
 [ 13   4  22  25 822   4  36  64   7   3]
 [ 12   6  49 171 113 493  50  75  11  20]
 [  5   3   9  45  69  16 834   4   6   9]
 [ 15   3  18  15  69  19   9 825   6  21]
 [ 68  16   5   5  10   3   8   0 859  26]
 [ 33  27   4   9   2   4   7  11  26 877]]
              precision    recall  f1-score   support

           0       0.77      0.83      0.80      1000
           1       0.91      0.77      0.83      1000
           2       0.73      0.55      0.63      1000
           3       0.59      0.49      0.54      1000
           4       0.58      0.82      0.68      1000
           5       0.74      0.49      0.59      1000
           6       0.74      0.83      0.78      1000
           7       0.78      0.82      0.80

## Linear SVM Baseline

**Setup:**
- Input: Flattened CIFAR-10 images (32×32×3 → 3072 features).
- Preprocessing: Standardization with `StandardScaler`.
- Model: `LinearSVC` (only linear kernel, no RBF/poly).
- Hyperparameter Search: GridSearchCV with `C ∈ {0.1, 0.01, 0.001}` (5-fold CV).
- Training: 15 fits in total.

**Results:**
- Best Hyperparameter: `C = 0.001`
- Cross-Validation Accuracy: ~0.394
- Final Test Accuracy: **39.7%**
- Inference Speed: ~0.035 ms/sample

**Observations:**
- Limited by linear decision boundaries → struggles with complex, non-linear image patterns.
- Accuracy is only modestly above random guessing (10% for CIFAR-10).
- Confusion matrix shows widespread misclassification across categories.

## ResNet18 (CNN) Baseline

**Setup:**
- Input: CIFAR-10 images reshaped to `(N, 3, 32, 32)` and normalized to `[0,1]`.
- Model: ResNet18 (transfer learning variant, adapted for 10 classes).
- Training: 5-fold Stratified CV (10 epochs each fold).
- Optimizer: Adam, LR = 0.001, Batch size = 64.
- Final training on full training set with same configuration.

**Results:**
- Cross-Validation Accuracy (best per fold): ~73–75%
- Final Test Accuracy: **73.5%**
- Inference Speed: ~0.062 ms/sample
- Confusion matrix shows much stronger diagonal dominance compared to SVM.

**Observations:**
- CNN clearly outperforms SVM, capturing local spatial patterns in images.
- Some class confusion remains (e.g., between vehicles and animals).
- Stronger generalization even with relatively short training (10 epochs).

---

## Key Takeaways

- **SVM Baseline:** Demonstrates the challenge of applying classical ML directly to raw image pixels. Good as a "classical ML reference" but not competitive.
- **ResNet18 Baseline:** Achieves ~73% accuracy, showing the effectiveness of deep CNNs for image classification.

# Trying something further, with hyperparameter tuning

In [33]:
# A dictionary to store the results
learning_rates = [0.0001, 0.001, 0.01]
results = {}

# We'll use the first fold from your 5-fold split for this experiment
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
train_idx, val_idx = next(iter(skf.split(X_train_tensor, y_train_tensor)))

# Create the datasets and dataloaders
train_ds = TensorDataset(X_train_tensor[train_idx], y_train_tensor[train_idx])
val_ds = TensorDataset(X_train_tensor[val_idx], y_train_tensor[val_idx])
train_loader = DataLoader(train_ds, batch_size=64, shuffle=True)
val_loader = DataLoader(val_ds, batch_size=64, shuffle=False)

In [35]:
for lr in learning_rates:
    print(f"--- Training with learning rate: {lr} ---")
    
    # We only need the accuracy, so we can use _ to ignore the other return values
    best_val_acc, _, _ = train_resnet(train_loader, val_loader, num_epochs=15, lr=lr)
    
    results[lr] = best_val_acc

--- Training with learning rate: 0.0001 ---
Epoch 1: Train Loss=1.5729, Train Acc=0.4317, Val Acc=0.5115
Epoch 2: Train Loss=1.1996, Train Acc=0.5707, Val Acc=0.5618
Epoch 3: Train Loss=0.9855, Train Acc=0.6504, Val Acc=0.5921
Epoch 4: Train Loss=0.8014, Train Acc=0.7165, Val Acc=0.5802
Epoch 5: Train Loss=0.6312, Train Acc=0.7778, Val Acc=0.6056
Epoch 6: Train Loss=0.4707, Train Acc=0.8366, Val Acc=0.6031
Epoch 7: Train Loss=0.3409, Train Acc=0.8811, Val Acc=0.6038
Epoch 8: Train Loss=0.2621, Train Acc=0.9082, Val Acc=0.5948
Epoch 9: Train Loss=0.2140, Train Acc=0.9250, Val Acc=0.6198
Epoch 10: Train Loss=0.1728, Train Acc=0.9405, Val Acc=0.6072
Epoch 11: Train Loss=0.1604, Train Acc=0.9445, Val Acc=0.6119
Epoch 12: Train Loss=0.1403, Train Acc=0.9510, Val Acc=0.6154
Epoch 13: Train Loss=0.1325, Train Acc=0.9529, Val Acc=0.6132
Epoch 14: Train Loss=0.1181, Train Acc=0.9589, Val Acc=0.6132
Epoch 15: Train Loss=0.1099, Train Acc=0.9623, Val Acc=0.5979
--- Training with learning rate: 0.

In [36]:
print("\n--- Final Tuning Results ---")
print(results)


--- Final Tuning Results ---
{0.0001: 0.6198, 0.001: 0.7441, 0.01: 0.7297}


Based on above experimentation we dont get much improvement by just working with different lr values.

lets try with data augmentation

In [41]:
from torch.utils.data import Dataset

class CIFAR10Dataset(Dataset):
    def __init__(self, images, labels, transform=None):
        self.images = images
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        image = self.images[idx]
        label = self.labels[idx]
        
        # PyTorch transforms expect a PIL Image, so we convert the numpy array
        image = transforms.ToPILImage()(image)

        if self.transform:
            image = self.transform(image)
        
        return image, label
    
    
train_transforms = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ToTensor()
])

# Simpler pipeline for test data (no random augmentation)
val_transforms = transforms.Compose([
    transforms.ToTensor()
])

# --- 2. Create Datasets and DataLoaders ---
# Use the custom Dataset class to apply the transforms
# Remember X_train_img is the (N, H, W, C) numpy array from earlier
train_dataset = CIFAR10Dataset(images=X_train_img, labels=y_train, transform=train_transforms)
val_dataset = CIFAR10Dataset(images=X_train_img, labels=y_train, transform=val_transforms) # Use test transforms for validation

# Get a validation split
train_size = int(0.9 * len(train_dataset))
val_size = len(train_dataset) - train_size
train_subset, val_subset = torch.utils.data.random_split(train_dataset, [train_size, val_size])

In [40]:
# We need to make sure the validation subset uses the simple test_transforms
val_subset.dataset = val_dataset

# Create the DataLoaders
exp1_train_loader = DataLoader(train_subset, batch_size=64, shuffle=True)
exp1_val_loader = DataLoader(val_subset, batch_size=64, shuffle=False)

In [42]:
print("--- Running Experiment 1: Augmentation Only ---")
# We use the ORIGINAL train_resnet function from cell [26]
best_val_acc_exp1, runtime_exp1, model_exp1 = train_resnet(exp1_train_loader, exp1_val_loader, num_epochs=15)

print(f"\n--- Experiment 1 Complete ---")
print(f"Best Validation Accuracy with Augmentation: {best_val_acc_exp1:.4f}")

--- Running Experiment 1: Augmentation Only ---
Epoch 1: Train Loss=1.4616, Train Acc=0.4725, Val Acc=0.5186
Epoch 2: Train Loss=1.1124, Train Acc=0.6061, Val Acc=0.5870
Epoch 3: Train Loss=0.9536, Train Acc=0.6654, Val Acc=0.6182
Epoch 4: Train Loss=0.8633, Train Acc=0.7012, Val Acc=0.6918
Epoch 5: Train Loss=0.7867, Train Acc=0.7263, Val Acc=0.6866
Epoch 6: Train Loss=0.7309, Train Acc=0.7467, Val Acc=0.6860
Epoch 7: Train Loss=0.6758, Train Acc=0.7652, Val Acc=0.7456
Epoch 8: Train Loss=0.6398, Train Acc=0.7788, Val Acc=0.7458
Epoch 9: Train Loss=0.6007, Train Acc=0.7914, Val Acc=0.7640
Epoch 10: Train Loss=0.5640, Train Acc=0.8061, Val Acc=0.7156
Epoch 11: Train Loss=0.5284, Train Acc=0.8162, Val Acc=0.7658
Epoch 12: Train Loss=0.4997, Train Acc=0.8263, Val Acc=0.7852
Epoch 13: Train Loss=0.4676, Train Acc=0.8349, Val Acc=0.7750
Epoch 14: Train Loss=0.4442, Train Acc=0.8442, Val Acc=0.7740
Epoch 15: Train Loss=0.4247, Train Acc=0.8502, Val Acc=0.7394

--- Experiment 1 Complete ---


experiment 2 with LR scheduler

In [47]:
import copy
def train_resnet_with_scheduler(train_loader, val_loader, num_epochs=15, lr=0.001):
    """
    A training function that uses a ReduceLROnPlateau scheduler.
    """
    device = "cuda" if torch.cuda.is_available() else "cpu"
    
    # Using the original model with weights=None
    model = resnet18(weights=None)
    model.fc = nn.Linear(model.fc.in_features, 10)
    model = model.to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    
    # --- CHANGE 1: Create the scheduler ---
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=3)

    best_val_acc = 0
    start_time = time.perf_counter()

    for epoch in range(num_epochs):
        model.train()
        # Training loop for one epoch (as in the original function)
        for xb, yb in train_loader:
            xb, yb = xb.to(device), yb.to(device)
            optimizer.zero_grad()
            preds = model(xb)
            loss = criterion(preds, yb)
            loss.backward()
            optimizer.step()

        # --- CHANGE 2: Calculate validation loss and step the scheduler ---
        model.eval()
        val_loss = 0.0
        correct, total = 0, 0
        with torch.no_grad():
            for xb, yb in val_loader:
                xb, yb = xb.to(device), yb.to(device)
                preds = model(xb)
                loss = criterion(preds, yb)
                val_loss += loss.item()
                _, predicted = preds.max(1)
                total += yb.size(0)
                correct += (predicted == yb).sum().item()
        
        avg_val_loss = val_loss / len(val_loader)
        val_acc = correct / total
        
        print(f"Epoch {epoch+1}/{num_epochs}: Val Loss={avg_val_loss:.4f}, Val Acc={val_acc:.4f}")
        
        # Pass the validation loss to the scheduler
        scheduler.step(avg_val_loss)

        if val_acc > best_val_acc:
            best_val_acc = val_acc
            best_model_state = copy.deepcopy(model.state_dict())

    total_time = time.perf_counter() - start_time
        
    # We can return the final validation accuracy to compare experiments
    return best_val_acc, total_time, best_model_state

# def train_resnet(train_loader, val_loader=None, num_epochs=10, lr=0.001):
#     model = resnet18(weights=None)
#     model.fc = nn.Linear(model.fc.in_features, 10)
#     model = model.to(device)

#     criterion = nn.CrossEntropyLoss()
#     optimizer = optim.Adam(model.parameters(), lr=lr)

#     best_val_acc = 0
#     start_time = time.perf_counter()

#     for epoch in range(num_epochs):
#         model.train()
#         running_loss, correct, total = 0, 0, 0

#         for xb, yb in train_loader:
#             xb, yb = xb.to(device), yb.to(device)

#             optimizer.zero_grad()
#             preds = model(xb)
#             loss = criterion(preds, yb)
#             loss.backward()
#             optimizer.step()

#             running_loss += loss.item()
#             _, predicted = preds.max(1)
#             total += yb.size(0)
#             correct += (predicted == yb).sum().item()

#         train_acc = correct / total

#         if val_loader:
#             model.eval()
#             correct, total = 0, 0
#             with torch.no_grad():
#                 for xb, yb in val_loader:
#                     xb, yb = xb.to(device), yb.to(device)
#                     preds = model(xb)
#                     _, predicted = preds.max(1)
#                     total += yb.size(0)
#                     correct += (predicted == yb).sum().item()
#             val_acc = correct / total
#             print(f"Epoch {epoch+1}: "
#                   f"Train Loss={running_loss/len(train_loader):.4f}, "
#                   f"Train Acc={train_acc:.4f}, Val Acc={val_acc:.4f}")
#             best_val_acc = max(best_val_acc, val_acc)

#     total_time = time.perf_counter() - start_time
#     return best_val_acc, total_time, model

In [48]:
print("\n--- Running Experiment 2: Augmentation + Scheduler ---")
best_val_acc_exp2, runtime_exp2, model_exp2 = train_resnet_with_scheduler(exp1_train_loader, exp1_val_loader, num_epochs=15)
print(f"Best Validation Accuracy with Augmentation + Scheduler: {best_val_acc_exp2:.4f}")


--- Running Experiment 2: Augmentation + Scheduler ---
Epoch 1/15: Val Loss=1.2329, Val Acc=0.5596
Epoch 2/15: Val Loss=1.1830, Val Acc=0.5890
Epoch 3/15: Val Loss=0.9177, Val Acc=0.6762
Epoch 4/15: Val Loss=1.0434, Val Acc=0.6482
Epoch 5/15: Val Loss=0.8814, Val Acc=0.6960
Epoch 6/15: Val Loss=0.7438, Val Acc=0.7406
Epoch 7/15: Val Loss=0.9692, Val Acc=0.6736
Epoch 8/15: Val Loss=0.7487, Val Acc=0.7438
Epoch 9/15: Val Loss=0.9534, Val Acc=0.6896
Epoch 10/15: Val Loss=0.7217, Val Acc=0.7582
Epoch 11/15: Val Loss=0.7049, Val Acc=0.7702
Epoch 12/15: Val Loss=0.6746, Val Acc=0.7736
Epoch 13/15: Val Loss=0.7880, Val Acc=0.7414
Epoch 14/15: Val Loss=0.6298, Val Acc=0.7922
Epoch 15/15: Val Loss=0.6374, Val Acc=0.7906
Best Validation Accuracy with Augmentation + Scheduler: 0.7922


In [51]:
print(runtime_exp2)

229.06875680001394


Lets try using pre trained ResNet weights instead of training from scratch

In [49]:
transfer_transforms = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# --- 2. Create Datasets and DataLoaders ---
# NOTE: We are NOT using random augmentation for this experiment
# The goal is to isolate the effect of transfer learning.
exp3_dataset = CIFAR10Dataset(images=X_train_img, labels=y_train, transform=transfer_transforms)

train_size = int(0.9 * len(exp3_dataset))
val_size = len(exp3_dataset) - train_size
train_subset, val_subset = torch.utils.data.random_split(exp3_dataset, [train_size, val_size])

exp3_train_loader = DataLoader(train_subset, batch_size=64, shuffle=True)
exp3_val_loader = DataLoader(val_subset, batch_size=64, shuffle=False)

In [55]:
def train_resnet_transfer(train_loader, val_loader, num_epochs=15, lr=0.001):
    """
    A simple training function that uses a pre-trained model.
    """
    device = "cuda" if torch.cuda.is_available() else "cpu"
    
    # --- The key change is here ---
    model = resnet18(weights='IMAGENET1K_V1')
    model.fc = nn.Linear(model.fc.in_features, 10)
    model = model.to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    
    best_val_acc = 0
    start_time = time.perf_counter()

    for epoch in range(num_epochs):
        model.train()
        for xb, yb in train_loader:
            xb, yb = xb.to(device), yb.to(device)
            optimizer.zero_grad()
            preds = model(xb)
            loss = criterion(preds, yb)
            loss.backward()
            optimizer.step()

        model.eval()
        val_loss = 0.0
        correct, total = 0, 0
        with torch.no_grad():
            for xb, yb in val_loader:
                xb, yb = xb.to(device), yb.to(device)
                preds = model(xb)
                loss = criterion(preds, yb)
                val_loss += loss.item()
                _, predicted = preds.max(1)
                total += yb.size(0)
                correct += (predicted == yb).sum().item()
        val_acc = correct / total
        avg_val_loss = val_loss / len(val_loader)
        
        if val_acc > best_val_acc:
            best_val_acc = val_acc
        
        print(f"Epoch {epoch+1}/{num_epochs}: Val Loss={avg_val_loss:.4f}, Val Acc={val_acc:.4f}")

    total_time = time.perf_counter() - start_time
    return best_val_acc, total_time, model

In [56]:
print("\n--- Running Experiment 3: Transfer Learning Only ---")
best_val_acc_exp3, total_time_exp3, model_exp3  = train_resnet_transfer(exp3_train_loader, exp3_val_loader, num_epochs=15)
print(f"Best Validation Accuracy with Transfer Learning: {best_val_acc_exp3:.4f}")
print(total_time_exp3)


--- Running Experiment 3: Transfer Learning Only ---
Epoch 1/15: Val Loss=0.8554, Val Acc=0.7142
Epoch 2/15: Val Loss=0.8625, Val Acc=0.7378
Epoch 3/15: Val Loss=0.7418, Val Acc=0.7580
Epoch 4/15: Val Loss=0.6148, Val Acc=0.7904
Epoch 5/15: Val Loss=0.5853, Val Acc=0.8034
Epoch 6/15: Val Loss=0.6332, Val Acc=0.8046
Epoch 7/15: Val Loss=0.6428, Val Acc=0.8142
Epoch 8/15: Val Loss=0.6788, Val Acc=0.8092
Epoch 9/15: Val Loss=0.7252, Val Acc=0.8002
Epoch 10/15: Val Loss=0.7304, Val Acc=0.8074
Epoch 11/15: Val Loss=0.8368, Val Acc=0.7972
Epoch 12/15: Val Loss=0.7034, Val Acc=0.8122
Epoch 13/15: Val Loss=0.7973, Val Acc=0.8028
Epoch 14/15: Val Loss=0.8783, Val Acc=0.7972
Epoch 15/15: Val Loss=0.8682, Val Acc=0.8022
Best Validation Accuracy with Transfer Learning: 0.8142
230.4384614000155


One final experiment combining it all

In [57]:
final_train_transforms = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

final_val_transforms = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])


In [61]:
def train_resnet_final(train_loader, val_loader, num_epochs=15, lr=0.001):
    """
    The final, fully-upgraded training function combining all techniques.
    """
    device = "cuda" if torch.cuda.is_available() else "cpu"
    
    # 1. Transfer Learning
    model = resnet18(weights='IMAGENET1K_V1')
    model.fc = nn.Linear(model.fc.in_features, 10)
    model = model.to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    
    # 2. Learning Rate Scheduler
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=3)

    best_val_acc = 0.0
    start_time = time.perf_counter()

    for epoch in range(num_epochs):
        model.train()
        for xb, yb in train_loader:
            xb, yb = xb.to(device), yb.to(device)
            optimizer.zero_grad()
            preds = model(xb)
            loss = criterion(preds, yb)
            loss.backward()
            optimizer.step()

        model.eval()
        val_loss = 0.0
        correct, total = 0, 0
        with torch.no_grad():
            for xb, yb in val_loader:
                xb, yb = xb.to(device), yb.to(device)
                preds = model(xb)
                loss = criterion(preds, yb)
                val_loss += loss.item()
                _, predicted = preds.max(1)
                total += yb.size(0)
                correct += (predicted == yb).sum().item()
        
        avg_val_loss = val_loss / len(val_loader)
        val_acc = correct / total
        
        print(f"Epoch {epoch+1}/{num_epochs}: Val Loss={avg_val_loss:.4f}, Val Acc={val_acc:.4f}")
        
        scheduler.step(avg_val_loss)

        if val_acc > best_val_acc:
            best_val_acc = val_acc
            
    total_time = time.perf_counter() - start_time
    
    return best_val_acc, total_time, model

In [62]:
final_train_dataset = CIFAR10Dataset(images=X_train_img, labels=y_train, transform=final_train_transforms)
final_val_dataset = CIFAR10Dataset(images=X_train_img, labels=y_train, transform=final_val_transforms)

train_size = int(0.9 * len(final_train_dataset))
val_size = len(final_train_dataset) - train_size
train_subset, val_subset = torch.utils.data.random_split(final_train_dataset, [train_size, val_size])
val_subset.dataset = final_val_dataset

final_train_loader = DataLoader(train_subset, batch_size=64, shuffle=True)
final_val_loader = DataLoader(val_subset, batch_size=64, shuffle=False)

In [63]:
print("\n--- Running Experiment 4: All Together ---")
best_val_acc_exp4, final_runtime, final_model = train_resnet_final(final_train_loader, final_val_loader, num_epochs=15)
print(f"Best Validation Accuracy with All Techniques: {best_val_acc_exp4:.4f}")


--- Running Experiment 4: All Together ---
Epoch 1/15: Val Loss=0.8431, Val Acc=0.7158
Epoch 2/15: Val Loss=0.6477, Val Acc=0.7768
Epoch 3/15: Val Loss=0.6378, Val Acc=0.7784
Epoch 4/15: Val Loss=0.6120, Val Acc=0.7902
Epoch 5/15: Val Loss=0.5791, Val Acc=0.8026
Epoch 6/15: Val Loss=0.5613, Val Acc=0.8082
Epoch 7/15: Val Loss=0.8601, Val Acc=0.7836
Epoch 8/15: Val Loss=0.5480, Val Acc=0.8124
Epoch 9/15: Val Loss=0.5160, Val Acc=0.8286
Epoch 10/15: Val Loss=0.4931, Val Acc=0.8318
Epoch 11/15: Val Loss=0.5068, Val Acc=0.8294
Epoch 12/15: Val Loss=0.5238, Val Acc=0.8304
Epoch 13/15: Val Loss=0.5285, Val Acc=0.8232
Epoch 14/15: Val Loss=0.7846, Val Acc=0.7868
Epoch 15/15: Val Loss=0.5302, Val Acc=0.8406
Best Validation Accuracy with All Techniques: 0.8406
