# CAP4453 Robot Vision: Softmax Classifier

This notebook provides a skeleton implementation of a one‑layer linear classifier using PyTorch. You will load the CIFAR‑10 or MNIST dataset, flatten the images, and train a linear classifier with softmax loss. The code below is only a starting point; feel free to modify it as needed. The model will automatically use a GPU if available.


In [None]:
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import numpy as np
import matplotlib.pyplot as plt

# Prefer CUDA when PyTorch reports a usable GPU; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f"Using device: {device}")

# Select dataset: 'cifar10' or 'mnist'
dataset_name = 'cifar10'

# Reject unsupported names up front, before anything is downloaded.
_dataset_key = dataset_name.lower()
if _dataset_key not in ('cifar10', 'mnist'):
    raise ValueError('Unknown dataset')

# Both datasets share the same preprocessing (conversion to tensors only) and
# the same number of classes; they differ in the torchvision class used and in
# the number of values per image once it is flattened.
transform = transforms.Compose([transforms.ToTensor()])
if _dataset_key == 'cifar10':
    dataset_cls, input_dim = datasets.CIFAR10, 32*32*3
else:
    dataset_cls, input_dim = datasets.MNIST, 28*28
num_classes = 10

train_dataset = dataset_cls(root='./data', train=True, download=True, transform=transform)
test_dataset = dataset_cls(root='./data', train=False, download=True, transform=transform)

# Split train into train/val (90% / 10%).
# The split draws from its own seeded generator so that the same samples end up
# in the validation set on every run. Without a fixed seed the validation set
# changes with each kernel session, and validation accuracies recorded in
# different sessions are not comparable.
SPLIT_SEED = 42
train_size = int(0.9 * len(train_dataset))
val_size = len(train_dataset) - train_size
train_dataset, val_dataset = torch.utils.data.random_split(
    train_dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

# Only the training loader shuffles; validation and test are read in order.
batch_size = 128
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=0)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=0)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=0)

print(f"Train samples: {len(train_dataset)}, Validation samples: {len(val_dataset)}, Test samples: {len(test_dataset)}")


Using device: cpu
Files already downloaded and verified


  entry = pickle.load(f, encoding="latin1")


Files already downloaded and verified
Train samples: 45000, Validation samples: 5000, Test samples: 10000


In [None]:
# Define a simple linear classifier using PyTorch
import torch.nn as nn

class LinearClassifier(nn.Module):
    """One-layer linear classifier: flatten each sample, then apply one affine map.

    The module returns raw class scores; no softmax is applied inside it.

    Args:
        input_dim: Number of values per sample after flattening.
        num_classes: Number of scores produced per sample.
    """

    def __init__(self, input_dim, num_classes):
        super().__init__()
        self.fc = nn.Linear(input_dim, num_classes)

    def forward(self, x):
        """Return the class scores for a batch.

        Args:
            x: Tensor whose first dimension is the batch dimension; all
                remaining dimensions are flattened into one.

        Returns:
            Tensor of shape ``(x.size(0), num_classes)``.
        """
        # reshape() instead of view(): view() raises a RuntimeError when the
        # input is not contiguous (e.g. after permute/transpose), whereas
        # reshape() returns a view when it can and copies only when it must.
        x = x.reshape(x.size(0), -1)  # flatten
        return self.fc(x)

# Hyperparameter combinations to test
learning_rates = [1e-4, 1e-3, 1e-2, 1e-1]
weight_decays = [0, 1e-5, 1e-4, 1e-3]
batch_sizes = [64, 128, 256]
num_epochs = 10


def train_one_epoch(model, loader, criterion, optimizer, device):
    """Train ``model`` for one pass over ``loader``.

    Args:
        model: Module to optimise; it is switched to training mode.
        loader: Iterable yielding ``(images, labels)`` minibatches.
        criterion: Loss called as ``criterion(outputs, labels)``.
        optimizer: Optimizer stepping the parameters of ``model``.
        device: Device each minibatch is moved to before the forward pass.

    Returns:
        tuple: ``(mean_loss, accuracy_percent)`` over all samples seen in the
        pass. Accuracy is accumulated on the fly, i.e. each minibatch is scored
        with the weights in use when it was processed.
    """
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    for images, labels in loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        # Weight the batch loss by the batch size so the epoch figure stays a
        # per-sample mean even when the last batch is smaller (the
        # CrossEntropyLoss used below averages over the batch by default).
        running_loss += loss.item() * images.size(0)
        _, predicted = outputs.max(1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()
    return running_loss / total, 100. * correct / total


def evaluate_accuracy(model, loader, device):
    """Return the accuracy (in percent) of ``model`` on ``loader``.

    The model is switched to evaluation mode and gradients are disabled.
    """
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()
    return 100. * correct / total


# Store results for all experiments
all_results = []
best_val_acc = 0
best_config = {}

print("\n")
print("HYPERPARAMETER SEARCH FOR LINEAR CLASSIFIER")
print("\n")

# Test all combinations.
# The loop variable and the loaders use search-specific names so that running
# this cell does not silently overwrite `batch_size`, `train_loader` and
# `val_loader` created in the data-loading cell.
for bs in batch_sizes:
    # Create data loaders with current batch size
    search_train_loader = DataLoader(train_dataset, batch_size=bs, shuffle=True, num_workers=0)
    search_val_loader = DataLoader(val_dataset, batch_size=bs, shuffle=False, num_workers=0)

    for lr in learning_rates:
        for wd in weight_decays:
            print("\n")
            print(f"Testing: batch_size={bs}, lr={lr}, weight_decay={wd}")
            print("\n")

            # Fresh model, loss function and optimizer for every configuration
            model = LinearClassifier(input_dim, num_classes).to(device)
            criterion = nn.CrossEntropyLoss()
            optimizer = torch.optim.SGD(model.parameters(), lr=lr, weight_decay=wd)

            # Per-epoch metrics for this configuration
            train_losses = []
            train_accs = []
            val_accs = []

            for epoch in range(num_epochs):
                train_loss, train_acc = train_one_epoch(
                    model, search_train_loader, criterion, optimizer, device
                )
                val_acc = evaluate_accuracy(model, search_val_loader, device)

                train_losses.append(train_loss)
                train_accs.append(train_acc)
                val_accs.append(val_acc)

                print(f"Epoch {epoch+1}/{num_epochs}, Train loss: {train_loss:.4f}, Train acc: {train_acc:.2f}%, Val acc: {val_acc:.2f}%")

            # Configurations are ranked by the validation accuracy of the last epoch.
            final_val_acc = val_accs[-1]

            all_results.append({
                'batch_size': bs,
                'learning_rate': lr,
                'weight_decay': wd,
                'final_val_acc': final_val_acc,
                'train_losses': train_losses,
                'train_accs': train_accs,
                'val_accs': val_accs
            })

            # `not best_config` makes the first finished run always count, so
            # best_config can never be left empty for the summary below.
            if not best_config or final_val_acc > best_val_acc:
                best_val_acc = final_val_acc
                best_config = {
                    'batch_size': bs,
                    'learning_rate': lr,
                    'weight_decay': wd,
                    'val_acc': final_val_acc,
                    'train_losses': train_losses,
                    'train_accs': train_accs,
                    'val_accs': val_accs
                }

            print(f"\nFinal validation accuracy: {final_val_acc:.2f}%")

print("\n")
print("BEST CONFIGURATION FOUND:")
print("\n")
print(f"Batch Size: {best_config['batch_size']}")
print(f"Learning Rate: {best_config['learning_rate']}")
print(f"Weight Decay: {best_config['weight_decay']}")
print(f"Best Validation Accuracy: {best_config['val_acc']:.2f}%")
print("\n")

HYPERPARAMETER SEARCH FOR LINEAR CLASSIFIER

Testing: batch_size=64, lr=0.0001, weight_decay=0
Epoch 1/10, Train loss: 2.2855, Train acc: 12.56%, Val acc: 15.92%
Epoch 2/10, Train loss: 2.2335, Train acc: 18.69%, Val acc: 21.82%
Epoch 3/10, Train loss: 2.1967, Train acc: 22.46%, Val acc: 23.58%
Epoch 4/10, Train loss: 2.1666, Train acc: 24.44%, Val acc: 25.50%
Epoch 5/10, Train loss: 2.1414, Train acc: 25.82%, Val acc: 26.84%
Epoch 6/10, Train loss: 2.1201, Train acc: 26.90%, Val acc: 27.26%
Epoch 7/10, Train loss: 2.1017, Train acc: 27.74%, Val acc: 28.00%
Epoch 8/10, Train loss: 2.0856, Train acc: 28.31%, Val acc: 29.06%
Epoch 9/10, Train loss: 2.0715, Train acc: 29.04%, Val acc: 29.28%
Epoch 10/10, Train loss: 2.0588, Train acc: 29.63%, Val acc: 29.56%

Final validation accuracy: 29.56%

Testing: batch_size=64, lr=0.0001, weight_decay=1e-05
Epoch 1/10, Train loss: 2.2859, Train acc: 13.89%, Val acc: 17.80%
Epoch 2/10, Train loss: 2.2345, Train acc: 19.47%, Val acc: 20.38%
Epoch 3/10

KeyboardInterrupt: 

### Next steps

- Experiment with different learning rates, regularization strengths, and batch sizes. Record the validation‑set performance for each hyper‑parameter setting in your report (e.g., take a screenshot or copy the training/validation log).
- After choosing hyper‑parameters, train on the combined train+validation set and report the test accuracy.
- Plot training and validation accuracies over epochs.
- Compare these results with those obtained using a two‑layer network (see the next notebook).


In [None]:
import pandas as pd

print("\n")
print("SUMMARY OF ALL EXPERIMENTS")
print("\n")

# Build the table with the accuracy kept numeric. Passing the column names
# explicitly keeps the frame well-formed (and sortable) even when no
# experiment has finished yet.
summary_columns = ['Batch Size', 'Learning Rate', 'Weight Decay', 'Final Val Acc']
df_results = pd.DataFrame([{
    'Batch Size': r['batch_size'],
    'Learning Rate': r['learning_rate'],
    'Weight Decay': r['weight_decay'],
    'Final Val Acc': r['final_val_acc']
} for r in all_results], columns=summary_columns)

# Sort on the numeric value first and only then format it for display.
# Sorting the formatted strings compares them character by character, which
# ranks e.g. "9.80%" above "85.00%" and "100.00%" below "99.00%".
df_results = df_results.sort_values(by='Final Val Acc', ascending=False)
df_results['Final Val Acc'] = df_results['Final Val Acc'].map('{:.2f}%'.format)
print(df_results.to_string(index=False))

In [None]:
# Plot training curves for best configuration
fig, axes = plt.subplots(1, 2, figsize=(15, 5))
acc_ax, loss_ax = axes

# Left panel: train and validation accuracy per epoch (epochs numbered from 1).
accuracy_curves = [
    ('train_accs', 'b-', 'Train Accuracy', 'o'),
    ('val_accs', 'r-', 'Validation Accuracy', 's'),
]
for curve_key, curve_fmt, curve_label, curve_marker in accuracy_curves:
    curve_values = best_config[curve_key]
    acc_ax.plot(range(1, len(curve_values) + 1), curve_values,
                curve_fmt, label=curve_label, marker=curve_marker)
acc_ax.set_xlabel('Epoch')
acc_ax.set_ylabel('Accuracy (%)')
acc_ax.set_title('Linear Classifier: Training and Validation Accuracy')
acc_ax.legend()
acc_ax.grid(True, alpha=0.3)

# Right panel: training loss per epoch.
loss_values = best_config['train_losses']
loss_ax.plot(range(1, len(loss_values) + 1), loss_values,
             'g-', label='Train Loss', marker='o')
loss_ax.set_xlabel('Epoch')
loss_ax.set_ylabel('Loss')
loss_ax.set_title('Linear Classifier: Training Loss')
loss_ax.legend()
loss_ax.grid(True, alpha=0.3)

# Save the figure to disk before showing it.
plt.tight_layout()
plt.savefig('lc_curves.png', dpi=300, bbox_inches='tight')
plt.show()

print(f"\nBest Configuration:")
print(f"Batch Size: {best_config['batch_size']}")
print(f"Learning Rate: {best_config['learning_rate']}")
print(f"Weight Decay: {best_config['weight_decay']}")
print(f"Final Validation Accuracy: {best_config['val_accs'][-1]:.2f}%")

## Final Evaluation on Test Set

Now train with the best hyperparameters on the combined train+validation set and evaluate on the test set.

In [None]:
# Combine train and validation sets
from torch.utils.data import ConcatDataset

# Merge the two splits back together so the final model sees every training sample.
combined_dataset = ConcatDataset([train_dataset, val_dataset])
combined_loader = DataLoader(
    combined_dataset,
    batch_size=best_config['batch_size'],
    shuffle=True,
    num_workers=0,
)

print("\n")
print("FINAL TRAINING ON TRAIN+VALIDATION SET")
print("\n")
print(f"  Batch Size: {best_config['batch_size']}")
print(f"  Learning Rate: {best_config['learning_rate']}")
print(f"  Weight Decay: {best_config['weight_decay']}")
print(f"\nTraining on {len(combined_dataset)} samples...")
print("\n")

# Fresh model trained with the optimizer settings selected by the search.
final_model = LinearClassifier(input_dim, num_classes).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(
    final_model.parameters(),
    lr=best_config['learning_rate'],
    weight_decay=best_config['weight_decay'],
)

num_epochs_final = 20
for epoch in range(num_epochs_final):
    final_model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    for images, labels in combined_loader:
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = final_model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Accumulate the loss weighted by batch size, plus the hit count.
        running_loss += loss.item() * images.size(0)
        _, predicted = outputs.max(1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()

    # Per-sample mean loss and accuracy (in percent) for this epoch.
    train_loss = running_loss / total
    train_acc = 100. * correct / total
    print(f"Epoch {epoch+1}/{num_epochs_final}, Train loss: {train_loss:.4f}, Train acc: {train_acc:.2f}%")

print("\n")
print("FINAL TEST SET EVALUATION")
print("\n")

# Read the test set in order, using the batch size chosen by the search.
test_loader = DataLoader(
    test_dataset,
    batch_size=best_config['batch_size'],
    shuffle=False,
    num_workers=0,
)

# Count correct predictions with the model in eval mode and gradients disabled.
final_model.eval()
test_correct = 0
test_total = 0
with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = final_model(images)
        _, predicted = outputs.max(1)
        test_total += labels.size(0)
        test_correct += predicted.eq(labels).sum().item()

test_acc = 100. * test_correct / test_total
print(f"\nFINAL TEST ACCURACY: {test_acc:.2f}%")
print("\n")