# 🧠 Machine Learning Assignment 2 – Part A
### Logistic & Softmax Regression using PyTorch

This notebook implements:
- **A1:** Data Preparation (MNIST, 60/20/20 split)
- **A2:** Logistic Regression (Binary 0 vs 1)
- **A3:** Softmax Regression (Multi-class 0–9)

We’ll use PyTorch (with torchvision), scikit-learn, NumPy, and Matplotlib to prepare the data, implement both models, and visualize training progress.

In [ ]:
# --- IMPORTS & SETUP ---
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
import matplotlib.pyplot as plt

# Default (width, height) applied to every figure created in this notebook.
plt.rcParams.update({'figure.figsize': (10, 5)})
print('✅ Libraries imported successfully')

## 🧩 Part A1 – Data Preparation

- Load MNIST dataset (70,000 samples)
- Scale pixel values to the range [0, 1] (divide by 255) and flatten each image (28×28 → 784)
- Perform a **stratified split**: 60% train, 20% validation, 20% test

In [ ]:
# ToTensor pipeline handed to both dataset objects. The raw `.data` tensors are
# read directly below, so the division by 255 is done by hand further down.
transform = transforms.Compose([transforms.ToTensor()])

# download=False: nothing is fetched, so the files are expected under ./data.
train_data = datasets.MNIST(root='./data', train=True, download=False, transform=transform)
test_data = datasets.MNIST(root='./data', train=False, download=False, transform=transform)

# Stack the two official parts along the sample axis so that the split below
# is drawn from the combined set.
X = torch.cat((train_data.data, test_data.data))
y = torch.cat((train_data.targets, test_data.targets))

# Convert to float32 and divide by 255, then turn every 28x28 image into one
# row of 784 values.
X = X.to(torch.float32) / 255.0
X_flat = X.reshape(-1, 28 * 28)

# Stratified 60/20/20 split: hold out 40% first, then cut that hold-out in half
# to obtain the validation and test parts. Both calls stratify on the labels
# and use the same fixed seed.
X_train, X_temp, y_train, y_temp = train_test_split(
    X_flat, y, test_size=0.4, stratify=y, random_state=42
)
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.5, stratify=y_temp, random_state=42
)

print(f'Train samples: {len(X_train)}')
print(f'Validation samples: {len(X_val)}')
print(f'Test samples: {len(X_test)}')

## ⚙️ Part A2 – Logistic Regression (Binary Classification 0 vs 1)

We’ll now train a logistic regression model to distinguish between digits **0 and 1**.

**Steps:**
1. Filter the dataset for digits 0 and 1.
2. Define logistic regression with sigmoid activation.
3. Train using **Binary Cross-Entropy Loss (BCELoss)**.
4. Plot loss and accuracy curves.
5. Evaluate on test data.

In [ ]:
# --- Helper function to keep only the samples of selected digits (default: 0 and 1) ---
def filter_digits(X, y, digits=(0, 1)):
    """Return the rows of ``X`` and ``y`` whose label is one of ``digits``.

    Args:
        X: Feature array/tensor whose first axis lines up with ``y``.
        y: Label array/tensor supporting element-wise ``==`` and boolean-mask
            indexing.
        digits: Non-empty collection of labels to keep. Any number of labels
            is accepted; the default keeps 0 and 1.

    Returns:
        A ``(X_selected, y_selected)`` pair in the original sample order.

    Raises:
        ValueError: If ``digits`` is empty.
    """
    digits = tuple(digits)
    if not digits:
        raise ValueError("digits must contain at least one label")

    # OR the per-label masks together so every requested label is honoured,
    # not just the first two.
    mask = y == digits[0]
    for digit in digits[1:]:
        mask = mask | (y == digit)
    return X[mask], y[mask]

# Reduce every split to the samples selected by filter_digits' defaults.
X_train_bin, y_train_bin = filter_digits(X_train, y_train)
X_val_bin, y_val_bin = filter_digits(X_val, y_val)
X_test_bin, y_test_bin = filter_digits(X_test, y_test)

# Targets become float columns (one value per row) so they can be compared
# element-wise with the model's single output column.
y_train_bin = y_train_bin.float()[:, None]
y_val_bin = y_val_bin.float()[:, None]
y_test_bin = y_test_bin.float()[:, None]

batch_size = 64

train_set_bin = TensorDataset(X_train_bin, y_train_bin)
val_set_bin = TensorDataset(X_val_bin, y_val_bin)
test_set_bin = TensorDataset(X_test_bin, y_test_bin)

# Only the training loader shuffles; validation and test keep their order.
train_loader = DataLoader(train_set_bin, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_set_bin, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_set_bin, batch_size=batch_size, shuffle=False)

print(f'Filtered binary training samples: {len(X_train_bin)}')

In [ ]:
# --- Logistic Regression Model ---
class LogisticRegressionModel(nn.Module):
    """Binary logistic regression: one linear unit followed by a sigmoid.

    Args:
        in_features: Length of each input vector. Defaults to 784, the width
            of the flattened 28x28 images used in this notebook.
    """

    def __init__(self, in_features: int = 784):
        super().__init__()
        self.linear = nn.Linear(in_features, 1)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the sigmoid of the linear layer's output for ``x``.

        The last axis of the result has size 1 (the layer's single output).
        """
        return torch.sigmoid(self.linear(x))

# Instantiate the binary classifier.
model = LogisticRegressionModel()
# Binary cross-entropy, applied to the sigmoid outputs returned by the model.
criterion = nn.BCELoss()
# SGD over all model parameters with a learning rate of 0.01.
optimizer = optim.SGD(model.parameters(), lr=0.01)

In [ ]:
# --- Training ---
epochs = 10
# Per-epoch history; the plotting cell reads these four lists.
train_losses, val_losses, train_accs, val_accs = [], [], [], []

for epoch in range(epochs):
    # ---- Training pass: one optimizer step per mini-batch ----
    model.train()
    running_loss = 0
    n_correct = 0
    n_seen = 0
    for batch_x, batch_y in train_loader:
        optimizer.zero_grad()
        probs = model(batch_x)
        batch_loss = criterion(probs, batch_y)
        batch_loss.backward()
        optimizer.step()

        running_loss += batch_loss.item()
        # A sigmoid output of at least 0.5 counts as class 1, otherwise class 0.
        hard_preds = probs.ge(0.5).float()
        n_correct += hard_preds.eq(batch_y).sum().item()
        n_seen += batch_y.shape[0]

    # Mean of the per-batch losses (each batch counts once, whatever its size).
    train_loss = running_loss / len(train_loader)
    train_acc = n_correct / n_seen

    # ---- Validation pass: same metrics, no gradient tracking ----
    model.eval()
    running_loss, n_correct, n_seen = 0, 0, 0
    with torch.no_grad():
        for batch_x, batch_y in val_loader:
            probs = model(batch_x)
            running_loss += criterion(probs, batch_y).item()
            hard_preds = probs.ge(0.5).float()
            n_correct += hard_preds.eq(batch_y).sum().item()
            n_seen += batch_y.shape[0]

    val_loss = running_loss / len(val_loader)
    val_acc = n_correct / n_seen

    # Record this epoch's metrics.
    train_losses.append(train_loss)
    val_losses.append(val_loss)
    train_accs.append(train_acc)
    val_accs.append(val_acc)

    print(f"Epoch {epoch+1}/{epochs} | Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f} | Train Acc: {train_acc:.4f}, Val Acc: {val_acc:.4f}")

In [ ]:
# --- Plot Curves ---
# One panel per metric, drawn left to right: (title, ((history, legend label), ...)).
panels = (
    ('Loss Curves', ((train_losses, 'Train Loss'), (val_losses, 'Val Loss'))),
    ('Accuracy Curves', ((train_accs, 'Train Acc'), (val_accs, 'Val Acc'))),
)
for position, (panel_title, curves) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    for history, curve_label in curves:
        plt.plot(history, label=curve_label)
    plt.legend()
    plt.title(panel_title)
plt.show()

In [ ]:
# --- Test Evaluation ---
# Score the trained binary model on the held-out test loader and collect
# every prediction/label pair for the confusion matrix.
model.eval()
correct, total = 0, 0
all_preds, all_labels = [], []
with torch.no_grad():
    for images, labels in test_loader:
        outputs = model(images)
        # A sigmoid output of at least 0.5 counts as class 1.
        preds = (outputs >= 0.5).float()
        # Flatten with reshape(-1) instead of squeeze(): squeeze() would turn
        # a final batch holding a single sample into a 0-d tensor, and
        # list.extend() cannot iterate over the resulting 0-d NumPy array.
        all_preds.extend(preds.reshape(-1).numpy())
        all_labels.extend(labels.reshape(-1).numpy())
        correct += (preds == labels).sum().item()
        total += labels.size(0)

test_acc = correct / total
print(f"Final Test Accuracy (0 vs 1): {test_acc:.4f}")

# Rows are the actual classes, columns the predicted ones (see axis labels).
cm = confusion_matrix(all_labels, all_preds)
plt.imshow(cm, cmap='Blues')
plt.title('Confusion Matrix – Binary (0 vs 1)')
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.colorbar()
plt.show()

## 🔢 Part A3 – Softmax Regression (Multi-class Classification 0–9)

Now we extend to classify **all 10 digits** using **Softmax Regression**.

- Model: Single linear layer (784 → 10)
- Loss: CrossEntropyLoss (which applies log-softmax internally, so the model outputs raw logits)
- Metrics: Accuracy, confusion matrix, per-class report

In [ ]:
# CrossEntropyLoss takes integer class indices as targets, so cast the labels
# of every split to int64.
y_train_long = y_train.to(torch.int64)
y_val_long = y_val.to(torch.int64)
y_test_long = y_test.to(torch.int64)

# These rebind the loader names used in Part A2, now over all ten digits.
# As before, only the training loader shuffles.
train_loader = DataLoader(
    TensorDataset(X_train, y_train_long), batch_size=batch_size, shuffle=True
)
val_loader = DataLoader(
    TensorDataset(X_val, y_val_long), batch_size=batch_size, shuffle=False
)
test_loader = DataLoader(
    TensorDataset(X_test, y_test_long), batch_size=batch_size, shuffle=False
)

In [ ]:
# --- Softmax Regression Model ---
class SoftmaxRegressionModel(nn.Module):
    """Multi-class linear classifier that returns one raw score per class.

    No softmax is applied in ``forward``; the scores are meant to be passed
    to a loss that handles the normalisation itself.

    Args:
        in_features: Length of each input vector. Defaults to 784, the width
            of the flattened 28x28 images used in this notebook.
        num_classes: Number of output scores. Defaults to 10.
    """

    def __init__(self, in_features: int = 784, num_classes: int = 10):
        super().__init__()
        self.linear = nn.Linear(in_features, num_classes)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the linear layer's output (unnormalised scores) for ``x``."""
        return self.linear(x)

# Rebinds `model`, `criterion` and `optimizer` from Part A2 for the 10-class task.
model = SoftmaxRegressionModel()
# Cross-entropy computed on the raw scores returned by the model.
criterion = nn.CrossEntropyLoss()
# SGD over all model parameters with a learning rate of 0.01.
optimizer = optim.SGD(model.parameters(), lr=0.01)

In [ ]:
# --- Training Loop ---
epochs = 10
# Per-epoch history; the plotting cell reads these four lists.
train_losses, val_losses, train_accs, val_accs = [], [], [], []

for epoch in range(epochs):
    # ---- Training pass: one optimizer step per mini-batch ----
    model.train()
    running_loss = 0
    n_correct = 0
    n_seen = 0
    for batch_x, batch_y in train_loader:
        optimizer.zero_grad()
        logits = model(batch_x)
        batch_loss = criterion(logits, batch_y)
        batch_loss.backward()
        optimizer.step()

        running_loss += batch_loss.item()
        # Predicted class = index of the largest score in each row.
        top_class = torch.argmax(logits, dim=1)
        n_correct += top_class.eq(batch_y).sum().item()
        n_seen += batch_y.shape[0]

    # Mean of the per-batch losses (each batch counts once, whatever its size).
    train_loss = running_loss / len(train_loader)
    train_acc = n_correct / n_seen

    # ---- Validation pass: same metrics, no gradient tracking ----
    model.eval()
    running_loss, n_correct, n_seen = 0, 0, 0
    with torch.no_grad():
        for batch_x, batch_y in val_loader:
            logits = model(batch_x)
            running_loss += criterion(logits, batch_y).item()
            top_class = torch.argmax(logits, dim=1)
            n_correct += top_class.eq(batch_y).sum().item()
            n_seen += batch_y.shape[0]

    val_loss = running_loss / len(val_loader)
    val_acc = n_correct / n_seen

    # Record this epoch's metrics.
    train_losses.append(train_loss)
    val_losses.append(val_loss)
    train_accs.append(train_acc)
    val_accs.append(val_acc)

    print(f"Epoch {epoch+1}/{epochs} | Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f} | Train Acc: {train_acc:.4f}, Val Acc: {val_acc:.4f}")

In [ ]:
# --- Plot Curves ---
# One panel per metric, drawn left to right: (title, ((history, legend label), ...)).
panels = (
    ('Softmax Regression – Loss', ((train_losses, 'Train Loss'), (val_losses, 'Val Loss'))),
    ('Softmax Regression – Accuracy', ((train_accs, 'Train Acc'), (val_accs, 'Val Acc'))),
)
for position, (panel_title, curves) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    for history, curve_label in curves:
        plt.plot(history, label=curve_label)
    plt.legend()
    plt.title(panel_title)
plt.show()

In [ ]:
# --- Evaluate on Test Set ---
# Collect the predicted and true class of every test sample.
model.eval()
all_preds = []
all_labels = []
with torch.no_grad():
    for batch_x, batch_y in test_loader:
        # Predicted class = index of the largest score in each row.
        predicted = torch.argmax(model(batch_x), dim=1)
        all_preds += list(predicted.numpy())
        all_labels += list(batch_y.numpy())

# Rows are the actual classes, columns the predicted ones (see axis labels).
cm = confusion_matrix(all_labels, all_preds)
plt.imshow(cm, cmap='Blues')
plt.title('Confusion Matrix – Softmax (0–9)')
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.colorbar()
plt.show()

# Per-class metrics, printed with four decimal places.
print('\nClassification Report:')
print(classification_report(all_labels, all_preds, digits=4))