In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, random_split
from torchvision import datasets, transforms
from tqdm.notebook import tqdm  # Use tqdm.notebook for best experience in Jupyter

# Set seeds for reproducibility
torch.manual_seed(42)

# ----- Model Definition -----
class MNIST_MLP(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(784, 512)
        self.fc2 = nn.Linear(512, 512)
        self.dropout = nn.Dropout(0.1)
        self.out = nn.Linear(512, 10)

    def forward(self, x):
        x = x.view(-1, 784)
        x = F.leaky_relu(self.fc1(x))
        x = torch.tanh(self.fc2(x))
        x = self.dropout(x)
        return self.out(x)

# ----- Data Augmentation & Normalization -----
transform = transforms.Compose([
    transforms.RandomAffine(degrees=10, translate=(0.1, 0.1)),
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])

# Load MNIST and split into train/val
dataset = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
val_size = 6000
train_size = len(dataset) - val_size
train_set, val_set = random_split(dataset, [train_size, val_size], generator=torch.Generator().manual_seed(42))
# Use no augmentation for validation
val_set.dataset.transform = transform

test_set = datasets.MNIST(root='./data', train=False, download=True, transform=transform)

train_loader = DataLoader(train_set, batch_size=128, shuffle=True)
val_loader = DataLoader(val_set, batch_size=128, shuffle=False)
test_loader = DataLoader(test_set, batch_size=128, shuffle=False)

# ----- Model, Optimizer, Loss -----
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = MNIST_MLP().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4, weight_decay=1e-5)
criterion = nn.CrossEntropyLoss()

# ----- Training Loop -----
def run_epoch(loader, model, criterion, optimizer=None):
    is_train = optimizer is not None
    model.train() if is_train else model.eval()
    total_loss = 0.0
    correct = 0
    total = 0
    loop = tqdm(loader, leave=False)
    for x, y in loop:
        x, y = x.to(device), y.to(device)
        if is_train:
            optimizer.zero_grad()
        logits = model(x)
        loss = criterion(logits, y)
        if is_train:
            loss.backward()
            nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()
        total_loss += loss.item() * x.size(0)
        preds = logits.argmax(dim=1)
        correct += (preds == y).sum().item()
        total += x.size(0)
        loop.set_postfix(loss=loss.item())
    avg_loss = total_loss / total
    acc = correct / total
    return avg_loss, acc

num_epochs = 20
best_val_loss = float('inf')
patience, patience_counter = 5, 0

for epoch in range(1, num_epochs + 1):
    train_loss, train_acc = run_epoch(train_loader, model, criterion, optimizer)
    val_loss, val_acc = run_epoch(val_loader, model, criterion)
    print(f"Epoch {epoch:02d}: "
          f"Train Loss {train_loss:.4f} | "
          f"Val Loss {val_loss:.4f} | "
          f"Val Acc {val_acc*100:.2f}%")
    # Early stopping (optional)
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        patience_counter = 0
    else:
        patience_counter += 1
        if patience_counter >= patience:
            print("Early stopping!")
            break

# ----- Test Evaluation -----
test_loss, test_acc = run_epoch(test_loader, model, criterion)
print(f"Test Accuracy: {test_acc*100:.2f}%")


  0%|          | 0/422 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [9]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, random_split
from torchvision import datasets, transforms
from tqdm.notebook import tqdm  # Use tqdm.notebook for best experience in Jupyter

# Set seeds for reproducibility
torch.manual_seed(0)

# ----- Model Definition -----
class MNIST_MLP(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(784, 512)
        self.fc2 = nn.Linear(512, 512)
        self.out = nn.Linear(512, 10)

    def forward(self, x):
        x = x.view(-1, 784)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.out(x)

# ----- Data Augmentation & Normalization -----
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])


# Load MNIST and split into train/val
dataset = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
val_size = 6000
train_size = len(dataset) - val_size
train_set, val_set = random_split(dataset, [train_size, val_size], generator=torch.Generator().manual_seed(42))
# Use no augmentation for validation
val_set.dataset.transform = transform

test_set = datasets.MNIST(root='./data', train=False, download=True, transform=transform)

train_loader = DataLoader(train_set, batch_size=128, shuffle=True)
val_loader = DataLoader(val_set, batch_size=128, shuffle=False)
test_loader = DataLoader(test_set, batch_size=128, shuffle=False)

# ----- Model, Optimizer, Loss -----
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = MNIST_MLP().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss()

# ----- Training Loop -----
def run_epoch(loader, model, criterion, optimizer=None):
    is_train = optimizer is not None
    model.train() if is_train else model.eval()
    total_loss = 0.0
    correct = 0
    total = 0
    loop = tqdm(loader, leave=False)
    for x, y in loop:
        x, y = x.to(device), y.to(device)
        if is_train:
            optimizer.zero_grad()
        logits = model(x)
        loss = criterion(logits, y)
        if is_train:
            loss.backward()
            nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()
        total_loss += loss.item() * x.size(0)
        preds = logits.argmax(dim=1)
        correct += (preds == y).sum().item()
        total += x.size(0)
        loop.set_postfix(loss=loss.item())
    avg_loss = total_loss / total
    acc = correct / total
    return avg_loss, acc

num_epochs = 20
best_val_loss = float('inf')
patience, patience_counter = 5, 0

for epoch in range(1, num_epochs + 1):
    train_loss, train_acc = run_epoch(train_loader, model, criterion, optimizer)
    val_loss, val_acc = run_epoch(val_loader, model, criterion)
    print(f"Epoch {epoch:02d}: "
          f"Train Loss {train_loss:.4f} | "
          f"Val Loss {val_loss:.4f} | "
          f"Val Acc {val_acc*100:.2f}%")
    # Early stopping (optional)
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        patience_counter = 0
    else:
        patience_counter += 1
        if patience_counter >= patience:
            print("Early stopping!")
            break

# ----- Test Evaluation -----
test_loss, test_acc = run_epoch(test_loader, model, criterion)
print(f"Test Accuracy: {test_acc*100:.2f}%")


  0%|          | 0/422 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch 01: Train Loss 0.2371 | Val Loss 0.1168 | Val Acc 96.73%


  0%|          | 0/422 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch 02: Train Loss 0.0881 | Val Loss 0.1047 | Val Acc 97.08%


  0%|          | 0/422 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch 03: Train Loss 0.0560 | Val Loss 0.0778 | Val Acc 97.75%


  0%|          | 0/422 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch 04: Train Loss 0.0450 | Val Loss 0.0820 | Val Acc 97.77%


  0%|          | 0/422 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch 05: Train Loss 0.0333 | Val Loss 0.0859 | Val Acc 97.52%


  0%|          | 0/422 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch 06: Train Loss 0.0265 | Val Loss 0.0912 | Val Acc 97.60%


  0%|          | 0/422 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch 07: Train Loss 0.0243 | Val Loss 0.0869 | Val Acc 97.55%


  0%|          | 0/422 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch 08: Train Loss 0.0193 | Val Loss 0.0915 | Val Acc 97.87%
Early stopping!


  0%|          | 0/79 [00:00<?, ?it/s]

Test Accuracy: 97.95%


In [10]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, random_split
from torchvision import datasets, transforms
from tqdm.notebook import tqdm  # Use tqdm.notebook for best experience in Jupyter

# Set seeds for reproducibility
torch.manual_seed(0)

# ----- Model Definition -----
class MNIST_MLP(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(784, 256)
        self.fc2 = nn.Linear(256, 128)
        self.out = nn.Linear(128, 10)

    def forward(self, x):
        x = x.view(-1, 784)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.out(x)

# ----- Data Augmentation & Normalization -----
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])


# Load MNIST and split into train/val
dataset = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
val_size = 6000
train_size = len(dataset) - val_size
train_set, val_set = random_split(dataset, [train_size, val_size], generator=torch.Generator().manual_seed(42))
# Use no augmentation for validation
val_set.dataset.transform = transform

test_set = datasets.MNIST(root='./data', train=False, download=True, transform=transform)

train_loader = DataLoader(train_set, batch_size=128, shuffle=True)
val_loader = DataLoader(val_set, batch_size=128, shuffle=False)
test_loader = DataLoader(test_set, batch_size=128, shuffle=False)

# ----- Model, Optimizer, Loss -----
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = MNIST_MLP().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss()

# ----- Training Loop -----
def run_epoch(loader, model, criterion, optimizer=None):
    is_train = optimizer is not None
    model.train() if is_train else model.eval()
    total_loss = 0.0
    correct = 0
    total = 0
    loop = tqdm(loader, leave=False)
    for x, y in loop:
        x, y = x.to(device), y.to(device)
        if is_train:
            optimizer.zero_grad()
        logits = model(x)
        loss = criterion(logits, y)
        if is_train:
            loss.backward()
            nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()
        total_loss += loss.item() * x.size(0)
        preds = logits.argmax(dim=1)
        correct += (preds == y).sum().item()
        total += x.size(0)
        loop.set_postfix(loss=loss.item())
    avg_loss = total_loss / total
    acc = correct / total
    return avg_loss, acc

num_epochs = 20
best_val_loss = float('inf')
patience, patience_counter = 5, 0

for epoch in range(1, num_epochs + 1):
    train_loss, train_acc = run_epoch(train_loader, model, criterion, optimizer)
    val_loss, val_acc = run_epoch(val_loader, model, criterion)
    print(f"Epoch {epoch:02d}: "
          f"Train Loss {train_loss:.4f} | "
          f"Val Loss {val_loss:.4f} | "
          f"Val Acc {val_acc*100:.2f}%")
    # Early stopping (optional)
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        patience_counter = 0
    else:
        patience_counter += 1
        if patience_counter >= patience:
            print("Early stopping!")
            break

# ----- Test Evaluation -----
test_loss, test_acc = run_epoch(test_loader, model, criterion)
print(f"Test Accuracy: {test_acc*100:.2f}%")


  0%|          | 0/422 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch 01: Train Loss 0.2888 | Val Loss 0.1462 | Val Acc 95.65%


  0%|          | 0/422 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch 02: Train Loss 0.1052 | Val Loss 0.1414 | Val Acc 95.68%


  0%|          | 0/422 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch 03: Train Loss 0.0680 | Val Loss 0.0936 | Val Acc 97.22%


  0%|          | 0/422 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch 04: Train Loss 0.0492 | Val Loss 0.1153 | Val Acc 96.58%


  0%|          | 0/422 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch 05: Train Loss 0.0391 | Val Loss 0.0818 | Val Acc 97.35%


  0%|          | 0/422 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch 06: Train Loss 0.0292 | Val Loss 0.0894 | Val Acc 97.52%


  0%|          | 0/422 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch 07: Train Loss 0.0243 | Val Loss 0.0892 | Val Acc 97.58%


  0%|          | 0/422 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch 08: Train Loss 0.0220 | Val Loss 0.0908 | Val Acc 97.50%


  0%|          | 0/422 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch 09: Train Loss 0.0196 | Val Loss 0.1086 | Val Acc 97.35%


  0%|          | 0/422 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

Epoch 10: Train Loss 0.0146 | Val Loss 0.0892 | Val Acc 97.62%
Early stopping!


  0%|          | 0/79 [00:00<?, ?it/s]

Test Accuracy: 97.77%
