In [80]:
from digits_recognition.dataset import load_dataset

# Location of the preprocessed dataset; the path is relative, so it resolves
# against the directory the notebook kernel is running in.
in_path = r'../data/processed/dataset.pkl'

# NOTE(review): the .pkl extension suggests a pickle file. Unpickling can execute
# arbitrary code, so only point this at trusted files -- confirm what
# load_dataset does internally.
# Later cells index `data` with the keys 'X_train', 'y_train', 'X_val', 'y_val'.
data = load_dataset(in_path)

In [81]:
import torch
from torch.utils.data import DataLoader, TensorDataset

# Convert the stored splits to tensors: features as float32, labels as torch.long.
X_train, X_val = (
    torch.tensor(data[key], dtype=torch.float32) for key in ('X_train', 'X_val')
)
y_train, y_val = (
    torch.tensor(data[key], dtype=torch.long) for key in ('y_train', 'y_val')
)

# NOTE(review): rescaling by 255 is deliberately left disabled below; presumably
# the stored features are already scaled -- confirm against the preprocessing step.
#X_train = X_train / 255.0
#X_val = X_val / 255.0

# Pair every sample with its label.
train_dataset, val_dataset = (
    TensorDataset(X_train, y_train),
    TensorDataset(X_val, y_val),
)

# Mini-batches of 64; only the training loader shuffles its samples.
train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(dataset=val_dataset, batch_size=64, shuffle=False)

In [82]:
import torch.nn as nn
import torch

# Seed PyTorch's global RNG so that random operations that follow
# (e.g. weight initialisation) are repeatable.
torch.manual_seed(42)


class DigitClassifier(nn.Module):
    """Fully connected classifier with a single hidden layer.

    The input is flattened to 28 * 28 = 784 features, passed through a
    128-unit linear layer followed by ReLU, and projected to 10 output scores.
    """

    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(in_features=28 * 28, out_features=128)
        self.fc2 = nn.Linear(in_features=128, out_features=10)

    def forward(self, x):
        """Return the raw output scores (no softmax applied), 10 per sample."""
        hidden = torch.relu(self.fc1(self.flatten(x)))
        return self.fc2(hidden)

In [83]:
# Training hyperparameters consumed by the training cell.
epochs = 100          # maximum number of passes over the training set
patience = 10         # non-improving validation epochs tolerated before early stopping
learning_rate = 1e-3  # initial learning rate handed to the optimizer
weight_decay = 0.01   # weight-decay coefficient handed to the optimizer

In [84]:
import torch.optim as optim
from torch.optim.lr_scheduler import PolynomialLR
from tqdm.notebook import tqdm

# Train on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Re-seed immediately before the weights are created so that re-running this
# cell on its own reproduces the same run as a fresh "Restart & Run All".
torch.manual_seed(42)

model = DigitClassifier().to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
# Note: Adam applies weight_decay as an L2 penalty on the gradients
# (it is not the decoupled decay of AdamW).
optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
# The learning rate decays polynomially (power 2) over `epochs` scheduler steps;
# the scheduler is stepped once per epoch at the bottom of the loop.
scheduler = PolynomialLR(optimizer, total_iters=epochs, power=2.0)

# Early-stopping bookkeeping: best validation loss seen so far and the number
# of consecutive epochs that failed to improve on it.
best_val_loss = float('inf')
epochs_no_improve = 0

# Training loop
for epoch in tqdm(range(1, epochs+1), desc="Epochs"):
    # ---- training phase ----
    model.train()

    tqdm.write(f"Learning rate: {scheduler.get_last_lr()}")
    train_loss = 0

    for images, labels in tqdm(train_loader, desc=f"Epoch [{epoch}/{epochs}], Training", leave=False):
        X = images.to(device)
        y = labels.to(device)

        optimizer.zero_grad()

        logits = model(X)
        loss = criterion(logits, y)
        loss.backward()

        optimizer.step()

        train_loss += loss.item()

    # Mean of the per-batch losses.
    avg_train_loss = train_loss / len(train_loader)

    tqdm.write(f"Epoch {epoch}, Train Loss: {avg_train_loss}")

    # ---- validation phase ----
    model.eval()

    val_loss = 0

    # No gradients are needed for evaluation; disabling autograd avoids building
    # a graph for every validation batch (less memory, faster).
    with torch.no_grad():
        # The label uses `epoch` (not epoch+1) so it matches the training bar.
        for images, labels in tqdm(val_loader, desc=f"Epoch [{epoch}/{epochs}], Validation", leave=False):
            X = images.to(device)
            y = labels.to(device)

            logits = model(X)

            loss = criterion(logits, y)

            val_loss += loss.item()

    avg_val_loss = val_loss / len(val_loader)

    tqdm.write(f"Epoch {epoch}, Validation Loss: {avg_val_loss}")

    # Checkpoint whenever the validation loss improves; otherwise count the miss.
    if avg_val_loss < best_val_loss:
        best_val_loss = avg_val_loss
        epochs_no_improve = 0
        torch.save(model.state_dict(), r'../models/weights.pth')
        tqdm.write("Best model weights have been saved.")
    else:
        epochs_no_improve += 1

    # `>=` rather than `==` so the stop still triggers if the counter has
    # already passed the threshold (e.g. patience set to 0).
    if epochs_no_improve >= patience:
        tqdm.write(f"Early stopping triggered after {epoch} epochs.")
        break

    scheduler.step()

Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

Learning rate: [0.001]


Epoch [1/100], Training:   0%|          | 0/797 [00:00<?, ?it/s]

Epoch 1, Train Loss: 0.6628242371699666


Epoch [2/100], Validation:   0%|          | 0/141 [00:00<?, ?it/s]

Epoch 1, Validation Loss: 0.22073186422757646
Best model weights have been saved.
Learning rate: [0.0009801]


Epoch [2/100], Training:   0%|          | 0/797 [00:00<?, ?it/s]

KeyboardInterrupt: 