# Model Training with PyTorch

In [24]:
!pip install wandb



In [25]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
import wandb
from sklearn.metrics import precision_score, recall_score

In [26]:
class LogisticRegression(nn.Module):
    """Fully connected classifier over flattened inputs.

    Despite the name, a non-empty ``hidden_layers`` produces a multi-layer
    perceptron: every hidden width contributes a ``Linear -> ReLU -> Dropout``
    stage, followed by one final ``Linear`` layer that emits raw logits (no
    softmax is applied here). With an empty (or ``None``) ``hidden_layers`` the
    network is a single ``Linear`` layer, i.e. plain multinomial logistic
    regression.

    Args:
        input_dim: Number of features per sample after flattening.
        output_dim: Number of output logits.
        hidden_layers: Sequence of hidden layer widths; may be empty or None.
        dropout_rate: Dropout probability applied after every hidden ReLU.
    """

    def __init__(self, input_dim, output_dim, hidden_layers, dropout_rate):
        super().__init__()
        # Chain the widths so that consecutive pairs describe each hidden Linear
        # layer. This also makes the "no hidden layers" case fall out naturally
        # instead of failing on hidden_layers[0].
        widths = [input_dim, *(hidden_layers or [])]

        layers = []
        for in_features, out_features in zip(widths[:-1], widths[1:]):
            layers.append(nn.Linear(in_features, out_features))
            layers.append(nn.ReLU())
            layers.append(nn.Dropout(dropout_rate))

        # Output layer; widths[-1] equals input_dim when there are no hidden layers.
        layers.append(nn.Linear(widths[-1], output_dim))

        # Module order (and therefore state_dict keys) matches the previous
        # hand-unrolled construction for any non-empty hidden_layers.
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Flatten every sample to one dimension and return the logits."""
        # reshape (rather than view) also accepts non-contiguous inputs.
        x = x.reshape(x.size(0), -1)
        return self.model(x)


In [27]:
# Tunable hyperparameters for this run; every later cell reads its settings
# from this single dictionary.
config = dict(
    learning_rate=0.005,
    epochs=15,
    batch_size=32,
    hidden_layers=[512, 256, 128],
    dropout_rate=0.4,
)


In [34]:
# Start a Weights & Biases run and attach the hyperparameter dictionary to it
wandb.init(
    config=config,
    project="mnist-mlops",
)


VBox(children=(Label(value='0.010 MB of 0.010 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

In [35]:
# Seed PyTorch's global RNG before anything stochastic is created, so weight
# initialisation, DataLoader shuffling and dropout masks are repeatable across
# Restart & Run All. Add a "seed" entry to `config` to override the default.
torch.manual_seed(config.get("seed", 42))

# Load and preprocess data
# ToTensor converts each image to a float tensor; Normalize then shifts/scales it
# with the given mean and std (presumably the usual MNIST statistics).
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])
train_dataset = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
test_dataset = datasets.MNIST(root='./data', train=False, download=True, transform=transform)

# Only the training split is shuffled; the test split keeps a fixed order.
train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=config["batch_size"], shuffle=True
)
test_loader = torch.utils.data.DataLoader(
    test_dataset, batch_size=config["batch_size"], shuffle=False
)

# Initialize model, loss, and optimizer
# 28*28 is the flattened size of one image; 10 is the number of output logits.
model = LogisticRegression(
    input_dim=28*28,
    output_dim=10,
    hidden_layers=config["hidden_layers"],
    dropout_rate=config["dropout_rate"],
)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=config["learning_rate"])

In [36]:
# Register the model with W&B so the active run can track it during training.
# NOTE(review): log="all" is understood to request both gradient and parameter
# histograms — confirm against the installed wandb version.
wandb.watch(model, log="all")

[]

In [37]:
# Training loop: one pass over train_loader per epoch, with periodic W&B logging
num_epochs = config["epochs"]
for epoch in range(num_epochs):
    model.train()  # training mode, so the Dropout layers are active
    running_loss = 0.0

    for batch_idx, (data, target) in enumerate(train_loader):
        # Standard step: clear old grads, forward, loss, backward, update.
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        # Only every 10th batch is reported to W&B.
        if batch_idx % 10 != 0:
            continue

        # Gradients from backward() are still populated here because they are
        # only cleared at the top of the next iteration. The logged value is the
        # SUM of the per-parameter gradient norms, not the norm of the whole
        # gradient vector.
        grad_norm_total = sum(
            param.grad.norm().item()
            for param in model.parameters()
            if param.grad is not None
        )
        # training_loss is the mean loss over the batches seen so far this epoch.
        mean_loss_so_far = running_loss / (batch_idx + 1)
        wandb.log({
            "training_loss": mean_loss_so_far,
            "learning_rate": optimizer.param_groups[0]['lr'],
            "gradient_norm": grad_norm_total,
        })

    epoch_loss = running_loss / len(train_loader)
    print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {epoch_loss:.4f}')



Epoch [1/15], Loss: 0.7259
Epoch [2/15], Loss: 0.6493
Epoch [3/15], Loss: 0.6180
Epoch [4/15], Loss: 0.6237
Epoch [5/15], Loss: 0.6480
Epoch [6/15], Loss: 0.6127
Epoch [7/15], Loss: 0.6066
Epoch [8/15], Loss: 0.6260
Epoch [9/15], Loss: 0.6077
Epoch [10/15], Loss: 0.6176
Epoch [11/15], Loss: 0.6014
Epoch [12/15], Loss: 0.5814
Epoch [13/15], Loss: 0.6015
Epoch [14/15], Loss: 0.5984
Epoch [15/15], Loss: 0.5995


In [22]:
# Validation: score the model on the test split and report the metrics.
# NOTE(review): this cell's saved execution count (22) is lower than the training
# cell's (37); re-run it after training so the reported accuracy reflects the
# final weights.
model.eval()  # evaluation mode, so the Dropout layers are inactive
correct, total = 0, 0
all_preds, all_targets = [], []

with torch.no_grad():
    for data, target in test_loader:
        output = model(data)
        # Predicted class = index of the largest logit in each row.
        predicted = output.argmax(dim=1)

        total += target.size(0)
        correct += predicted.eq(target).sum().item()

        # Keep every prediction/label pair for the sklearn metrics below.
        all_preds.extend(predicted.cpu().numpy())
        all_targets.extend(target.cpu().numpy())

# Accuracy is expressed as a percentage.
accuracy = 100 * correct / total

# Log validation accuracy, precision, and recall (class-support weighted averages)
validation_metrics = {
    "validation_accuracy": accuracy,
    "precision": precision_score(all_targets, all_preds, average='weighted'),
    "recall": recall_score(all_targets, all_preds, average='weighted'),
}
wandb.log(validation_metrics)

print(f'Validation Accuracy: {accuracy:.2f}%')

Validation Accuracy: 90.60%


In [39]:
# Function to log predictions
def log_predictions(model, data, target, num_samples=10):
    """Log a few inputs to W&B, captioned with predicted vs. actual label.

    Args:
        model: Module called as ``model(data)``; its output is reduced with an
            argmax over dimension 1 to obtain the predicted class index.
        data: Batch of inputs, indexed along the first dimension.
        target: Ground-truth labels aligned with ``data``.
        num_samples: Upper bound on how many samples to log. Fewer are logged
            when the batch holds fewer samples.

    Side effects:
        Switches ``model`` to eval mode (it is not switched back) and issues one
        ``wandb.log`` call under the ``"predictions"`` key.
    """
    model.eval()
    with torch.no_grad():
        predicted = model(data).argmax(dim=1)

    # Clamp the count so a batch shorter than num_samples cannot raise IndexError.
    count = max(0, min(num_samples, len(data), len(target)))

    # Log images with predictions vs actual labels to W&B
    wandb.log({
        "predictions": [
            wandb.Image(
                data[i],
                caption=f"Predicted: {predicted[i].item()}, Actual: {target[i].item()}",
            )
            for i in range(count)
        ]
    })

In [40]:
# Log sample predictions at the end of training.
# Fetch ONE batch and slice both tensors from it: building two separate iterators
# loads the first batch twice and would pair images with the wrong labels if the
# loader were ever shuffled.
sample_images, sample_labels = next(iter(test_loader))
log_predictions(model, sample_images[:10], sample_labels[:10])

In [41]:
# End the W&B run started by wandb.init; the run summary is printed below.
wandb.finish()

VBox(children=(Label(value='0.039 MB of 0.039 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
gradient_norm,▃▂▂▆▃▂▃▃▃▂▂▁▂▃▆▃▂▁▃▄▂▄▂▂▃▂▂▂▂▂▂▂▃▂▂▁▁▃█▄
learning_rate,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
training_loss,█▃▂▃▂▂▂▂▂▂▁▂▂▂▂▂▃▂▂▂▂▂▁▁▂▁▁▂▁▂▂▂▁▁▁▁▁▂▂▁

0,1
gradient_norm,1.73047
learning_rate,0.005
training_loss,0.59935


In [42]:
# Save the trained model.
# Only the state_dict is written, so reloading means constructing
# LogisticRegression with the same arguments and then calling load_state_dict.
# "model.pth" is a relative path.
torch.save(model.state_dict(), "model.pth")