In [3]:
# single_gpu_example.py
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import time


# ----- Define a simple CNN -----
class SimpleCNN(nn.Module):
    """Small CNN classifier: two 3x3 convolutions followed by two linear layers.

    The defaults reproduce the original MNIST configuration (1 input channel,
    28x28 images, 10 output classes).

    Args:
        in_channels: Number of channels in the input images.
        input_size: ``(height, width)`` of the input images. Only used to
            size the first fully connected layer.
        num_classes: Number of logits produced by the final layer.
    """

    def __init__(self, in_channels=1, input_size=(28, 28), num_classes=10):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels, 32, 3, 1)
        self.conv2 = nn.Conv2d(32, 64, 3, 1)

        # Determine the flattened feature size by pushing a dummy batch through
        # the conv layers. Only the output shape matters, so:
        #   * run under no_grad so no autograd graph is built, and
        #   * use zeros instead of random values so the global RNG state is
        #     not consumed before the linear layers are initialised.
        height, width = input_size
        with torch.no_grad():
            probe = torch.zeros(1, in_channels, height, width)
            probe = self.conv2(self.conv1(probe))
        self._to_linear = probe.flatten(1).size(1)

        self.fc1 = nn.Linear(self._to_linear, 128)
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x):
        """Return unnormalised class scores for a batch of images.

        Args:
            x: Tensor of shape ``(batch, in_channels, height, width)`` matching
                the sizes given at construction time.

        Returns:
            Tensor of shape ``(batch, num_classes)``. No softmax is applied.
        """
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))

        # Collapse everything except the batch dimension for the linear layers.
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x


# ----- Training loop -----
def train(model, device, train_loader, optimizer, epoch, log_interval=100):
    """Run one training epoch with cross-entropy loss and print progress.

    Args:
        model: Module mapping a batch of inputs to class scores.
        device: Device the batches are moved to before the forward pass.
        train_loader: Iterable of ``(data, target)`` batches that exposes a
            ``dataset`` attribute supporting ``len()``.
        optimizer: Optimizer that updates ``model``'s parameters.
        epoch: Epoch number, used only in the progress message.
        log_interval: Print progress every this many batches. ``0`` or
            ``None`` disables progress output.
    """
    model.train()
    # Running count of samples processed before the current batch. Using
    # ``batch_idx * len(data)`` instead would under-report whenever the logged
    # batch is a short final batch.
    samples_seen = 0
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.cross_entropy(output, target)
        loss.backward()
        optimizer.step()

        if log_interval and batch_idx % log_interval == 0:
            print(f"Train Epoch: {epoch} [{samples_seen}/{len(train_loader.dataset)}] Loss: {loss.item():.6f}")
        samples_seen += len(data)


# ----- Main function -----
def main(epochs=2, batch_size=64, lr=0.001, data_dir='./data'):
    """Train ``SimpleCNN`` on the MNIST training split and report elapsed time.

    Args:
        epochs: Number of passes over the training set.
        batch_size: Mini-batch size used by the data loader.
        lr: Learning rate for the Adam optimizer.
        data_dir: Directory where MNIST is stored (downloaded if missing).
    """
    # Device setup
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")

    # Data loading
    transform = transforms.Compose([transforms.ToTensor()])
    train_dataset = datasets.MNIST(data_dir, train=True, download=True, transform=transform)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

    # Model and optimizer
    model = SimpleCNN().to(device)
    optimizer = optim.Adam(model.parameters(), lr=lr)

    # Train for the requested number of epochs. perf_counter is monotonic, so
    # the measured interval is not affected by system clock adjustments.
    start_time = time.perf_counter()
    for epoch in range(1, epochs + 1):
        train(model, device, train_loader, optimizer, epoch)
    if device.type == "cuda":
        # CUDA kernels run asynchronously; wait for queued work to finish so
        # the timer covers all of the training computation.
        torch.cuda.synchronize(device)
    total_time = time.perf_counter() - start_time

    print(f"Training completed in: {total_time:.2f} seconds")


# Run training only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()

Using device: cpu
Train Epoch: 1 [0/60000] Loss: 2.303463
Train Epoch: 1 [6400/60000] Loss: 0.208762
Train Epoch: 1 [12800/60000] Loss: 0.268926
Train Epoch: 1 [19200/60000] Loss: 0.084896
Train Epoch: 1 [25600/60000] Loss: 0.089359
Train Epoch: 1 [32000/60000] Loss: 0.040125
Train Epoch: 1 [38400/60000] Loss: 0.039047
Train Epoch: 1 [44800/60000] Loss: 0.157143
Train Epoch: 1 [51200/60000] Loss: 0.043174
Train Epoch: 1 [57600/60000] Loss: 0.090539
Train Epoch: 2 [0/60000] Loss: 0.080479
Train Epoch: 2 [6400/60000] Loss: 0.005220
Train Epoch: 2 [12800/60000] Loss: 0.078889
Train Epoch: 2 [19200/60000] Loss: 0.025349
Train Epoch: 2 [25600/60000] Loss: 0.018347
Train Epoch: 2 [32000/60000] Loss: 0.021410
Train Epoch: 2 [38400/60000] Loss: 0.112373
Train Epoch: 2 [44800/60000] Loss: 0.033357
Train Epoch: 2 [51200/60000] Loss: 0.011556
Train Epoch: 2 [57600/60000] Loss: 0.177160
Training completed in: 414.96 seconds
