# PyTorch + Fashion-MNIST (GPU if available)

This notebook trains `FashionCNN` on a **CUDA GPU** if one is available.
If CUDA is not available, it prints a message and runs on the CPU instead.

In [None]:
import torch, torch.nn as nn

In [None]:
# Report whether PyTorch detects a usable CUDA device.
print('CUDA available:', torch.cuda.is_available())

In [None]:
import torch
import torch.nn as nn

class FashionCNN(nn.Module):
    """Small CNN classifier: two conv blocks followed by three linear layers.

    ``fc1`` expects ``64 * 6 * 6`` flattened features, which is what the conv
    stack produces for ``1 x 28 x 28`` inputs (28 -> 14 after ``layer1``;
    14 -> 12 -> 6 after ``layer2``). The network emits 10 raw scores per
    sample; no softmax is applied.
    """

    def __init__(self):
        super().__init__()
        # Block 1: padding=1 keeps the spatial size, pooling halves it.
        self.layer1 = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
        )
        # Block 2: the unpadded 3x3 conv trims 2 pixels, pooling halves again.
        self.layer2 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=3),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2),
        )
        self.fc1 = nn.Linear(64 * 6 * 6, 600)
        # Plain element-wise dropout: the activations here are flat
        # (batch, 600) vectors. nn.Dropout2d is meant for channel-wise dropout
        # on 3-D/4-D feature maps and is deprecated for 2-D input.
        self.drop = nn.Dropout(0.25)
        self.fc2 = nn.Linear(600, 120)
        self.fc3 = nn.Linear(120, 10)

    def forward(self, x):
        """Return the 10 class scores for a batch of images ``x``."""
        out = self.layer1(x)
        out = self.layer2(out)
        # Flatten everything except the batch dimension for the linear layers.
        out = out.view(out.size(0), -1)
        out = self.fc1(out)
        out = self.drop(out)
        # NOTE(review): there is no non-linearity between fc1, fc2 and fc3;
        # left unchanged so the model's behaviour stays the same.
        out = self.fc2(out)
        out = self.fc3(out)
        return out

In [None]:
import datetime
from torch.autograd import Variable
from torchvision import datasets
from torchvision.transforms import ToTensor
from torch.utils.data import DataLoader
import pandas as pd

def _evaluate(model, loader, device):
    """Return the accuracy (in percent) of ``model`` over all batches in ``loader``.

    The model is switched to eval mode for the pass and restored to the mode
    it was in before the call.
    """
    was_training = model.training
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for x, y in loader:
            x, y = x.to(device), y.to(device)
            preds = model(x).argmax(1)
            correct += (preds == y).sum().item()
            total += len(y)
    model.train(was_training)
    return 100.0 * correct / total


def train(device_str="cpu", batch_size=100, lr=1e-3, epochs=5, *,
          eval_every=50, data_root="../data"):
    """Train ``FashionCNN`` on Fashion-MNIST and log periodic test metrics.

    Args:
        device_str: Device string handed to ``torch.device`` (for example
            ``"cpu"`` or ``"cuda:0"``). If a CUDA device is requested but CUDA
            is not available, a message is printed and the CPU is used.
        batch_size: Batch size for both the training and the test loader.
        lr: Learning rate for the Adam optimizer.
        epochs: Number of passes over the training set.
        eval_every: Evaluate on the test set every this many optimizer steps.
        data_root: Directory the dataset is read from / downloaded to.

    Returns:
        A ``pandas.DataFrame`` with one row per evaluation and the columns
        ``iteration``, ``loss`` (the training-batch loss of that step),
        ``accuracy`` (test accuracy in percent), ``elapsed_time_sec``,
        ``batch_size`` and ``lr``. The columns are present even when no
        evaluation ran.

    Raises:
        ValueError: If ``eval_every`` is not positive.
    """
    # Validate before any download or model construction happens.
    if eval_every <= 0:
        raise ValueError(f"eval_every must be a positive integer, got {eval_every!r}")

    device = torch.device(device_str)
    if device.type == "cuda" and not torch.cuda.is_available():
        print("CUDA is not available; falling back to CPU.")
        device = torch.device("cpu")
    print("Using device:", device)

    training_data = datasets.FashionMNIST(root=data_root, train=True, download=True, transform=ToTensor())
    test_data = datasets.FashionMNIST(root=data_root, train=False, download=True, transform=ToTensor())

    train_loader = DataLoader(training_data, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False)

    model = FashionCNN().to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    columns = ["iteration", "loss", "accuracy", "elapsed_time_sec", "batch_size", "lr"]
    results = []
    count = 0
    t0 = datetime.datetime.now()

    for epoch in range(epochs):
        model.train()
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            count += 1

            if count % eval_every == 0:
                acc = _evaluate(model, test_loader, device)
                results.append(dict(
                    iteration=count,
                    loss=float(loss.item()),
                    accuracy=float(acc),
                    elapsed_time_sec=(datetime.datetime.now() - t0).total_seconds(),
                    batch_size=batch_size,
                    lr=lr,
                ))

    # Explicit columns keep the frame's schema stable when `results` is empty,
    # so callers indexing df['iteration'] etc. do not hit a KeyError.
    return pd.DataFrame(results, columns=columns)

In [None]:
import matplotlib.pyplot as plt
def plot_metrics(df):
    """Draw three separate line charts from a training-log DataFrame.

    Each chart plots one of ``loss``, ``accuracy`` and ``elapsed_time_sec``
    against the ``iteration`` column and is shown as soon as it is drawn.
    """
    charts = (
        ("loss", "Loss over iterations"),
        ("accuracy", "Accuracy over iterations"),
        ("elapsed_time_sec", "Elapsed time (s) vs iterations"),
    )
    for column, heading in charts:
        plt.figure()
        plt.plot(df["iteration"], df[column])
        plt.title(heading)
        plt.show()

In [None]:
# Use the first CUDA device when PyTorch can see one, otherwise the CPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"
df = train(device_str=device, batch_size=100, lr=1e-3, epochs=5)
# A bare `df.tail()` is only rendered when it is the last expression of a
# notebook cell, so print it explicitly before plotting.
print(df.tail())
plot_metrics(df)