In [None]:
import torchvision
import torch
from torchvision.datasets import FashionMNIST
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader

In [None]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [None]:
# Load the training set with only ToTensor applied, so the statistics below
# are computed on the un-normalized images.
transform = torchvision.transforms.Compose([torchvision.transforms.ToTensor()])


dataset = torchvision.datasets.FashionMNIST(root='data',
                                            train=True,
                                            transform=transform,
                                            download=True)
loader = torch.utils.data.DataLoader(dataset,
                                     batch_size = 1024,
                                     shuffle = False,
                                     num_workers = 4)

# Pass 1: per-channel mean (sum of per-image means divided by the number of images).
mean = 0.0
for images, _ in loader:
    batch_samples = images.size(0)  # Batch size
    images = images.view(batch_samples, images.size(1), -1) # convert from (batch, 1, 28, 28) to (batch, 1, 784)
    mean += images.mean(2).sum(0)
mean = mean / len(loader.dataset)

# Pass 2: accumulate the squared deviations from the mean over every pixel.
variance = 0.0
for images, _ in loader:
    batch_samples = images.size(0)
    images = images.view(batch_samples, images.size(1), -1) # convert from (batch, 1, 28, 28) to (batch, 1, 784)
    # unsqueeze adds a trailing dimension so mean broadcasts against images
    variance += ((images - mean.unsqueeze(1))**2).sum([0,2])

# Computed once after the loop: only the fully accumulated sum is meaningful.
# 28*28 is the number of pixels per image, len(loader.dataset) the number of images.
std = torch.sqrt(variance / (len(loader.dataset)*28*28))

print(mean, std)

In [None]:
# Normalize with the dataset statistics computed earlier in the notebook.
# `transforms` alone is never imported in this notebook, so the fully qualified
# torchvision.transforms.Normalize is used. float(...) turns the one-element
# mean/std tensors into plain Python numbers, which is what Normalize documents.
transform = torchvision.transforms.Compose([torchvision.transforms.ToTensor(),
                                            torchvision.transforms.Normalize((float(mean), ), (float(std), ))])
train_data = FashionMNIST(root = 'data',
                          train = True,
                          download = True,
                          transform = transform)
data_loader = DataLoader(dataset = train_data, batch_size = 1024, shuffle = True)

test_data = FashionMNIST(root = 'data',
                         train = False,
                         download = True,
                         transform = transform)
# No shuffling for evaluation: a fixed batch order keeps repeated evaluations reproducible.
test_loader = DataLoader(dataset = test_data, batch_size = 1024, shuffle = False)


# Sanity check: print the shape of the first (normalized) training image
image, label = train_data[0]
print(image.shape)

In [None]:
#Define show image function
import matplotlib.pyplot as plt
import numpy as np
def show_image(image, mean=0.5, std=0.5):
    """Undo the normalization of a channel-first image tensor and display it.

    Args:
        image: tensor laid out as (channels, height, width); ``.numpy()`` is
            called on it, so it is expected to live on the CPU.
        mean: mean that was subtracted during normalization.
        std: standard deviation the image was divided by during normalization.
            The defaults (0.5, 0.5) reproduce the previous ``image / 2.0 + 0.5``.
    """
    image = image * std + mean  # inverse of (x - mean) / std
    img = image.numpy()
    img = np.transpose(img, (1, 2, 0))  # channel-first -> channel-last for imshow
    # Floating-point round-off can leave values marginally outside [0, 1];
    # clip so imshow does not warn about out-of-range RGB data.
    img = np.clip(img, 0.0, 1.0)
    plt.imshow(img, cmap = 'gray')
    plt.show()

# Preview the first 8 images of one training batch, un-normalized with the same
# statistics that were passed to Normalize.
images, label = next(iter(data_loader))
show_image(torchvision.utils.make_grid(images[:8]), mean=float(mean), std=float(std))

In [None]:
# Model: flatten each input, then apply one linear layer mapping 784 features to 10 outputs.
layers = [
    nn.Flatten(),
    nn.Linear(784, 10),
]
# Module.to() returns the module itself, so construction and device placement can be chained.
model = nn.Sequential(*layers).to(device)
print(model)

In [None]:
# Smoke test: feed a random batch of five 28x28 inputs through the model and print the output shape.
dummy_shape = (5, 28, 28)
input_tensor = torch.randn(*dummy_shape).to(device)
output = model(input_tensor)
print(output.shape)

# Define Loss, Optimizer, and evaluation function

In [None]:
# Training objective (cross-entropy) and update rule (plain SGD)
learning_rate = 0.01
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(params = model.parameters(), lr = learning_rate)

# Evaluation function
def evaluate(model, test_loader, criterion, device=None):
    """Compute the mean loss and the accuracy of ``model`` over ``test_loader``.

    Args:
        model: ``nn.Module`` to evaluate. It is switched to eval mode and is
            left in eval mode when the function returns.
        test_loader: iterable yielding ``(images, labels)`` batches.
        criterion: callable ``criterion(outputs, labels)`` returning a scalar
            loss tensor. It is assumed to return the *mean* over the batch
            (the default reduction of ``nn.CrossEntropyLoss``).
        device: device each batch is moved to. When omitted, the device of the
            model's first parameter is used (CPU if the model has no parameters),
            so the function does not depend on a notebook-level global.

    Returns:
        Tuple ``(test_loss, accuracy)``: the loss averaged over all samples and
        the accuracy in percent.

    Raises:
        ValueError: if ``test_loader`` yields no samples.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    model.eval()
    total = 0
    correct = 0
    test_loss = 0.0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            batch_size = labels.size(0)
            # Weight the batch-mean loss by the batch size so a smaller final
            # batch does not count as much as a full one.
            test_loss += criterion(outputs, labels).item() * batch_size
            predicted = outputs.argmax(dim=1)
            total += batch_size
            correct += (predicted == labels).sum().item()

    if total == 0:
        raise ValueError("test_loader yielded no samples")

    accuracy = 100 * correct / total
    test_loss = test_loss / total
    return test_loss , accuracy

In [None]:
# Baseline: loss and accuracy of the model on the test set before any training.
baseline_metrics = evaluate(model, test_loader, criterion)
test_loss, accuracy = baseline_metrics
print(f"Test Loss: {test_loss:.4f}")
print(f"Accuracy: {accuracy:.2f}%")

# Train model

In [None]:
# Per-epoch metric histories (one entry appended per epoch)
train_loss = []
train_accuracy = []
test_loss = []
test_accuracy = []

# Train the model
max_epochs = 100
for epoch in range(max_epochs):
    # evaluate() switches the model to eval mode and leaves it there, so put it
    # back into training mode at the start of every epoch.
    model.train()

    # Running totals for this epoch
    running_loss = 0.0
    running_corrects = 0
    total = 0

    for images, labels in data_loader:
        images, labels = images.to(device), labels.to(device)

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # criterion returns the batch-mean loss; weight it by the batch size so
        # the smaller final batch does not skew the epoch average.
        batch_size = labels.size(0)
        running_loss += loss.item() * batch_size

        # Determine class predictions and accuracy
        predicted = outputs.argmax(dim=1)
        running_corrects += (predicted == labels).sum().item()
        total += batch_size

        # Backward and optimize
        loss.backward()
        optimizer.step()

    epoch_accuracy = 100 * running_corrects / total
    epoch_loss = running_loss / total
    test_loss_epoch, test_accuracy_epoch = evaluate(model, test_loader, criterion)
    print(f"Epoch {epoch+1}/{max_epochs}, Train Loss: {epoch_loss:.4f}, Train Accuracy: {epoch_accuracy:.2f}%, Test Loss: {test_loss_epoch:.4f}, Test Accuracy: {test_accuracy_epoch:.2f}%")
    train_loss.append(epoch_loss)
    train_accuracy.append(epoch_accuracy)
    test_loss.append(test_loss_epoch)
    test_accuracy.append(test_accuracy_epoch)

In [None]:
# Loss curves: training vs. test, one point per epoch
fig, ax = plt.subplots()
ax.plot(train_loss, label = 'train loss')
ax.plot(test_loss, label = 'test loss')
ax.legend()
plt.show()

In [None]:
# Accuracy curves: training vs. test, one point per epoch
fig, ax = plt.subplots()
ax.plot(train_accuracy, label = 'train accuracy')
ax.plot(test_accuracy, label = 'test accuracy')
ax.legend()
plt.show()