# SROP Winter 2025

### Dataset loading

In [1]:
import numpy as np
import torch
from torch import nn, optim
import torch.nn.functional as F
import torchvision
import matplotlib.pyplot as plt
import tqdm
from torch.utils.data import DataLoader, Subset
from sklearn.model_selection import train_test_split
from torchsummary import summary

ModuleNotFoundError: No module named 'numpy'

In [None]:
# Choose the compute backend for the notebook. Availability is probed in
# priority order: the MPS backend first, then CUDA, and finally plain CPU
# when neither accelerator reports itself as available.
if torch.backends.mps.is_available():
    DEVICE = "mps"
elif torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"
print(f"Using device: {DEVICE}")

In [None]:
# Deterministic preprocessing used for evaluation data (validation and test):
# convert the image to a tensor, then normalize its single channel.
# NOTE(review): (0.1307, 0.3081) look like the statistics usually quoted for
# the MNIST digits dataset -- confirm they are the intended values for
# FashionMNIST. They are kept unchanged here.
eval_transform = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize((0.1307,), (0.3081,))
])

# Training-only preprocessing: random augmentation followed by the same
# tensor conversion and normalization as the evaluation pipeline.
augment_transform = torchvision.transforms.Compose([
    torchvision.transforms.RandomHorizontalFlip(),
    torchvision.transforms.RandomAffine(degrees=5, translate=(0.05, 0.05), scale=(0.95, 1.05), shear=0.05),
    torchvision.transforms.ColorJitter(brightness=0.1, contrast=0.1),
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize((0.1307,), (0.3081,))
])

# Two dataset objects over the same training images. A Subset only holds a
# reference to its parent dataset, so if the training and validation subsets
# shared one parent, assigning the augmenting transform to that parent would
# silently augment the validation images as well. Giving the validation split
# its own parent keeps it deterministic.
train_dataset = torchvision.datasets.FashionMNIST(
    'data/', train=True, download=True,
    transform=augment_transform
)
val_dataset = torchvision.datasets.FashionMNIST(
    'data/', train=True, download=True,
    transform=eval_transform
)
test_dataset = torchvision.datasets.FashionMNIST(
    'data/', train=False, download=True,
    transform=eval_transform
)

# Create a 10% validation set using stratification, so that the label
# proportions of the training data are mirrored in both splits.
train_indices, val_indices, _, _ = train_test_split(
    range(len(train_dataset)),
    train_dataset.targets,
    stratify=train_dataset.targets,
    test_size=0.1
)

# The index lists are disjoint; each subset reads through the parent dataset
# that carries the appropriate transform.
train_split = Subset(train_dataset, train_indices)
val_split = Subset(val_dataset, val_indices)

# Batch sizes: the validation loader reuses the training batch size, while
# the test loader has its own.
train_batch_size, test_batch_size = 512, 256

# Wrap each split in a DataLoader. The training and validation loaders
# reshuffle their samples; the test loader iterates in dataset order.
train_batches = DataLoader(
    dataset=train_split,
    batch_size=train_batch_size,
    shuffle=True,
)
val_batches = DataLoader(
    dataset=val_split,
    batch_size=train_batch_size,
    shuffle=True,
)
test_batches = DataLoader(
    dataset=test_dataset,
    batch_size=test_batch_size,
    shuffle=False,
)

# Report how many batches one pass over each loader yields.
print("Number of training batches:", len(train_batches))
print("Number of validation batches:", len(val_batches))
print("Number of test batches:", len(test_batches))