In [1]:
import time

import torch
import torch.optim as optim
import torchvision.transforms as transforms
from torch import nn
from torch.utils.data import DataLoader
from torchvision.datasets import FashionMNIST
from tqdm.notebook import tqdm

# Use the GPU when PyTorch can see one; otherwise fall back to the CPU instead
# of unconditionally naming a CUDA device that may not exist.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [2]:
batch_size = 64

# Preprocessing applied to every image, in this order: convert to a tensor,
# normalize with mean 0.5 / std 0.5, then resize to 224 x 224.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,)),
        transforms.Resize((224, 224)),
    ]
)

# FashionMNIST train / test splits stored under ./data; only the training
# split requests a download.
train_dataset = FashionMNIST(
    root='./data',
    train=True,
    transform=transform,
    download=True,
)
test_dataset = FashionMNIST(
    root='./data',
    train=False,
    transform=transform,
)

# Mini-batch iterators: the training data is reshuffled, the test data is not.
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True,
)
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=batch_size,
    shuffle=False,
)


In [3]:
class AlexNet(nn.Module):
    """AlexNet-style convolutional classifier for single-channel images.

    The spatial sizes noted next to each layer assume a 224 x 224 input; the
    first fully connected layer requires the feature extractor to produce
    exactly 256 * 5 * 5 values per sample.

    All layers are created on the default device. Move the model with
    ``.to(device)`` after construction rather than relying on a global.

    Args:
        num_classes: Width of the final linear layer (number of logits per
            sample). Defaults to 1000, the previously hard-coded value.
    """

    def __init__(self, num_classes: int = 1000):
        super().__init__()
        # Positional Conv2d arguments are:
        # in_channels, out_channels, kernel_size, stride, padding.
        self.feature_extractor = nn.Sequential(
            nn.Conv2d(1, 96, 11, 4, 0),     # 54 x 54
            nn.ReLU(),
            nn.MaxPool2d(3, 2),             # 26 x 26
            nn.Conv2d(96, 256, 5, 1, 2),    # 26 x 26
            nn.ReLU(),
            nn.MaxPool2d(3, 2),             # 12 x 12
            nn.Conv2d(256, 384, 3, 1, 1),   # 12 x 12
            nn.ReLU(),
            nn.Conv2d(384, 384, 3, 1, 1),   # 12 x 12
            nn.ReLU(),
            nn.Conv2d(384, 256, 3, 1, 1),   # 12 x 12
            nn.ReLU(),
            nn.MaxPool2d(3, 2),             # 5 x 5
            nn.Flatten(),
        )  # after Flatten the shape is: batch x (256 * 5 * 5)
        self.fc = nn.Sequential(
            nn.Linear(256 * 5 * 5, 4096),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(4096, num_classes),
        )

    def forward(self, x):
        """Return logits of shape ``(batch, num_classes)`` for a batch of images.

        Args:
            x: Image batch of shape ``(batch, 1, H, W)``; H = W = 224 gives
                the 5 x 5 feature map the classifier head expects.
        """
        x = self.feature_extractor(x)
        x = self.fc(x)
        return x

In [5]:
# Train AlexNet on the FashionMNIST training loader with Adam and
# cross-entropy loss, reporting the mean batch loss after every epoch and the
# total wall-clock time at the end.

# One constant drives both the loop bound and the "[epoch/total]" message, so
# the printed total can no longer disagree with the number of epochs run.
num_epochs = 10
learning_rate = 0.001

model = AlexNet().to(device)

# Training mode: the model contains Dropout layers.
model.train()

optimizer = optim.Adam(model.parameters(), learning_rate)
loss_fn = nn.CrossEntropyLoss()

total_step = len(train_loader)  # number of mini-batches per epoch
start_time = time.time()
for epoch in range(num_epochs):
    epoch_loss = 0.0
    for images, labels in tqdm(train_loader, total=total_step, leave=True):
        # Move the batch to the same device the model was moved to.
        images = images.to(device)
        labels = labels.to(device)

        outputs = model(images)
        loss = loss_fn(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()

    # Print average epoch loss
    average_loss = epoch_loss / total_step
    print(f"Epoch [{epoch+1}/{num_epochs}], Average Loss: {average_loss:.4f}")

end_time = time.time()
training_time = end_time - start_time
print(f"Training Time: {training_time:.2f} seconds")



  0%|          | 0/938 [00:00<?, ?it/s]

Epoch [1/5], Average Loss: 0.6946


  0%|          | 0/938 [00:00<?, ?it/s]

Epoch [2/5], Average Loss: 0.3664


  0%|          | 0/938 [00:00<?, ?it/s]

Epoch [3/5], Average Loss: 0.3192


  0%|          | 0/938 [00:00<?, ?it/s]

Epoch [4/5], Average Loss: 0.2976


  0%|          | 0/938 [00:00<?, ?it/s]

Epoch [5/5], Average Loss: 0.2998


  0%|          | 0/938 [00:00<?, ?it/s]

KeyboardInterrupt: 