In [7]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms

# Pick the compute device: CUDA when torch reports it available, else the CPU
device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(device_name)

# Define the neural network architecture
class Net(nn.Module):
    """LeNet-style CNN mapping single-channel 28x28 images to 10 class logits.

    Layer stack: two (5x5 conv -> ReLU -> 2x2 max-pool) stages followed by
    three fully connected layers. With unpadded 5x5 convolutions and 2x2
    pooling, a 28x28 input shrinks 28 -> 24 -> 12 -> 8 -> 4, which is why
    ``fc1`` expects ``16 * 4 * 4`` input features.
    """

    def __init__(self):
        """Create the layers; attribute names double as ``state_dict`` keys."""
        super().__init__()
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.pool1 = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.pool2 = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(16 * 4 * 4, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return raw (un-normalised) class scores for a batch of images.

        Args:
            x: Image tensor whose trailing dimensions are (1, 28, 28).

        Returns:
            Tensor of shape (N, 10) holding one row of logits per image.

        Raises:
            RuntimeError: If the convolutional stages do not yield a
                16x4x4 feature map, i.e. the input was not 28x28.
        """
        x = self.pool1(self.relu(self.conv1(x)))
        x = self.pool2(self.relu(self.conv2(x)))

        # Guard the reshape below: view(-1, 256) happily accepts any tensor
        # whose element count is a multiple of 256, so a wrongly sized input
        # (e.g. 16 images of 32x32) would silently come out with a different
        # batch size instead of failing.
        if tuple(x.shape[-3:]) != (16, 4, 4):
            raise RuntimeError(
                'Net expects 1x28x28 inputs (16x4x4 feature map before fc1), '
                'got feature map of shape %s' % (tuple(x.shape[-3:]),)
            )
        x = x.view(-1, 16 * 4 * 4)

        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        return self.fc3(x)

# Optimizer hyperparameters; each one is forwarded to optim.ASGD further down
learning_rate: float = 0.01  # passed as `lr`
# Passed as ASGD's `alpha`.
# NOTE(review): the name suggests a momentum coefficient, but PyTorch documents
# ASGD's `alpha` as the power used in the eta update - confirm the intent.
momentum: float = 0.9
# Passed as ASGD's `t0`; change as needed.
# NOTE(review): PyTorch documents `t0` as the point at which averaging starts,
# not a gradient delay - confirm the intent.
delay: int = 1

# FashionMNIST train and test splits, stored under ./data (downloaded when
# missing) and served in mini-batches of 64. Only the training loader shuffles.
train_set = torchvision.datasets.FashionMNIST(
    root='./data',
    train=True,
    download=True,
    transform=transforms.ToTensor(),
)
train_loader = torch.utils.data.DataLoader(
    train_set,
    batch_size=64,
    shuffle=True,
)

test_set = torchvision.datasets.FashionMNIST(
    root='./data',
    train=False,
    download=True,
    transform=transforms.ToTensor(),
)
test_loader = torch.utils.data.DataLoader(
    test_set,
    batch_size=64,
    shuffle=False,
)

# Build the model on the chosen device and give its parameters to the optimizer
net = Net()
net = net.to(device)

# NOTE(review): torch.optim.ASGD is *Averaged* SGD. Its documentation describes
# `alpha` as the power for the eta update and `t0` as the point at which
# averaging starts, so the `momentum` / `delay` names used here may not mean
# what they suggest. Nothing in this script reads averaged weights back from
# the optimizer either - confirm ASGD is the optimizer that was intended.
optimizer = optim.ASGD(
    net.parameters(),
    lr=learning_rate,
    alpha=momentum,
    t0=delay,
)

# Classification loss applied to the raw logits returned by the network
criterion = nn.CrossEntropyLoss()

# Train for 10 epochs, printing the mean loss of every completed group of
# 100 mini-batches (a trailing partial group in an epoch is not reported)
for epoch in range(10):
    window_loss = 0.0
    for step, (inputs, labels) in enumerate(train_loader, start=1):
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Standard update: clear old gradients, forward, backward, step
        optimizer.zero_grad()
        loss = criterion(net(inputs), labels)
        loss.backward()
        optimizer.step()

        window_loss += loss.item()
        if step % 100 != 0:
            continue

        print('[%d, %5d] loss: %.3f' % (epoch + 1, step, window_loss / 100))
        window_loss = 0.0

print('Finished Training')

# Count top-1 hits over the test loader; no gradients are needed for scoring
correct = 0
total = 0
with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)

        # Index of the largest logit in each row is the predicted class
        predicted = torch.max(net(images), dim=1).indices

        total += labels.size(0)
        correct += (predicted == labels).sum().item()

# %d drops the fractional part of the percentage
print('Accuracy of the network on the 10000 test images: %d %%' % (100 * correct / total))


[1,   100] loss: 2.303
[1,   200] loss: 2.300
[1,   300] loss: 2.297
[1,   400] loss: 2.292
[1,   500] loss: 2.287
[1,   600] loss: 2.276
[1,   700] loss: 2.253
[1,   800] loss: 2.171
[1,   900] loss: 1.791
[2,   100] loss: 1.188
[2,   200] loss: 1.048
[2,   300] loss: 0.995
[2,   400] loss: 0.950
[2,   500] loss: 0.882
[2,   600] loss: 0.826
[2,   700] loss: 0.834
[2,   800] loss: 0.805
[2,   900] loss: 0.788
[3,   100] loss: 0.764
[3,   200] loss: 0.738
[3,   300] loss: 0.738
[3,   400] loss: 0.703
[3,   500] loss: 0.691
[3,   600] loss: 0.683
[3,   700] loss: 0.675
[3,   800] loss: 0.674
[3,   900] loss: 0.658
[4,   100] loss: 0.659
[4,   200] loss: 0.653
[4,   300] loss: 0.642
[4,   400] loss: 0.636
[4,   500] loss: 0.618
[4,   600] loss: 0.605
[4,   700] loss: 0.603
[4,   800] loss: 0.606
[4,   900] loss: 0.609
[5,   100] loss: 0.577
[5,   200] loss: 0.591
[5,   300] loss: 0.575
[5,   400] loss: 0.584
[5,   500] loss: 0.583
[5,   600] loss: 0.548
[5,   700] loss: 0.546
[5,   800] 