In [23]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor, Lambda, Compose

# Sanity check: show whether PyTorch can see a CUDA device in this kernel.
torch.cuda.is_available()


True

In [24]:
# Load the Fashion-MNIST train and test splits from DATA_ROOT (download=True
# asks torchvision to fetch the files) and convert each image with ToTensor().
DATA_ROOT = "./data"

training_data = datasets.FashionMNIST(
    root=DATA_ROOT,
    train=True,
    download=True,
    transform=ToTensor(),
)

test_data = datasets.FashionMNIST(
    root=DATA_ROOT,
    train=False,
    download=True,
    transform=ToTensor(),
)

# NOTE(review): this cell previously ended with a truncated `class ToTe`
# fragment, which is a SyntaxError. It was dropped because the rest of the
# definition is not recoverable from the notebook.

In [25]:
# Batching configuration shared by both loaders.
BATCH_SIZE = 64
NUM_WORKERS = 4

# Training batches are reshuffled on every pass over the data.
train_dl = DataLoader(training_data, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS)

# Evaluation only aggregates over the whole split, so shuffling adds nothing;
# a fixed order keeps per-batch results reproducible between runs.
test_dl = DataLoader(test_data, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS)

In [26]:
# Use the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [27]:
# Show which device was selected for the rest of the notebook.
print(device)

cuda


In [28]:
# Everything in PyTorch is a module: custom layers are modules, and so are models.
# nn.Sequential is itself a module that contains other modules.
class Net(nn.Module):
    """Fully connected classifier: flatten, two ReLU hidden layers, dropout, linear head.

    Args:
        input_neurons: Number of features per sample once the input is flattened.
        output_neurons: Number of values the final linear layer emits per sample.
    """

    def __init__(self, input_neurons, output_neurons):
        super().__init__()

        # Collapse every dimension after the first into a single feature axis.
        self.flatten = nn.Flatten()

        # Hidden block 1: input_neurons -> 256, then ReLU.
        self.dense1, self.relu1 = nn.Linear(input_neurons, 256), nn.ReLU()

        # Hidden block 2: 256 -> 128, then ReLU.
        self.dense2, self.relu2 = nn.Linear(256, 128), nn.ReLU()

        # Dropout with p=0.2, applied before the output layer.
        self.dropout = nn.Dropout(0.2)

        # Output layer: 128 -> output_neurons raw scores (no activation applied).
        self.dense3 = nn.Linear(128, output_neurons)

    def forward(self, x):
        """Pass ``x`` through each layer in definition order and return the raw scores."""
        # A plain tuple (not nn.Sequential) so no extra submodule gets registered.
        stages = (
            self.flatten,
            self.dense1,
            self.relu1,
            self.dense2,
            self.relu2,
            self.dropout,
            self.dense3,
        )
        for stage in stages:
            x = stage(x)
        return x


# Build the network: 28*28 = 784 input features per sample, 10 output scores.
net = Net(input_neurons=28 * 28, output_neurons=10)

# Move the network to the selected device. Leaving the call as the cell's last
# expression also displays the module structure.
net.to(device)

Net(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (dense1): Linear(in_features=784, out_features=256, bias=True)
  (relu1): ReLU()
  (dense2): Linear(in_features=256, out_features=128, bias=True)
  (relu2): ReLU()
  (dropout): Dropout(p=0.2, inplace=False)
  (dense3): Linear(in_features=128, out_features=10, bias=True)
)

In [29]:
# Print the layer-by-layer structure of the network.
print(net)

Net(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (dense1): Linear(in_features=784, out_features=256, bias=True)
  (relu1): ReLU()
  (dense2): Linear(in_features=256, out_features=128, bias=True)
  (relu2): ReLU()
  (dropout): Dropout(p=0.2, inplace=False)
  (dense3): Linear(in_features=128, out_features=10, bias=True)
)


In [30]:
def train(dataloader, model, criterion, optimizer, log_every=10):
    """Run one optimisation pass over ``dataloader``.

    Each batch is moved to the notebook-level ``device``, pushed through
    ``model``, scored with ``criterion``, and used for one optimizer step.

    Args:
        dataloader: Sized iterable yielding ``(X, y)`` batches.
        model: Module to optimise; it is switched to training mode.
        criterion: Callable ``criterion(predictions, targets)`` returning a
            scalar loss tensor.
        optimizer: Optimizer that updates ``model``'s parameters.
        log_every: Print the current loss every this many batches. ``0`` or
            ``None`` disables progress output. Defaults to 10.
    """
    model.train()
    num_batches = len(dataloader)

    for batch_idx, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)

        # Forward pass and loss.
        loss = criterion(model(X), y)

        # Backward pass: clear stale gradients, backpropagate, update weights.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # The counter is a batch index within this pass, not an epoch number.
        if log_every and batch_idx % log_every == 0:
            print(f"Batch: {batch_idx}/{num_batches} Loss: {loss.item()}")


def test(dataloader, model, criterion):
    model.eval()  # -> Eval mode

    total_loss = 0
    total_correct = 0

    with torch.no_grad():
        for X, y in dataloader:
            X = X.to(device)
            y = y.to(device)

            pred = model(X)

            loss = criterion(pred, y)
            total_loss += loss.item()

            _, pred_idx = torch.max(pred, dim=1)
            total_correct += torch.sum(pred_idx == y).item()

    return total_loss / len(dataloaderect / len(dataloader)
            

In [31]:
# Cross-entropy loss and an Adam optimizer over every parameter of the network.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=net.parameters(), lr=1e-3)

# One pass over the training batches.
train(dataloader=train_dl, model=net, criterion=criterion, optimizer=optimizer)

# Score the network on the test batches.
test_loss, test_acc = test(dataloader=test_dl, model=net, criterion=criterion)

Epoch: 0/938 Loss: 2.3100764751434326
Epoch: 10/938 Loss: 1.7960045337677002
Epoch: 20/938 Loss: 1.2295677661895752
Epoch: 30/938 Loss: 1.1003856658935547
Epoch: 40/938 Loss: 0.9090185165405273
Epoch: 50/938 Loss: 0.6407350897789001
Epoch: 60/938 Loss: 0.9122637510299683
Epoch: 70/938 Loss: 0.8925707936286926
Epoch: 80/938 Loss: 0.7210111021995544
Epoch: 90/938 Loss: 0.7114337086677551
Epoch: 100/938 Loss: 0.7569505572319031
Epoch: 110/938 Loss: 0.6603401899337769
Epoch: 120/938 Loss: 0.6825844645500183
Epoch: 130/938 Loss: 0.5834201574325562
Epoch: 140/938 Loss: 0.6621996760368347
Epoch: 150/938 Loss: 0.6589096188545227
Epoch: 160/938 Loss: 0.6676016449928284
Epoch: 170/938 Loss: 0.6308636665344238
Epoch: 180/938 Loss: 0.5597556829452515
Epoch: 190/938 Loss: 0.5062947273254395
Epoch: 200/938 Loss: 0.8088880777359009
Epoch: 210/938 Loss: 0.3685842752456665
Epoch: 220/938 Loss: 0.731110692024231
Epoch: 230/938 Loss: 0.5051828622817993
Epoch: 240/938 Loss: 0.6201303005218506
Epoch: 250/9

In [32]:
# Report the two values returned by test() in the previous cell.
print(f"Test Loss: {test_loss} Test Accuracy: {test_acc}")

Test Loss: 0.41618773493037864 Test Accuracy: 54.1656050955414


In [33]:
# Display name for each class index, in index order.
classes = [
    "T-shirt/top",
    "Trouser",
    "Pullover",
    "Dress",
    "Coat",
    "Sandal",
    "Shirt",
    "Sneaker",
    "Bag",
    "Ankle boot",
]

# Enter eval mode
net.eval()

# First test sample: the image tensor and its integer class index.
x, y = test_data[0]

# Add an explicit leading batch dimension before moving to the device.
# Without it, Flatten(start_dim=1) treats the sample's first axis (the channel
# axis produced by ToTensor) as the batch, which only works for 1-channel images.
x = x.unsqueeze(0).to(device)

with torch.no_grad():
    pred = net(x)

    # pred[0] holds the scores for the single sample; .item() gives a plain int index.
    predicted, actual = classes[pred[0].argmax(0).item()], classes[y]
    print(f'Predicted: "{predicted}", Actual: "{actual}"')

Predicted: "Ankle boot", Actual: "Ankle boot"
