In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils

import torchvision
import torchvision.transforms as transforms

torch.set_printoptions(linewidth=120)

In [2]:
# Load the FashionMNIST training split (train=True) from ./data/FashionMNIST,
# downloading it when it is not already there. ToTensor() converts each image
# into a tensor when a sample is fetched.
train_set = torchvision.datasets.FashionMNIST(
    root='./data/FashionMNIST',
    train=True, 
    download=True,
    transform=transforms.Compose([
        transforms.ToTensor()
    ])
)

In [3]:
# A torchvision dataset is not a tensor and has no `.shape` attribute
# (`train_set.shape` raises AttributeError). The stacked raw images are exposed
# as `train_set.data`; use `len(train_set)` if only the sample count is needed.
train_set.data.shape

In [None]:
class Network(nn.Module):
    """Small convolutional classifier: two conv/pool stages and three linear layers.

    With 5x5 kernels, no padding and 2x2 pooling, a 1x28x28 input is reduced to
    12 feature maps of 4x4, which is where the 12*4*4 flatten size comes from.
    The final layer emits 10 raw scores per sample (no softmax is applied).
    """

    def __init__(self):
        super().__init__()
        # Feature extractor: 1 -> 6 -> 12 channels.
        self.conv1 = nn.Conv2d(1, 6, kernel_size=5)
        self.conv2 = nn.Conv2d(6, 12, kernel_size=5)

        # Classifier head: 192 -> 120 -> 60 -> 10.
        self.fc1 = nn.Linear(12 * 4 * 4, 120)
        self.fc2 = nn.Linear(120, 60)
        self.out = nn.Linear(60, 10)

    def forward(self, t):
        """Run the forward pass on `t` and return the raw output scores."""
        # Each stage is convolution -> ReLU -> 2x2 max pooling with stride 2.
        for conv in (self.conv1, self.conv2):
            t = F.max_pool2d(F.relu(conv(t)), kernel_size=2, stride=2)

        # Flatten the pooled feature maps into rows of 12*4*4 values.
        flat = t.reshape(-1, 12 * 4 * 4)

        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        return self.out(hidden)

In [None]:
# Globally switch off autograd tracking for the inference-only cells that follow.
# This is process-wide state: it stays off until set_grad_enabled(True) is called.
torch.set_grad_enabled(False)

In [None]:
# Instantiate the model with freshly initialised weights.
network = Network()

In [None]:
# Fetch the first item the dataset yields.
sample = next(iter(train_set))

In [None]:
# A sample unpacks into the image and its label.
image, label = sample

In [None]:
# Insert a leading dimension of size 1 so the single image forms a batch of one,
# which is the layout the network's forward pass is fed below.
batch = torch.unsqueeze(image, dim=0)
batch.shape

In [None]:
# Forward pass on the one-image batch.
pred = network(batch)

In [None]:
# If we want the predictions as probabilities (positive numbers adding up to 1),
# apply softmax() to the network's output.

pred

In [None]:
# The label that came with the sample, for comparison with the prediction.
label

In [None]:
# Index of the largest score in each row, i.e. the predicted class.
pred.argmax(dim=1)

In [None]:
# Normalise the scores along dim 1 into probabilities.
F.softmax(pred, dim=1)

In [None]:
# Sanity check: for a single row the softmax values should sum to 1
# (up to floating-point rounding).
F.softmax(pred, dim=1).sum()

### Train the Model Using a Batch of Input Data

In [None]:
# Wrap the dataset in a loader that yields batches of 10 samples.
data_loader = torch.utils.data.DataLoader(train_set, batch_size=10)

In [None]:
# Pull the first batch from the loader.
batch = next(iter(data_loader))

# A batch unpacks into an images tensor and a labels tensor.
images, labels = batch
images.shape, labels.shape

In [None]:
# Forward pass on the whole batch. Autograd is still globally disabled at this
# point in the notebook, so these predictions carry no gradient graph.
preds = network(images)

In [None]:
# Raw output scores, one row per image in the batch.
preds

In [None]:
# Predicted class index for each row of the batch.
preds.argmax(dim=1)

In [None]:
# The labels of the batch, for comparison with the predicted indices.
labels

In [None]:
# Element-wise comparison of predicted indices with the labels, then count the matches.
preds.argmax(dim=1).eq(labels).sum()

In [None]:
def get_num_correct(preds, labels):
    """Count the rows of `preds` whose highest-scoring index equals the label.

    `preds` holds one row of scores per sample and `labels` the expected index
    for each row. The count is returned as a plain Python number via `.item()`.
    """
    predicted_classes = preds.argmax(dim=1)
    matches = predicted_classes == labels
    return matches.sum().item()

In [None]:
# Number of correct predictions in the current batch.
get_num_correct(preds, labels)

### The Training Process
1. Get a batch from the training set
2. Pass the batch to the network
3. Calculate the loss (difference between the predicted values and the true values)
4. Calculate the gradient of the loss function with respect to the network's weights
5. Update the weights using the gradients to reduce the loss
6. Repeat steps 1-5 until one epoch is completed (every batch has been passed through the network)
7. Repeat steps 1-6 for as many epochs as required to obtain the desired level of accuracy

In [None]:
# NOTE(review): notebook convention is to keep all imports in the first cell.
import torch.optim as optim
# Turn autograd tracking back on (it was globally disabled earlier); only
# forward passes run from here on build a gradient graph.
torch.set_grad_enabled(True)

### Calculate the Loss

In [None]:
# The existing `preds` was computed while autograd was globally disabled, so it
# carries no gradient graph. Redo the forward pass now that tracking is on,
# otherwise this loss could never be backpropagated.
preds = network(images)
loss = F.cross_entropy(preds, labels) # Calculate the loss
loss.item()

In [None]:
# No backward pass has been run on this network up to this point, so no
# gradient has been stored on the weights yet (expect None here).
print(network.conv1.weight.grad)

### Updating the Weights

In [None]:
# Adam optimizer over all of the network's parameters, learning rate 0.001.
optimizer = optim.Adam(network.parameters(), lr=0.001)

In [None]:
# Loss value before the weight update, as a Python float.
loss.item()

In [None]:
# Number of correct predictions before the weight update.
get_num_correct(preds, labels)

In [None]:
# optimizer.step() only consumes gradients already stored on the parameters.
# A backward pass therefore has to run first; without one no parameter has a
# gradient and the step leaves every weight unchanged.
optimizer.zero_grad()                  # clear gradients left by an earlier run of this cell
preds = network(images)                # forward pass with autograd enabled
loss = F.cross_entropy(preds, labels)
loss.backward()                        # populate .grad on every parameter
optimizer.step() # updating the weights

In [None]:
# Repeat the forward pass on the same batch with the weights as they are after
# optimizer.step(), and recompute the loss for comparison.
preds = network(images)
loss = F.cross_entropy(preds, labels)

In [None]:
# Loss value after the optimizer step.
loss.item()

In [None]:
# Number of correct predictions after the optimizer step.
get_num_correct(preds, labels)

In [None]:
# One complete training step on a single batch, starting from a fresh model.
network = Network()

# Batches of 100 samples; the optimizer is bound to the NEW network's parameters,
# so it must be created after the network above.
train_loader = torch.utils.data.DataLoader(train_set, batch_size=100)
optimizer = optim.Adam(network.parameters(), lr=0.001)

# Take the first batch from the loader.
batch = next(iter(train_loader))
images, labels = batch

# Forward pass and loss.
preds = network(images)
loss = F.cross_entropy(preds, labels)

# Backpropagate to fill in the gradients, then update the weights.
loss.backward()
optimizer.step()

In [None]:
# `loss` still holds the value computed before the optimizer step.
print('loss1:', loss.item())
# Re-evaluate the same batch with the updated weights and print the new loss.
preds = network(images)
loss = F.cross_entropy(preds, labels)
print('loss2:', loss.item())