In [1]:
import torch
from torch import nn

In [2]:
import torchvision
from torchvision import datasets

In [3]:
from torchvision import transforms

## Fashion MNIST

In [4]:
# Training split of Fashion-MNIST; images are converted to float tensors, labels are left as-is.
train_data = datasets.FashionMNIST(
    root="data",
    train=True,
    download=True,
    transform=torchvision.transforms.ToTensor(),
    target_transform=None,
)

# Held-out test split, prepared with the exact same transforms.
test_data = datasets.FashionMNIST(
    root="data",
    train=False,
    download=True,
    transform=torchvision.transforms.ToTensor(),
    target_transform=None,
)

In [5]:
len(train_data), len(test_data)

(60000, 10000)

In [6]:
image, label = train_data[0]

In [7]:
class_names = train_data.classes
class_names

['T-shirt/top',
 'Trouser',
 'Pullover',
 'Dress',
 'Coat',
 'Sandal',
 'Shirt',
 'Sneaker',
 'Bag',
 'Ankle boot']

In [8]:
class_to_idx = train_data.class_to_idx

## Visualizing Data

In [9]:
import plotly.express as px
import numpy as np

In [10]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go

In [11]:
from typing import Union

In [12]:
def showImage(image: Union[torch.Tensor, np.ndarray], label: int) -> None:
    """Show a single Fashion-MNIST image with its class name as the x-axis title.

    :param image: Image data. A 3-D input (e.g. ``[1, H, W]``) has its
        singleton dimensions squeezed away before plotting.
    :type image: Union[torch.Tensor, np.ndarray]

    :param label: Class index into the Fashion-MNIST class list (0-9).
    :type label: int

    :raises ValueError: If ``label`` is outside the valid class-index range.
    """

    class_nm = ['T-shirt/top','Trouser','Pullover','Dress',
                'Coat','Sandal','Shirt','Sneaker','Bag','Ankle boot']

    # Accept 0-d tensors / numpy integers and reject out-of-range indices;
    # a negative index would otherwise silently wrap to the wrong class name.
    label = int(label)
    if not 0 <= label < len(class_nm):
        raise ValueError(f"label must be in [0, {len(class_nm) - 1}], got {label}")

    if image.ndim == 3:
        image = image.squeeze()

    fig = px.imshow(image, color_continuous_scale='gray', labels=dict(x=f"{class_nm[label]}"))
    # Hide the colour bar and tick labels: only the picture and its class name matter.
    fig.update_layout(coloraxis_showscale=False)
    fig.update_xaxes(showticklabels=False)
    fig.update_yaxes(showticklabels=False)

    fig.show()

In [13]:
showImage(image, label)

In [14]:
def plotRandomImg(train_data: torchvision.datasets.mnist.FashionMNIST, grid_len=5, seed=42):
    """Plot a ``grid_len`` x ``grid_len`` grid of randomly chosen dataset images.

    Each subplot is a heatmap of one image, titled (on the x axis) with its class name.

    :param train_data: Dataset to sample from; indexing it must yield ``(image, label)``.
    :type train_data: torchvision.datasets.mnist.FashionMNIST

    :param grid_len: Number of rows and columns, defaults to 5. Tot images = grid_len * grid_len
    :type grid_len: int, optional

    :param seed: Seed for the random sample selection, defaults to 42.
    :type seed: int, optional

    :raises ValueError: If ``grid_len`` is smaller than 1.

    :return: The assembled plotly figure.
    """
    if grid_len < 1:
        raise ValueError(f"grid_len must be >= 1, got {grid_len}")

    # NOTE: this seeds the *global* torch RNG, exactly as before, so the same
    # seed keeps selecting the same images.
    torch.manual_seed(seed=seed)

    # Plain Python ints are the safest way to index a dataset.
    rand_indexs = torch.randint(0, len(train_data), size=[grid_len*grid_len]).tolist()

    # Prefer the dataset's own class names; fall back to the Fashion-MNIST list.
    class_nm = getattr(train_data, "classes", None) or [
        'T-shirt/top','Trouser','Pullover','Dress',
        'Coat','Sandal','Shirt','Sneaker','Bag','Ankle boot']

    fig = make_subplots(grid_len, grid_len)

    for samp_ind, data_index in enumerate(rand_indexs):
        # Convert the flat sample counter into 1-based subplot coordinates.
        row, col = divmod(samp_ind, grid_len)
        row, col = row + 1, col + 1

        img_data, label = train_data[data_index]
        # showscale=False hides the per-trace colour bar; the layout-level
        # coloraxis setting does not apply to traces that own their colour scale.
        fig.add_trace(go.Heatmap(z=np.asarray(img_data.squeeze()), showscale=False), row, col)
        fig.update_xaxes(title_text=f"{class_nm[label]}", row=row, col=col)

    # Heatmaps draw row 0 at the bottom; flip every y axis once so images are upright.
    fig.update_yaxes(autorange="reversed")
    fig.update_layout(height=1000, width=1200)

    return fig

In [15]:
plotRandomImg(train_data, seed=212)

In [16]:
from torch.utils.data import DataLoader

In [17]:
BATCH_SIZE = 32

In [18]:
# Turn datasets into iterables (batches)
# Training batches: reshuffled at the start of every epoch.
train_dataloader = DataLoader(dataset=train_data, batch_size=BATCH_SIZE, shuffle=True)

# Test batches: kept in dataset order (shuffling is not needed for evaluation).
test_dataloader = DataLoader(dataset=test_data, batch_size=BATCH_SIZE, shuffle=False)


In [19]:
train_features_batch, train_labels_batch = next(iter(train_dataloader))

In [20]:
x = train_features_batch[0]

In [21]:
x.flatten().shape

torch.Size([784])

In [22]:
train_features_batch[0].shape

torch.Size([1, 28, 28])

In [23]:


# Take one sample from the batch and an nn.Flatten layer to run it through
# (every nn module can be called like a model to perform a forward pass).
x = train_features_batch[0]
flatten_model = nn.Flatten()

# Forward pass through the flatten layer
output = flatten_model(x)

# Report the shapes on either side of the layer
print(f"Shape before flattening: {x.shape} -> [color_channels, height, width]")
print(f"Shape after flattening: {output.shape} -> [color_channels, height*width]")

Shape before flattening: torch.Size([1, 28, 28]) -> [color_channels, height, width]
Shape after flattening: torch.Size([1, 784]) -> [color_channels, height*width]


In [24]:
from torch import nn

In [25]:
class FashionMNISTModelV0(nn.Module):
    """Baseline classifier: flatten the input, then apply two stacked linear layers.

    Args:
        input_shape: Number of features after flattening one sample.
        hidden_units: Width of the intermediate linear layer.
        output_shape: Number of output values (one per class).
    """

    def __init__(self, input_shape:int, hidden_units:int, output_shape: int):
        super().__init__()
        # Layers are created in execution order and wrapped in a single Sequential.
        layers = [
            nn.Flatten(),
            nn.Linear(input_shape, hidden_units),
            nn.Linear(hidden_units, output_shape),
        ]
        self.layer_stack = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the layer stack and return the result."""
        logits = self.layer_stack(x)
        return logits

In [26]:
# Fix the RNG so the initial weights are reproducible
torch.manual_seed(42)

# Baseline model: 784 inputs (one per pixel of a 28x28 image),
# 10 hidden units, and one output per class.
model_0 = FashionMNISTModelV0(
    input_shape=784,
    hidden_units=10,
    output_shape=len(class_names),
)
model_0.to("cpu")

FashionMNISTModelV0(
  (layer_stack): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=784, out_features=10, bias=True)
    (2): Linear(in_features=10, out_features=10, bias=True)
  )
)

In [27]:
# Accuracy metric (defined here rather than imported)
def accuracy_fn(y_true, y_pred):
    """Return the percentage (0-100) of predictions that equal the true labels.

    Args:
        y_true (torch.Tensor): Ground-truth labels.
        y_pred (torch.Tensor): Predicted labels, same shape as ``y_true``.

    Returns:
        float: ``100 * (number of equal elements) / (total number of elements)``.

    Raises:
        ValueError: If the shapes differ (``torch.eq`` would otherwise broadcast
            silently and over-count matches) or if the tensors are empty.
    """
    if y_true.shape != y_pred.shape:
        raise ValueError(
            f"y_true and y_pred must have the same shape, got "
            f"{tuple(y_true.shape)} and {tuple(y_pred.shape)}")

    # numel() counts every element, so the ratio stays correct for inputs with
    # more than one dimension (len() only counts the first dimension).
    total = y_pred.numel()
    if total == 0:
        raise ValueError("cannot compute accuracy of empty tensors")

    correct = torch.eq(y_true, y_pred).sum().item() # torch.eq() calculates where two tensors are equal
    return (correct / total) * 100


In [28]:
# Setup loss function and optimizer
# Cross-entropy is the loss (a.k.a. "criterion"/"cost function");
# plain SGD updates model_0's parameters with a learning rate of 0.1.
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model_0.parameters(), lr=0.1)

## Coding for Training & Testing

In [29]:
from tqdm.auto import tqdm


IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html



In [31]:
epochs = 5

In [32]:
for epoch in tqdm(range(epochs)):
    print(f"Epoch : {epoch}\n---------")

    ## Training ##
    # Switch to training mode once per epoch (the test phase below leaves the
    # model in eval mode); there is no need to repeat this for every batch.
    model_0.train()
    train_loss = 0.0

    for batch, (X, y) in enumerate(train_dataloader):
        # Forward pass
        y_pred = model_0(X)

        # Loss for this batch. Accumulate it as a plain float: adding the tensor
        # itself would keep autograd history attached to the running total.
        loss = loss_fn(y_pred, y)
        train_loss += loss.item()

        # Reset gradients, backpropagate, then update the weights
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch % 400 == 0:
            print(f"Looked at {batch * len(X)}/{len(train_dataloader.dataset)} samples")

    # Average training loss per batch
    train_loss /= len(train_dataloader)

    ## Testing ##
    test_loss, test_acc = 0.0, 0.0
    model_0.eval()
    with torch.inference_mode():
        for X, y in test_dataloader:
            test_pred = model_0(X)
            test_loss += loss_fn(test_pred, y).item()
            # Accuracy compares labels, so turn logits into predicted class ids
            test_acc += accuracy_fn(y, test_pred.argmax(dim=1))

    # Average test loss / accuracy per batch
    test_loss /= len(test_dataloader)
    test_acc /= len(test_dataloader)

    ## Print out what's happening
    print(f"\nTrain loss: {train_loss:.5f} | Test loss: {test_loss:.5f}, Test acc: {test_acc:.2f}%\n")



  0%|          | 0/5 [00:00<?, ?it/s]

Epoch : 0
---------
Looked at 0/60000 samples
Looked at 12800/60000 samples
Looked at 25600/60000 samples
Looked at 38400/60000 samples
Looked at 51200/60000 samples


 20%|██        | 1/5 [00:02<00:08,  2.15s/it]


Train loss: 0.58804 | Test loss: 0.52466, Test acc: 81.48%

Epoch : 1
---------
Looked at 0/60000 samples
Looked at 12800/60000 samples
Looked at 25600/60000 samples
Looked at 38400/60000 samples
Looked at 51200/60000 samples


 40%|████      | 2/5 [00:04<00:06,  2.10s/it]


Train loss: 0.47645 | Test loss: 0.47678, Test acc: 83.34%

Epoch : 2
---------
Looked at 0/60000 samples
Looked at 12800/60000 samples
Looked at 25600/60000 samples
Looked at 38400/60000 samples
Looked at 51200/60000 samples


 60%|██████    | 3/5 [00:06<00:04,  2.10s/it]


Train loss: 0.45479 | Test loss: 0.55112, Test acc: 81.43%

Epoch : 3
---------
Looked at 0/60000 samples
Looked at 12800/60000 samples
Looked at 25600/60000 samples
Looked at 38400/60000 samples
Looked at 51200/60000 samples


 80%|████████  | 4/5 [00:08<00:02,  2.08s/it]


Train loss: 0.44429 | Test loss: 0.48230, Test acc: 83.12%

Epoch : 4
---------
Looked at 0/60000 samples
Looked at 12800/60000 samples
Looked at 25600/60000 samples
Looked at 38400/60000 samples
Looked at 51200/60000 samples


100%|██████████| 5/5 [00:10<00:00,  2.08s/it]


Train loss: 0.43419 | Test loss: 0.46643, Test acc: 83.58%






In [32]:
torch.manual_seed(42)
def eval_model(model: torch.nn.Module, 
               data_loader: torch.utils.data.DataLoader, 
               loss_fn: torch.nn.Module, 
               accuracy_fn,
               device=None):
    """Returns a dictionary containing the results of model predicting on data_loader.

    Args:
        model (torch.nn.Module): A PyTorch model capable of making predictions on data_loader.
        data_loader (torch.utils.data.DataLoader): The target dataset to predict on.
        loss_fn (torch.nn.Module): The loss function of model.
        accuracy_fn: An accuracy function to compare the models predictions to the truth labels.
            It is called as ``accuracy_fn(y_true=..., y_pred=...)``.
        device (optional): Device the batches are moved to before the forward pass.
            Defaults to the device of the model's first parameter (batches are left
            untouched if the model has no parameters).

    Returns:
        (dict): Results of model making predictions on data_loader, with keys
            "model_name", "model_loss" and "model_acc" (both averaged per batch).

    Raises:
        ValueError: If data_loader contains no batches, so no average can be computed.
    """
    num_batches = len(data_loader)
    if num_batches == 0:
        raise ValueError("data_loader contains no batches; cannot compute average loss/accuracy")

    if device is None:
        # Follow the model so the inputs always live on the same device as its weights.
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else None

    total_loss, total_acc = 0.0, 0.0
    model.eval()
    with torch.inference_mode():
        for X, y in data_loader:
            if device is not None:
                X, y = X.to(device), y.to(device)

            # Make predictions with the model
            y_pred = model(X)

            # Accumulate the loss and accuracy values per batch
            total_loss += loss_fn(y_pred, y).item()
            total_acc += accuracy_fn(y_true=y,
                                     y_pred=y_pred.argmax(dim=1)) # For accuracy, need the prediction labels (logits -> pred_prob -> pred_labels)

    # Scale loss and acc to find the average loss/acc per batch
    return {"model_name": model.__class__.__name__, # only works when model was created with a class
            "model_loss": total_loss / num_batches,
            "model_acc": total_acc / num_batches}

In [33]:
eval_model(model_0, test_dataloader, loss_fn, accuracy_fn)

{'model_name': 'FashionMNISTModelV0',
 'model_loss': 0.46473875641822815,
 'model_acc': 83.27675718849841}

In [34]:
# Device-agnostic setup: use Apple's MPS backend when present, otherwise CUDA,
# otherwise fall back to the CPU so the notebook runs on any machine.
if torch.backends.mps.is_available():
    device = "mps"
elif torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

In [35]:
# Create a model with non-linear and linear layers
class FashionMNISTModelV1(nn.Module):
    """Classifier that interleaves linear layers with ReLU non-linearities.

    Args:
        input_shape: Number of features after flattening one sample.
        hidden_units: Width of the hidden linear layer.
        output_shape: Number of output values (one per class).
    """

    def __init__(self, input_shape: int, hidden_units: int, output_shape: int):
        super().__init__()
        layers = [
            nn.Flatten(),  # collapse each sample into a single vector
            nn.Linear(input_shape, hidden_units),
            nn.ReLU(),
            nn.Linear(hidden_units, output_shape),
            # NOTE(review): this final ReLU clamps the outputs to be non-negative
            # before they reach the loss function -- confirm that is intended.
            nn.ReLU(),
        ]
        self.layer_stack = nn.Sequential(*layers)

    def forward(self, x: torch.Tensor):
        """Run ``x`` through the layer stack and return the result."""
        outputs = self.layer_stack(x)
        return outputs


In [36]:
# Fix the RNG so the initial weights are reproducible
torch.manual_seed(42)
model_1 = FashionMNISTModelV1(input_shape=784, # number of input features
    hidden_units=10,
    output_shape=len(class_names) # number of output classes desired
).to(device) # send the model to the configured device rather than a hard-coded backend
next(model_1.parameters()).device # check model device


device(type='mps', index=0)

In [37]:
# Fresh loss function, and an SGD optimizer bound to model_1's parameters (lr = 0.1)
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model_1.parameters(), lr=0.1)

In [38]:
def train_step(model: torch.nn.Module,
               data_loader: torch.utils.data.DataLoader,
               loss_fn: torch.nn.Module,
               optimizer: torch.optim.Optimizer,
               accuracy_fn,
               device: torch.device = device):
    """Train ``model`` for one pass over ``data_loader`` and print the averaged metrics.

    Args:
        model (torch.nn.Module): Model to train; it is moved to ``device``.
        data_loader (torch.utils.data.DataLoader): Yields ``(X, y)`` training batches.
        loss_fn (torch.nn.Module): Loss computed between model outputs and ``y``.
        optimizer (torch.optim.Optimizer): Optimizer that updates the model's parameters.
        accuracy_fn: Called as ``accuracy_fn(y_true=..., y_pred=...)`` once per batch.
        device (torch.device, optional): Device to train on. Defaults to the
            notebook-level ``device``.

    Raises:
        ValueError: If data_loader contains no batches, so no average can be computed.
    """
    num_batches = len(data_loader)
    if num_batches == 0:
        raise ValueError("data_loader contains no batches; nothing to train on")

    train_loss, train_acc = 0.0, 0.0
    model.to(device)
    # Explicitly (re-)enter training mode: an evaluation step that ran earlier
    # leaves the model in eval mode.
    model.train()
    for X, y in data_loader:
        # Send data to the target device
        X, y = X.to(device), y.to(device)

        # 1. Forward pass
        y_pred = model(X)

        # 2. Calculate loss. Accumulate it as a plain float so the running
        #    total does not carry autograd history from every batch.
        loss = loss_fn(y_pred, y)
        train_loss += loss.item()
        train_acc += accuracy_fn(y_true=y,
                                 y_pred=y_pred.argmax(dim=1)) # Go from logits -> pred labels

        # 3. Optimizer zero grad
        optimizer.zero_grad()

        # 4. Loss backward
        loss.backward()

        # 5. Optimizer step
        optimizer.step()

    # Calculate loss and accuracy per epoch and print out what's happening
    train_loss /= num_batches
    train_acc /= num_batches
    print(f"Train loss: {train_loss:.5f} | Train accuracy: {train_acc:.2f}%")



In [39]:
def test_step(data_loader: torch.utils.data.DataLoader,
              model: torch.nn.Module,
              loss_fn: torch.nn.Module,
              accuracy_fn,
              device: torch.device = device):
    """Evaluate ``model`` on every batch of ``data_loader`` and print the averaged metrics.

    Args:
        data_loader (torch.utils.data.DataLoader): Yields ``(X, y)`` evaluation batches.
        model (torch.nn.Module): Model to evaluate; it is moved to ``device`` and
            switched to eval mode.
        loss_fn (torch.nn.Module): Loss computed between model outputs and ``y``.
        accuracy_fn: Called as ``accuracy_fn(y_true=..., y_pred=...)`` once per batch.
        device (torch.device, optional): Device to evaluate on. Defaults to the
            notebook-level ``device``.
    """
    model.to(device)
    model.eval()  # evaluation mode

    batch_losses, batch_accs = [], []
    with torch.inference_mode():
        for features, targets in data_loader:
            # Move the batch to the target device
            features = features.to(device)
            targets = targets.to(device)

            # Forward pass, then per-batch loss and accuracy (logits -> pred labels)
            logits = model(features)
            batch_losses.append(loss_fn(logits, targets))
            batch_accs.append(accuracy_fn(y_true=targets,
                                          y_pred=logits.argmax(dim=1)))

        # Average the per-batch metrics and report them
        n_batches = len(data_loader)
        test_loss = sum(batch_losses) / n_batches
        test_acc = sum(batch_accs) / n_batches
        print(f"Test loss: {test_loss:.5f} | Test accuracy: {test_acc:.2f}%\n")

In [40]:
epochs = 10
for epoch in tqdm(range(epochs)):
    print(f"Epoch: {epoch}\n---------")
    # One full pass over the training data ...
    train_step(model=model_1,
               data_loader=train_dataloader,
               loss_fn=loss_fn,
               optimizer=optimizer,
               accuracy_fn=accuracy_fn)
    # ... followed by an evaluation on the test data.
    test_step(model=model_1,
              data_loader=test_dataloader,
              loss_fn=loss_fn,
              accuracy_fn=accuracy_fn)

  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 0
---------
Train loss: 1.05878 | Train accuracy: 62.19%


 10%|█         | 1/10 [00:06<00:58,  6.50s/it]

Test loss: 0.99506 | Test accuracy: 64.13%

Epoch: 1
---------
Train loss: 0.91331 | Train accuracy: 66.40%


 20%|██        | 2/10 [00:12<00:50,  6.33s/it]

Test loss: 0.90508 | Test accuracy: 66.86%

Epoch: 2
---------
Train loss: 0.87982 | Train accuracy: 67.24%


 30%|███       | 3/10 [00:18<00:43,  6.24s/it]

Test loss: 0.89476 | Test accuracy: 66.46%

Epoch: 3
---------
Train loss: 0.86308 | Train accuracy: 67.59%


 40%|████      | 4/10 [00:24<00:37,  6.19s/it]

Test loss: 0.90772 | Test accuracy: 66.34%

Epoch: 4
---------
Train loss: 0.84894 | Train accuracy: 68.08%


 50%|█████     | 5/10 [00:31<00:31,  6.25s/it]

Test loss: 0.86968 | Test accuracy: 67.33%

Epoch: 5
---------
Train loss: 0.83957 | Train accuracy: 68.30%


 60%|██████    | 6/10 [00:37<00:25,  6.31s/it]

Test loss: 0.88513 | Test accuracy: 66.85%

Epoch: 6
---------
Train loss: 0.83135 | Train accuracy: 68.53%


 70%|███████   | 7/10 [00:43<00:18,  6.29s/it]

Test loss: 0.88407 | Test accuracy: 66.53%

Epoch: 7
---------
Train loss: 0.82611 | Train accuracy: 68.69%


 80%|████████  | 8/10 [00:50<00:12,  6.33s/it]

Test loss: 0.86298 | Test accuracy: 67.85%

Epoch: 8
---------
Train loss: 0.78784 | Train accuracy: 70.45%


 90%|█████████ | 9/10 [00:57<00:06,  6.47s/it]

Test loss: 0.68366 | Test accuracy: 75.18%

Epoch: 9
---------
Train loss: 0.63120 | Train accuracy: 76.73%


100%|██████████| 10/10 [01:03<00:00,  6.34s/it]

Test loss: 0.69565 | Test accuracy: 74.86%






In [41]:


# Move values to device
torch.manual_seed(42)
def eval_model(model: torch.nn.Module, 
               data_loader: torch.utils.data.DataLoader, 
               loss_fn: torch.nn.Module, 
               accuracy_fn, 
               device: torch.device = device):
    """Evaluates a given model on a given dataset.

    Args:
        model (torch.nn.Module): A PyTorch model capable of making predictions on data_loader.
            It is moved to ``device`` before evaluation.
        data_loader (torch.utils.data.DataLoader): The target dataset to predict on.
        loss_fn (torch.nn.Module): The loss function of model.
        accuracy_fn: An accuracy function to compare the models predictions to the truth labels.
        device (str, optional): Target device to compute on. Defaults to device.

    Returns:
        (dict): Results of model making predictions on data_loader, with keys
            "model_name", "model_loss" and "model_acc" (both averaged per batch).

    Raises:
        ValueError: If data_loader contains no batches, so no average can be computed.
    """
    num_batches = len(data_loader)
    if num_batches == 0:
        raise ValueError("data_loader contains no batches; cannot compute average loss/accuracy")

    total_loss, total_acc = 0.0, 0.0
    # Keep the model and the batches on the same device, as train_step/test_step do.
    model.to(device)
    model.eval()
    with torch.inference_mode():
        for X, y in data_loader:
            # Send data to the target device
            X, y = X.to(device), y.to(device)
            y_pred = model(X)
            total_loss += loss_fn(y_pred, y).item()
            total_acc += accuracy_fn(y_true=y, y_pred=y_pred.argmax(dim=1))

    # Scale loss and acc to per-batch averages
    return {"model_name": model.__class__.__name__, # only works when model was created with a class
            "model_loss": total_loss / num_batches,
            "model_acc": total_acc / num_batches}

In [42]:
# Evaluate model 1 on the test set, computing on the configured device
model_1_results = eval_model(
    model_1,
    test_dataloader,
    loss_fn,
    accuracy_fn,
    device=device,
)
model_1_results

{'model_name': 'FashionMNISTModelV1',
 'model_loss': 0.6956480741500854,
 'model_acc': 74.86022364217253}

## 1st CNN Model

In [58]:
class FashionMnistModelV2(nn.Module):
    """Tiny VGG implementation in PyTorch.

    Two convolutional blocks (Conv -> ReLU -> Conv -> ReLU -> MaxPool) followed by a
    linear classifier. The classifier size assumes 28x28 inputs: the padded 3x3
    convolutions keep height/width, and each ``MaxPool2d(2)`` halves them (28 -> 14 -> 7).

    Args:
        input_shape: Number of input channels.
        hidden_units: Number of channels produced by every convolution.
        output_shape: Number of output classes.
    """

    # Spatial size left after the two 2x2 max-pools on a 28x28 input.
    _POOLED_SIDE = 7

    def __init__(self, input_shape: int, hidden_units:int, output_shape:int):
        super().__init__()

        self.conv_block_1 = self._conv_block(input_shape, hidden_units)
        self.conv_block_2 = self._conv_block(hidden_units, hidden_units)

        # NOTE: the attribute keeps its original spelling ("classifer") so existing
        # references and state_dict keys remain valid.
        self.classifer = nn.Sequential(
            nn.Flatten(),
            # Derived from hidden_units instead of a fixed 735, which was only
            # correct for hidden_units=15.
            nn.Linear(in_features=hidden_units * self._POOLED_SIDE * self._POOLED_SIDE,
                      out_features=output_shape)
        )

    @staticmethod
    def _conv_block(in_channels: int, out_channels: int) -> nn.Sequential:
        """Build one Conv-ReLU-Conv-ReLU-MaxPool block that halves height and width."""
        return nn.Sequential(
            nn.Conv2d(in_channels=in_channels,
                      out_channels=out_channels,
                      kernel_size=3,
                      stride=1,
                      padding=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=out_channels,
                      out_channels=out_channels,
                      kernel_size=3,
                      stride=1,
                      padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2)
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the class logits for a batch of images shaped ``[N, input_shape, 28, 28]``."""
        x = self.conv_block_1(x)
        x = self.conv_block_2(x)
        return self.classifer(x)

In [59]:
# Seed the RNG, then build the CNN: 1 input channel, 15 hidden channels, 10 classes
torch.manual_seed(34)
model_cnn = FashionMnistModelV2(
    input_shape=1,
    hidden_units=15,
    output_shape=10,
)

In [60]:
from torchsummary import summary

In [64]:
images = torch.randn(size=(32,3,64,64))

In [65]:
test_image = images[0]

In [76]:
conv_layer = nn.Conv2d(in_channels=3, out_channels=10, kernel_size=3, padding=2)

In [77]:
conv_layer(test_image).shape


torch.Size([10, 66, 66])

In [70]:
nn.Conv2d?

[0;31mInit signature:[0m
[0mnn[0m[0;34m.[0m[0mConv2d[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0min_channels[0m[0;34m:[0m [0mint[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mout_channels[0m[0;34m:[0m [0mint[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mkernel_size[0m[0;34m:[0m [0mUnion[0m[0;34m[[0m[0mint[0m[0;34m,[0m [0mTuple[0m[0;34m[[0m[0mint[0m[0;34m,[0m [0mint[0m[0;34m][0m[0;34m][0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mstride[0m[0;34m:[0m [0mUnion[0m[0;34m[[0m[0mint[0m[0;34m,[0m [0mTuple[0m[0;34m[[0m[0mint[0m[0;34m,[0m [0mint[0m[0;34m][0m[0;34m][0m [0;34m=[0m [0;36m1[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mpadding[0m[0;34m:[0m [0mUnion[0m[0;34m[[0m[0mstr[0m[0;34m,[0m [0mint[0m[0;34m,[0m [0mTuple[0m[0;34m[[0m[0mint[0m[0;34m,[0m [0mint[0m[0;34m][0m[0;34m][0m [0;34m=[0m [0;36m0[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mdilation[0m[0;34m:[0m [0mUnion[0m[0;34m[[0m