In [20]:
import torch
from torch import nn, optim, save
from torchvision import transforms, models
from torch.utils.data import DataLoader
from torchvision.datasets import CIFAR10

from pathlib import Path
from os import cpu_count, remove

In [2]:
!pip install -q torchinfo

In [3]:
from torchinfo import summary

## Setting Device Agnostic Code

In [4]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

device

device(type='cuda')

## Initializing Model's Weights and Transforms

In [5]:
weights = models.ResNet18_Weights.DEFAULT
transforms = weights.transforms()

transforms

ImageClassification(
    crop_size=[224]
    resize_size=[256]
    mean=[0.485, 0.456, 0.406]
    std=[0.229, 0.224, 0.225]
    interpolation=InterpolationMode.BILINEAR
)

## Loading the Dataset

In [6]:
train_ds = CIFAR10(root="/content/", download=True, transform=transforms, train=True)
test_ds = CIFAR10(root="/content/", download=True, transform=transforms, train=False)
classes_names = test_ds.classes

print(len(train_ds), len(test_ds))

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to /content/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:03<00:00, 51413650.41it/s] 


Extracting /content/cifar-10-python.tar.gz to /content/
Files already downloaded and verified
50000 10000


## Creating the DeviceLoaders

In [7]:
class DeviceLoader:
    def __init__(self, dataset, device, batch_size=64, shuffle=True):
        self.loader = DataLoader(
            dataset=dataset,
            batch_size=batch_size,
            shuffle=shuffle,
            num_workers=cpu_count(),
            pin_memory=True
        )
        self.device = device
        self.batch_size = batch_size

    def __to_device(self, data):
        if isinstance(data, (list, tuple)):
            return [self.__to_device(x) for x in data]
        return data.to(self.device, non_blocking=True)

    def __len__(self):
        return len(self.loader)

    def __iter__(self):
        for batch in self.loader:
            yield self.__to_device(batch)

In [8]:
# Initializing the Device Loaders
BATCH_SIZE = 32

train_dl = DeviceLoader(train_ds, device, batch_size=BATCH_SIZE, shuffle=True)
test_dl = DeviceLoader(test_ds, device, batch_size=BATCH_SIZE, shuffle=False)

print(len(train_dl), len(test_dl))

1563 313


## Initializing the Model

In [9]:
resNet = models.resnet18(weights=weights).to(device)

summary(model=resNet,
        input_size=(1, 3, 224, 224),
        col_names=["input_size", "output_size", "num_params", "trainable"],
        col_width=20,
        row_settings=["var_names"])

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 152MB/s]
  action_fn=lambda data: sys.getsizeof(data.storage()),
  return super().__sizeof__() + self.nbytes()


Layer (type (var_name))                  Input Shape          Output Shape         Param #              Trainable
ResNet (ResNet)                          [1, 3, 224, 224]     [1, 1000]            --                   True
├─Conv2d (conv1)                         [1, 3, 224, 224]     [1, 64, 112, 112]    9,408                True
├─BatchNorm2d (bn1)                      [1, 64, 112, 112]    [1, 64, 112, 112]    128                  True
├─ReLU (relu)                            [1, 64, 112, 112]    [1, 64, 112, 112]    --                   --
├─MaxPool2d (maxpool)                    [1, 64, 112, 112]    [1, 64, 56, 56]      --                   --
├─Sequential (layer1)                    [1, 64, 56, 56]      [1, 64, 56, 56]      --                   True
│    └─BasicBlock (0)                    [1, 64, 56, 56]      [1, 64, 56, 56]      --                   True
│    │    └─Conv2d (conv1)               [1, 64, 56, 56]      [1, 64, 56, 56]      36,864               True
│    │    └─BatchN

## Freezing Model's Parameters

In [10]:
for param in resNet.parameters():
    param.requires_grad = False

## Adjusting the Output Layer

In [11]:
resNet.fc = nn.Linear(resNet.fc.in_features, 10).to(device)

summary(model=resNet,
        input_size=(1, 3, 224, 224),
        col_names=["input_size", "output_size", "num_params", "trainable"],
        col_width=20,
        row_settings=["var_names"])

Layer (type (var_name))                  Input Shape          Output Shape         Param #              Trainable
ResNet (ResNet)                          [1, 3, 224, 224]     [1, 10]              --                   Partial
├─Conv2d (conv1)                         [1, 3, 224, 224]     [1, 64, 112, 112]    (9,408)              False
├─BatchNorm2d (bn1)                      [1, 64, 112, 112]    [1, 64, 112, 112]    (128)                False
├─ReLU (relu)                            [1, 64, 112, 112]    [1, 64, 112, 112]    --                   --
├─MaxPool2d (maxpool)                    [1, 64, 112, 112]    [1, 64, 56, 56]      --                   --
├─Sequential (layer1)                    [1, 64, 56, 56]      [1, 64, 56, 56]      --                   False
│    └─BasicBlock (0)                    [1, 64, 56, 56]      [1, 64, 56, 56]      --                   False
│    │    └─Conv2d (conv1)               [1, 64, 56, 56]      [1, 64, 56, 56]      (36,864)             False
│    │    

## Creating the Training and Evaluating Loops

In [12]:
def training_step(model, train_dl, loss_fn, eval_fn, opt):
    from tqdm import tqdm # For the progress bar
    

    # Setting batch size and model's device
    batch_size = train_dl.batch_size
    model_device = next(model.parameters()).device

    # Initialize training loss and accuracy
    train_loss, train_eval = 0, 0

    print("\tTraining Step: ", end="")

    model.train()
    for x_train, y_train in tqdm(train_dl):
        # Moving batches to device
        x_train, y_train = x_train.to(model_device, non_blocking=True), y_train.to(model_device, non_blocking=True)

        # Generating predictions
        model_logits = model(x_train)

        # Calculate loss
        loss = loss_fn(model_logits, y_train)
        train_loss += loss.item()
        train_eval += eval_fn(model_logits, y_train)

        # Updating Model's parameters
        opt.zero_grad()
        loss.backward()
        opt.step()

    train_loss /= len(train_dl)
    train_eval /= len(train_dl)

    return train_loss, train_eval

In [13]:
def validation_step(model, valid_dl, loss_fn, eval_fn):
    import torch
    from tqdm import tqdm

    # Setting batch size and model's device
    batch_size = valid_dl.batch_size
    model_device = next(model.parameters()).device

    # Initialize validation loss and accuracy
    valid_loss, valid_eval = 0, 0

    print("\tValidation Step: ", end="")

    model.eval()
    with torch.inference_mode():
        for x_valid, y_valid in tqdm(valid_dl):
            # Moving batches to model's device
            x_valid, y_valid = x_valid.to(model_device, non_blocking=True), y_valid.to(model_device, non_blocking=True)

            # Generate Predictions
            model_logits = model(x_valid)

            valid_loss += loss_fn(model_logits, y_valid).item()
            valid_eval += eval_fn(model_logits, y_valid)

        valid_loss /= len(valid_dl)
        valid_eval /= len(valid_dl)

        return valid_loss, valid_eval

In [14]:
def fit(model, epochs, train_dl, valid_dl, loss_fn, eval_fn, opt):
    from timeit import default_timer as timer
    import torch


    # Starting the `timer` and initialize the evaluating Lists
    start_time = timer()
    train_losses, train_evals = [], []
    valid_losses, valid_evals = [], []

    print("Starting Process...\n")
    
    for epoch in range(1, epochs + 1):
        print(f"-> Epoch: {epoch}/{epochs}")

        # Training and Evaluating the Model
        train_loss, train_eval = training_step(model, train_dl, loss_fn, eval_fn, opt)
        valid_loss, valid_eval = validation_step(model, valid_dl, loss_fn, eval_fn)

        print()
        print(
            f"   Train Loss: {train_loss:.4f} | "
            f"Train Accuracy: {train_eval:.2f}% | "
            f"Valid Loss: {valid_loss:.4f} | "
            f"Valid Accuracy (%): {valid_eval:.2f}%")
        print("-" * 99, end="\n\n")
        
        train_losses.append(train_loss)
        train_evals.append(train_eval)
        valid_losses.append(valid_loss)
        valid_evals.append(valid_eval)

    print("Process Completed Successfully...")

    return {"model_train_loss": train_losses,
        "model_train_eval": train_evals,
        "model_valid_loss": valid_losses,
        "model_valid_eval": valid_evals,
        "model_name": model.__class__.__name__,
        "model_loss_fn": loss_fn.__class__.__name__,
        "model_evaluating_m": eval_fn.__name__,
        "model_optimizer": opt.__class__.__name__,
        "model_device": next(model.parameters()).device.type,
        "model_epochs": epochs,
        "model_time": timer() - start_time}

## Creating Accuracy Metric

In [15]:
def accuracy_fn(model_logits, labels):
    preds = torch.softmax(model_logits, dim=1).argmax(dim=1)

    return (preds == labels).sum().item() / len(labels)

## Setting Loss Function and Optimizer

In [16]:
loss_fn = nn.CrossEntropyLoss()
opt = optim.Adam(params=resNet.parameters(), lr=1e-3)

## Training the Model

In [18]:
res = fit(model=resNet,
          epochs=3,
          train_dl=train_dl,
          valid_dl=test_dl,
          loss_fn=loss_fn,
          eval_fn=accuracy_fn,
          opt=opt)

Starting Process...

-> Epoch: 1/3
	Training Step: 

100%|██████████| 1563/1563 [02:19<00:00, 11.20it/s]


	Validation Step: 

100%|██████████| 313/313 [00:26<00:00, 11.78it/s]



   Train Loss: 0.6779 | Train Accuracy: 0.76% | Valid Loss: 0.6545 | Valid Accuracy (%): 0.77%
---------------------------------------------------------------------------------------------------

-> Epoch: 2/3
	Training Step: 

100%|██████████| 1563/1563 [02:20<00:00, 11.13it/s]


	Validation Step: 

100%|██████████| 313/313 [00:27<00:00, 11.25it/s]



   Train Loss: 0.6617 | Train Accuracy: 0.77% | Valid Loss: 0.6476 | Valid Accuracy (%): 0.78%
---------------------------------------------------------------------------------------------------

-> Epoch: 3/3
	Training Step: 

100%|██████████| 1563/1563 [02:18<00:00, 11.29it/s]


	Validation Step: 

100%|██████████| 313/313 [00:28<00:00, 11.17it/s]


   Train Loss: 0.6686 | Train Accuracy: 0.77% | Valid Loss: 0.6290 | Valid Accuracy (%): 0.79%
---------------------------------------------------------------------------------------------------

Process Completed Successfully...





## Saving the Model

In [21]:
def save_model(model, saved_model_path: str, if_exists_stop=False):
    target_path = Path('/'.join(saved_model_path.split('/')[:-1]))
    model_name = saved_model_path.split('/')[-1]

    assert model_name.endswith(".pth") or model_name.endswith(".pt"), "Wrong extension: Expecting `.pt` or `.pth`..."
    
    if not target_path.exists():
        target_path.mkdir(parents=True, exist_ok=True)

    if (target_path / model_name).is_file():
        print(f"[INFO] Model `{model_name}` already exists on `{target_path}`...")

        if if_exists_stop:
            return

        print(f"[INFO] Deleting `{target_path / model_name}`...")
        remove(target_path / model_name)

    print(f"[INFO] Saving Model `{model_name}` to `{target_path}`...")
    save(obj=model.state_dict(), f=target_path/model_name)

    print(f"[INFO] Model Successfully Saved to {target_path / model_name}")

In [22]:
save_model(resNet, "/content/models/resNet.pth")

[INFO] Saving Model `resNet.pth` to `/content/models`...
[INFO] Model Successfully Saved to /content/models/resNet.pth
