<a href="https://colab.research.google.com/github/R12942159/DeepLearning/blob/main/DLCV_hw1_pretrain.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import torch


# Get cuda from GPU device for training.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using: {device}")

Using: cuda


In [3]:
# search file paths
from glob import glob


img_paths_train = glob('/content/drive/MyDrive/NTU_DLCV/p1_data/train_50/*.png') # *: all
img_paths_val = glob('/content/drive/MyDrive/NTU_DLCV/p1_data/val_50/*.png')
# img_paths_train = sorted(glob('/content/drive/MyDrive/NTU_DLCV/p1_data/train_50/*.png')) # *: all

In [4]:
# number of images
len(img_paths_train), len(img_paths_val)

(22500, 2500)

In [5]:
import torch
from PIL import Image
import numpy as np


class PretrainDataset(torch.utils.data.Dataset):
    def __init__(self, img_paths, img_size, transform):
        self.img_paths = img_paths
        self.img_size = img_size
        self.transform = transform

    def __len__(self):
        """number of samples"""
        return len(self.img_paths)

    def __getitem__(self, idx):
        """read 1 sample"""
        # Read img
        path = self.img_paths[idx] # get img path
        img = Image.open(path).convert('RGB') # read img
        # img_pil = Image.fromarray(img)
        # img = img_pil.resize((IMG_SIZE, IMG_SIZE), Image.LANCZOS)
        # img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) # Convert to RGB

        # transform img
        img = self.transform(img)

        # Read class index
        cls_idx = int((path.split('/')[-1]).split('_')[0])
        cls_idx = torch.tensor(cls_idx, dtype=torch.int64)

        return img, cls_idx

In [6]:
from torchvision.models import resnet50, ResNet50_Weights


# Preprocess Transform
transform =  ResNet50_Weights.IMAGENET1K_V2.transforms()

# Build Dataset
IMG_SIZE = 32 # ResNet-50 typically takes input images of size 224x224 pixels.
train_ds = PretrainDataset(img_paths_train, IMG_SIZE, transform)
val_ds = PretrainDataset(img_paths_val, IMG_SIZE, transform)

In [7]:
transform

ImageClassification(
    crop_size=[224]
    resize_size=[232]
    mean=[0.485, 0.456, 0.406]
    std=[0.229, 0.224, 0.225]
    interpolation=InterpolationMode.BILINEAR
)

In [8]:
# Build DataLoaders
BATCH_SIZE = 150
train_loader = torch.utils.data.DataLoader(train_ds, BATCH_SIZE, shuffle=True)
val_loader = torch.utils.data.DataLoader(val_ds, BATCH_SIZE)

#### Build Model (https://pytorch.org/vision/stable/models.html#classification)

In [9]:
from torchvision import models

model = models.resnet50(weights=ResNet50_Weights.IMAGENET1K_V2)
print(model)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [10]:
from torch import nn

# replace classifier
num_features = model.fc.in_features # len of feature vectors

# # Freeze model
# for param in model.parameters():
#     param.requires_grad = False

# Replace classifier
model.fc = nn.Linear(num_features, 50)
print(model.fc)

Linear(in_features=2048, out_features=50, bias=True)


In [11]:
from torchsummary import summary


resize_size = transform.resize_size[0]
summary(model.to(device), (3, resize_size, resize_size))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 116, 116]           9,408
       BatchNorm2d-2         [-1, 64, 116, 116]             128
              ReLU-3         [-1, 64, 116, 116]               0
         MaxPool2d-4           [-1, 64, 58, 58]               0
            Conv2d-5           [-1, 64, 58, 58]           4,096
       BatchNorm2d-6           [-1, 64, 58, 58]             128
              ReLU-7           [-1, 64, 58, 58]               0
            Conv2d-8           [-1, 64, 58, 58]          36,864
       BatchNorm2d-9           [-1, 64, 58, 58]             128
             ReLU-10           [-1, 64, 58, 58]               0
           Conv2d-11          [-1, 256, 58, 58]          16,384
      BatchNorm2d-12          [-1, 256, 58, 58]             512
           Conv2d-13          [-1, 256, 58, 58]          16,384
      BatchNorm2d-14          [-1, 256,

#### Train Model

In [12]:
from tqdm.auto import tqdm # (optional) progress bar


def training(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset) # number of samples
    num_batches = len(dataloader) # batches per epoch

    model.train() # to training mode. (The Dropout layer has a different behavior between the training mode and the evaluation mode.)
    epoch_loss, epoch_correct = 0, 0
    for batch_i, (x, y) in enumerate(tqdm(dataloader, leave=False)):
        x, y = x.to(device), y.to(device) # move data to GPU

        # zero the parameter gradients
        optimizer.zero_grad()

        # Compute prediction loss
        pred = model(x)
        loss = loss_fn(pred, y)

        # Optimization by gradients
        loss.backward() # backpropagation to compute gradients
        optimizer.step() # update model params

        # write to logs
        epoch_loss += loss.item() # tensor -> python value
        # (N, Class)
        epoch_correct += (pred.argmax(dim=1) == y).sum().item()

    # return avg loss of epoch, acc of epoch
    return epoch_loss/num_batches, epoch_correct/size


def testing(dataloader, model, loss_fn):
    size = len(dataloader.dataset) # number of samples
    num_batches = len(dataloader) # batches per epoch

    model.eval() # model to test mode.
    epoch_loss, epoch_correct = 0, 0

    # No gradient for test data
    with torch.no_grad():
        for batch_i, (x, y) in enumerate(dataloader):
            x, y = x.to(device), y.to(device)

            # Compute prediction loss
            pred = model(x)
            loss = loss_fn(pred, y)

            # write to logs
            epoch_loss += loss.item()
            epoch_correct += (pred.argmax(1) == y).sum().item()

    return epoch_loss/num_batches, epoch_correct/size

In [19]:
import matplotlib.pyplot as plt


# Early Stopping
patience = 5
counter = 0
best_loss = np.inf

def train(pretrained, freeze=False):
    print(f"pretrained weights: {pretrained}, freeze: {freeze}")
    # Model
    if pretrained:
        model = models.resnet50(weights=ResNet50_Weights.IMAGENET1K_V2)
    else:
        model = models.resnet50(weights=None)

    if freeze:
        # Freeze model
        for param in model.parameters():
            param.requires_grad = False

    # Replace classifier
    num_features = model.fc.in_features # len of feature vectors
    model.fc = nn.Linear(num_features, 50)

    model = model.to(device)

    loss_fn = nn.CrossEntropyLoss()
    # lower learning rate for finetuning
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=1e-4)
    # schedule = ReduceLROnplateau(optimizer, 'min')

    EPOCHS = 50
    logs = {
        'train_loss': [], 'train_acc': [], 'val_loss': [], 'val_acc': []
    }
    for epoch in tqdm(range(EPOCHS)):
        train_loss, train_acc = training(train_loader, model, loss_fn, optimizer)
        val_loss, val_acc = testing(val_loader, model, loss_fn)

        # # Note that step should be called after test_epoch()
        # schedule.step(val_loss)

        print(f'EPOCH: {epoch:04d} \
        train_loss: {train_loss:.4f}, train_acc: {train_acc:.3f} \
        val_loss: {val_loss:.4f}, val_acc: {val_acc:.3f} ')

        logs['train_loss'].append(train_loss)
        logs['train_acc'].append(train_acc)
        logs['val_loss'].append(val_loss)
        logs['val_acc'].append(val_acc)

        # Save model
        torch.save(model.state_dict(), "last.pth")

        # chcek improvement
        if val_loss < best_loss:
            counter = 0
            best_loss = val_loss
            torch.save(model.state_dict(), "best.pth")
        else:
            counter += 1
        if counter >= patience:
            print("Earlystop!")
            break


    # plot result
    plt.figure(figsize=(6, 3))
    plt.subplot(1, 2, 1)
    plt.title('Loss')
    plt.plot(logs['train_loss'])
    plt.plot(logs['val_loss'])
    plt.subplot(1, 2, 2)
    plt.title('Acc.')
    plt.plot(logs['train_acc'])
    plt.plot(logs['val_acc'])
    plt.show()

In [18]:
train(pretrained=False)
# transfer learning
train(pretrained=True)
train(pretrained=True, freeze=True)

pretrained weights: False, freeze: False


  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/150 [00:00<?, ?it/s]

EPOCH: 0000         train_loss: 3.4337, train_acc: 0.121         val_loss: 3.1759, val_acc: 0.166 


UnboundLocalError: ignored