In [20]:
# !pip install thop
# !pip install torchsummary
# !pip install einops
# !pip install -q kaggle
# !pip install torch
# !pip install numpy
# !pip install opencv-python
# !pip install matplotlib
# !pip install natsort
# !pip install torchvision

In [21]:
import numpy as np
import os
import sys
import json
import cv2
import math
import time
from tqdm import tqdm
import matplotlib
import matplotlib.pyplot as plt
from collections import OrderedDict
from glob import glob
from natsort import natsorted

import torch
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
import torch.optim as optim
import torch.nn as nn
from torchvision import transforms
import torch.nn.functional as F

from thop import profile
from thop import clever_format
from torchsummary import summary

from einops import rearrange, repeat
from einops.layers.torch import Rearrange

%matplotlib inline
matplotlib.rcParams['figure.facecolor'] = '#ffffff'


In [22]:
result = torch.cuda.get_device_name(torch.cuda.current_device()) if torch.cuda.is_available() else "cpu"
print("Current device: " + result)
torch.cuda.empty_cache()


Current device: NVIDIA RTX A4000


# Download Data

In [23]:
# !unzip DIV2K_train_HR.zip
# !unzip DIV2K_valid_HR.zip
# !unzip DIV2K_train_LR_bicubic_X3.zip
# !unzip DIV2K_valid_LR_bicubic_X3.zip
# !unzip DIV2k_train_LR_unknown_X3.zip
# !unzip DIV2k_valid_LR_unknown_X3.zip

# !unzip Set5.zip
# !rm ./adarlab-ai-training.zip

# Checking Download

In [24]:
!pwd

img_train_data_dir_1 = './DIV2K_train_LR_bicubic/X3'
img_train_data_dir_2 = './Flickr2K_LR_bicubic/train_X3'
train_images = natsorted(glob(os.path.join(img_train_data_dir_1, '*.png'))) + natsorted(glob(os.path.join(img_train_data_dir_2, '*.png')))
print(len(train_images))

img_valid_data_dir_1 = './DIV2K_valid_LR_bicubic/X3'
img_valid_data_dir_2 = './Flickr2K_LR_bicubic/val_X3'
valid_images = natsorted(glob(os.path.join(img_valid_data_dir_1, '*.png'))) + natsorted(glob(os.path.join(img_valid_data_dir_2, '*.png')))
print(len(valid_images))

ans_train_data_dir_1 = './DIV2K_train_HR'
ans_train_data_dir_2 = './Flickr2K_train_HR'
train_ans = natsorted(glob(os.path.join(ans_train_data_dir_1, '*.png'))) + natsorted(glob(os.path.join(ans_train_data_dir_2, '*.png')))

ans_valid_data_dir_1 = './DIV2K_valid_HR'
ans_valid_data_dir_2 = './Flickr2K_val_HR'
valid_ans = natsorted(glob(os.path.join(ans_valid_data_dir_1, '*.png'))) + natsorted(glob(os.path.join(ans_valid_data_dir_2, '*.png')))

/home/va8800/ken_ai/2024-ai-training-fianl-project
3300
250


# Dataloader

In [25]:
config = {
  "data_dir": '/home/va8800/ken_ai/2024-ai-training-fianl-project',
  "data_num": 900,
  "device": torch.device("cuda" if torch.cuda.is_available() else "cpu"),
  # you can set your own training configurations
  "batch_size": 1,
  "learning_rate": 0.0005,
  "n_epochs": 50,
  "pic_num": 1,
}

In [26]:
transform = None
# transform = transforms.Compose([
#     transforms.RandomCrop(224),
# ])

In [27]:
class TrainImg(Dataset):
  def __init__(self, config, set_type="train", transform=None):
    self.device = config["device"]
    self.transform = transform
    ###########################your code here#############################
    # get the image path list -> self.image_names
    self.train_image_names = train_images
    self.train_ans_names = train_ans
    self.valid_image_names = valid_images
    self.valid_ans_names = valid_ans

    ########################################################################
    if set_type == "train":
        # n_start = 0
        # n_end = 4
        self.image_names = self.train_image_names
        self.ans_names = self.train_ans_names
  
    elif set_type == "val":
        # n_start = 4
        # n_end = config['data_num']
        self.image_names = self.valid_image_names
        self.ans_names = self.valid_ans_names

    # self.image_names = self.image_names[n_start:n_end]
    # self.ans_names = self.ans_names[n_start:n_end]
  
    ########################################################################

  def __len__(self):
    return len(self.image_names)

  def __getitem__(self, idx):
    ########################################################################
    # get the input image step by step
    # 1. read the image using cv2
    # 2. transpose the dimension from [h, w, 3] to [3, h, w]
    # 3. from numpy array to tensor
    # 4. normalize the value from [0, 255] to [0, 1]
    image_name = self.image_names[idx]
    image = cv2.imread(image_name, cv2.IMREAD_COLOR)
    image = np.transpose(image, (2, 0, 1))  # transpose the dimension from [h, w, 3] to [3, h, w]
    image = torch.from_numpy(image).float() # from numpy array to tensor
    # image = image / 255 # normalize the value from [0, 255] to [0, 1]
    
    image_pic_arr = []
    image_pic_arr.append(image)
    length = config["pic_num"] - 1
    if self.transform:
        for i in range(length):
            image_pic = self.transform((image))
            image_pic_arr.append(image_pic)
    else:
        for i in range(length):
            image_pic_arr.append(image)
    
    image = image / 255 # normalize the value from [0, 255] to [0, 1]
    
    for i in range(length + 1):
        image_pic_arr[i] = image_pic_arr[i] / 255
        
    ans_name = self.ans_names[idx]
    ans = cv2.imread(ans_name, cv2.IMREAD_COLOR)
    ans = np.transpose(ans, (2, 0, 1))  # transpose the dimension from [h, w, 3] to [3, h, w]
    ans = torch.from_numpy(ans).float() # from numpy array to tensor
    # image = image / 255 # normalize the value from [0, 255] to [0, 1]
    
    ans_pic_arr = []
    ans_pic_arr.append(ans)
    length = config["pic_num"] - 1
    if self.transform:
        for i in range(length):
            ans_pic = self.transform((ans))
            ans_pic_arr.append(ans_pic)
    else:
        for i in range(length):
            ans_pic_arr.append(ans)
    
    ans = ans / 255 # normalize the value from [0, 255] to [0, 1]
    
    for i in range(length + 1):
        ans_pic_arr[i] = ans_pic_arr[i] / 255
    
    return {
        'image_name': image_name,
        'image': image_pic_arr,
        'ans': ans_pic_arr,
    }

In [28]:
train_ds = TrainImg(config, set_type='train', transform=transform)
val_ds = TrainImg(config, set_type='val')

train_dl = DataLoader(train_ds, config["batch_size"], shuffle=True, drop_last=True, num_workers=1)
val_dl = DataLoader(val_ds, config["batch_size"], shuffle=True, drop_last=True, num_workers=1)

print("Total dataset length: ", config['data_num'])
print("Train dataset length: ", len(train_ds))
print("Validation dataset length: ", len(val_ds))

Total dataset length:  900
Train dataset length:  3300
Validation dataset length:  250


# Show images

In [29]:
def draw_img(image, vis=None, color_fixed=None, linewidth=1, img_order='rgb', draw_kp=True, kp_style=None):
  """ Inpaints a hand stick figure into a matplotlib figure.
  image:    original image input
  coords_hw:  predicted keypoint (non normalized) -> [0, 224)
  """
  if kp_style is None:
    # kp_style[0] for circle radius, kp_style[1] for circle point thickness
    kp_style = (1, 2)

  # if image have four dimension like [1. 224. 224. 3] then squeeze to [3. 224. 3]
  image = np.squeeze(image)

  if len(image.shape) == 2:
    image = np.expand_dims(image, 2)
  s = image.shape
  assert len(s) == 3, "This only works for single images."

  convert_to_uint8 = False

  if s[2] == 1:
    # grayscale case
    image = (image - np.min(image)) / (np.max(image) - np.min(image) + 1e-4)
    image = np.tile(image, [1, 1, 3])
    pass

  elif s[2] == 3:
    # RGB case
    if image.dtype == np.uint8:
        convert_to_uint8 = True
        image = image.astype('float64') / 255.0
    elif image.dtype == np.float32:
        # convert to gray image
        image = np.mean(image, axis=2)
        image = (image - np.min(image)) / (np.max(image) - np.min(image) + 1e-4)
        image = np.expand_dims(image, 2)
        image = np.tile(image, [1, 1, 3])
  else:
    assert 0, "Unknown image dimensions."

  colors = np.array(
    [[0.4, 0.4, 0.4],
    [0.4, 0.0, 0.0],
    [0.6, 0.0, 0.0],
    [0.8, 0.0, 0.0],
    [1.0, 0.0, 0.0],
    [0.4, 0.4, 0.0],
    [0.6, 0.6, 0.0],
    [0.8, 0.8, 0.0],
    [1.0, 1.0, 0.0],
    [0.0, 0.4, 0.2],
    [0.0, 0.6, 0.3],
    [0.0, 0.8, 0.4],
    [0.0, 1.0, 0.5],
    [0.0, 0.2, 0.4],
    [0.0, 0.3, 0.6],
    [0.0, 0.4, 0.8],
    [0.0, 0.5, 1.0],
    [0.4, 0.0, 0.4],
    [0.6, 0.0, 0.6],
    [0.7, 0.0, 0.8],
    [1.0, 0.0, 1.0]]
  )

  if img_order == 'rgb':
    # cv2 operation under BGR
    colors = colors[:, ::-1]

  color_map = {
    'k': np.array([0.0, 0.0, 0.0]),
    'w': np.array([1.0, 1.0, 1.0]),
    'b': np.array([0.0, 0.0, 1.0]),
    'g': np.array([0.0, 1.0, 0.0]),
    'r': np.array([1.0, 0.0, 0.0]),
    'm': np.array([1.0, 1.0, 0.0]),
    'c': np.array([0.0, 1.0, 1.0])
  }

  if convert_to_uint8:
    image = (image * 255).astype('uint8')

  return image

In [30]:
# batch_iter = iter(train_dl)
# batch = next(batch_iter)

# imgs = batch['image'] #return[img1, img2, img3]
# print(batch['image'])
# pic_num = 4
# fig, axes = plt.subplots(len(imgs), pic_num, figsize=(15, 10))
# for i in range(len(imgs)):
#     for j in range(pic_num):
#         # 將圖像和關鍵點轉換為 NumPy 格式
#         img_np = imgs[i][j].permute(1, 2, 0).numpy()

#         # 使用 draw_hand 函數繪製關鍵點
#         trainimg = draw_img
        
#         # 顯示結果
#         axes[i, j].imshow(trainimg)
#         # axes[i, j].axis('off')

# plt.show()


# Model

+ Model Specifications:
  + Input: **`[B, 3, 224, 224]`**
  + Output: **`[B, 21, 2]`** --> 21 for the num of the landmarks, 2 for the coordinates in (x, y) format
  + Layer: You can build up your own model architecture with no limitations.
  + Cost: The computational cost (FLOPs) may not over **`20 GFLOPs`**

In [31]:
from model import SuperResolution

Net = SuperResolution()
print(Net)

SuperResolution(
  (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv3): Conv2d(64, 27, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (PS): PixelShuffle(upscale_factor=3)
  (relu): ReLU(inplace=True)
)


# Testing Model Computational Cost

In [32]:
# # pseudo image
# image = torch.rand(1, 3, 224, 224).cuda()

# # define your model
model = Net.to(config["device"])

# out = model(image)

# # torchsummary report
# summary(model, input_size=(3, 224, 224))
# print(f'From input shape: {image.shape} to output shape: {out.shape}')

# # thop report
# macs, parm = profile(model, inputs=(image, ))
# print(f'FLOPS: {macs * 2 / 1e9} G, Params: {parm / 1e6} M.')
img = torch.randn(1, 3, 256, 256).to(config['device'])
macs, params = profile(model, inputs=(img, ), verbose=False)
flops = macs * 2 / 1e9  # G
params = params / 1e6   # M
print('============================')
print(f'FLOPs : { flops } G')
print(f'PARAMS : { params } M ')
print('============================')
if flops < 11.1:
    print('Your FLOPs is smaller than 11.1 G.')
    print('You will get this 15 points.')
    print('Congratulations !!!')
else:
    print('Your FLOPs is larger than 11.1 G.')
    print('You will not get this 10 points.')

FLOPs : 7.096762368 G
PARAMS : 0.054299 M 
Your FLOPs is smaller than 11.1 G.
You will get this 15 points.
Congratulations !!!


# Criterion

In [33]:
# class CMSELoss(nn.Module):
#   """
#   Coordinate MSE loss
#   input :
#     y_pred = b, 21, 2   (coordinate of 21 keypoints)
#     y_true = b, 21, 2   (keypoints, (y, x))
#   """
#   def __init__(self):
#     super().__init__()
#     self.loss = nn.MSELoss()

#   def forward(self, y_pred, y_true):
#     y_true = torch.flip(y_true, [2]) # flip (y, x) to (x, y)
#     return self.loss(y_pred, y_true)

In [34]:
# criterion = CMSELoss()

# Optimizer and Scheduler (optional)

In [35]:
optimizer = torch.optim.Adam(model.parameters(), lr=config['learning_rate'])
# optimizer = torch.optim.Adadelta(model.parameters(), lr=1)

In [36]:
# scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr=0.002, steps_per_epoch=len(train_dl), epochs=config["n_epochs"])
# scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr=0.001, steps_per_epoch=len(train_dl), epochs=config["n_epochs"], div_factor=2, final_div_factor=5, pct_start=0.09)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max = config["pic_num"] * (len(train_dl.dataset) / config["batch_size"]) * config["n_epochs"], eta_min = 0.00005)
print((len(train_dl.dataset) / config["batch_size"]) * config["n_epochs"])

165000.0


# Training
  + Record the **`loss / epoch`** learning curve
  + If using learning rate scheduler, record the **`lr / epoch`** curve

In [37]:
# weight_path = '/home/va8800/ken_ai/model_weights.pth'
# weight_path = '/home/STuser19/MID/model_weights.pth'

# checkpoint = torch.load(weight_path)
# model.load_state_dict(checkpoint, strict=True)

In [None]:
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            # initialize tracker for minimum validation loss
# valid_loss_min = np.Inf # set initial "min" to infinity
valid_loss_min = np.inf # set initial "min" to infinity

# initialize history for recording what we want to know
history = []

for epoch in range(config["n_epochs"]):
    # monitor training loss, validation loss and learning rate
    train_loss = 0.0
    valid_loss = 0.0
    lrs    = []
    result = {'train_loss': [], 'val_loss': [], 'lrs': []}

    # prepare model for training
    model.train()

    #######################
    # train the model #
    #######################
    for batch in tqdm(train_dl):
        for i in range(len(batch['image'])):
            # clear the gradients of all optimized variables
            optimizer.zero_grad()
            # forward pass: compute predicted outputs by passing inputs to the model
            # print(batch['image'][i].shape)
            output = model(batch['image'][i].to(config['device']))
            # print(output.shape)
            # print(batch['ans'][i].shape)
            if(output.shape != batch['ans'][i].shape):
                output = torch.nn.functional.interpolate(output, size=batch['ans'][i].shape[-2:], mode='bilinear', align_corners=False)
            # calculate the loss
            loss = ((output - batch['ans'][i].to(config['device'])) ** 2).mean()
            # backward pass: compute gradient of the loss with respect to model parameters
            loss.backward()
            # perform a single optimization step (parameter update)
            optimizer.step()
            scheduler.step()

            # record learning rate
            lrs.append(optimizer.param_groups[0]['lr'])

            # update running training loss
            train_loss += loss.item()*batch['image'][i].size(0)

    ######################
    # validate the model #
    ######################
    model.eval()
    for batch in val_dl:
        # compute predicted outputs by passing inputs to the model
        output = model(batch['image'][0].to(config['device']))
        if(output.shape != batch['ans'][i].shape):
                output = torch.nn.functional.interpolate(output, size=batch['ans'][i].shape[-2:], mode='bilinear', align_corners=False)
        # calculate the loss
        # loss = criterion(output, batch[''][0].to(config['device']))
        loss = ((output - batch['ans'][i].to(config['device'])) ** 2).mean()

        # update running validation loss
        valid_loss += loss.item()*batch['image'][0].size(0)

    # print training/validation statistics
    # calculate average loss over an epoch
    train_loss = train_loss/(len(train_dl.dataset)*config["pic_num"])
    result['train_loss'] = train_loss
    valid_loss = valid_loss/len(val_dl.dataset)
    result['val_loss'] = valid_loss
    leaning_rate = lrs
    result['lrs'] = leaning_rate
    history.append(result)

    print('Epoch {:2d}: Learning Rate: {:.6f} Training Loss: {:.6f} Validation Loss:{:.6f}'.format(
        epoch+1,
        leaning_rate[-1],
        train_loss,
        valid_loss
        ))

    # save model if validation loss has decreased
    if valid_loss <= valid_loss_min:
        print("Validation loss decreased({:.6f}-->{:.6f}). Saving model ..".format(
        valid_loss_min,
        valid_loss
        ))
        torch.save(model.state_dict(),"model.pt")
        valid_loss_min = valid_loss

100%|██████████████████████████████████████████████| 3300/3300 [09:17<00:00,  5.92it/s]


Epoch  1: Learning Rate: 0.000500 Training Loss: 0.003625 Validation Loss:0.002415
Validation loss decreased(inf-->0.002415). Saving model ..


100%|██████████████████████████████████████████████| 3300/3300 [08:49<00:00,  6.23it/s]


Epoch  2: Learning Rate: 0.000498 Training Loss: 0.002322 Validation Loss:0.002524


100%|██████████████████████████████████████████████| 3300/3300 [08:58<00:00,  6.13it/s]


Epoch  3: Learning Rate: 0.000496 Training Loss: 0.002223 Validation Loss:0.002263
Validation loss decreased(0.002415-->0.002263). Saving model ..


100%|██████████████████████████████████████████████| 3300/3300 [09:02<00:00,  6.08it/s]


Epoch  4: Learning Rate: 0.000493 Training Loss: 0.002118 Validation Loss:0.002225
Validation loss decreased(0.002263-->0.002225). Saving model ..


100%|██████████████████████████████████████████████| 3300/3300 [09:01<00:00,  6.10it/s]


Epoch  5: Learning Rate: 0.000489 Training Loss: 0.002058 Validation Loss:0.002187
Validation loss decreased(0.002225-->0.002187). Saving model ..


100%|██████████████████████████████████████████████| 3300/3300 [09:46<00:00,  5.62it/s]


Epoch  6: Learning Rate: 0.000484 Training Loss: 0.002029 Validation Loss:0.002113
Validation loss decreased(0.002187-->0.002113). Saving model ..


100%|██████████████████████████████████████████████| 3300/3300 [08:48<00:00,  6.25it/s]


Epoch  7: Learning Rate: 0.000479 Training Loss: 0.001998 Validation Loss:0.002113


100%|██████████████████████████████████████████████| 3300/3300 [09:13<00:00,  5.97it/s]


Epoch  8: Learning Rate: 0.000472 Training Loss: 0.001994 Validation Loss:0.002542


100%|██████████████████████████████████████████████| 3300/3300 [09:44<00:00,  5.64it/s]


Epoch  9: Learning Rate: 0.000465 Training Loss: 0.001965 Validation Loss:0.002080
Validation loss decreased(0.002113-->0.002080). Saving model ..


100%|██████████████████████████████████████████████| 3300/3300 [09:48<00:00,  5.60it/s]


Epoch 10: Learning Rate: 0.000457 Training Loss: 0.001961 Validation Loss:0.002083


100%|██████████████████████████████████████████████| 3300/3300 [09:51<00:00,  5.58it/s]


Epoch 11: Learning Rate: 0.000448 Training Loss: 0.001937 Validation Loss:0.002076
Validation loss decreased(0.002080-->0.002076). Saving model ..


 41%|███████████████████                           | 1367/3300 [04:09<06:15,  5.15it/s]

In [None]:
# from google.colab import drive
# drive.mount('/content/drive')

torch.save(model.state_dict(), '/home/va8800/ken_ai/2024-ai-training-fianl-project/model.pth')
# torch.save(model.state_dict(), '/home/STuser19/MID/v3/model.pth')
print("Save model weight successfully.")

In [None]:
def plot_losses(history):
  train_losses = [x.get('train_loss') for x in history]
  val_losses = [x['val_loss'] for x in history]
  plt.plot(train_losses, '-bx')
  plt.plot(val_losses, '-rx')
  plt.xlabel('epoch')
  plt.ylabel('loss')
  plt.legend(['Training', 'Validation'])
  plt.title('Loss vs. No. of epochs');

plot_losses(history)

In [None]:
def plot_lrs(history):
  lrs = np.concatenate([x.get('lrs', []) for x in history])
  plt.plot(lrs)
  plt.xlabel('Batch no.')
  plt.ylabel('Learning rate')
  plt.title('Learning Rate vs. Batch no.');

plot_lrs(history)

# Load your model

In [None]:
weight_path = '/home/va8800/ken_ai/2024-ai-training-fianl-project/model.pth'
# weight_path = '/home/STuser19/MID/v3/model_weights.pth'

checkpoint = torch.load(weight_path)
model.eval()
model.load_state_dict(checkpoint, strict=True)

In [None]:
print(model)