<a href="https://colab.research.google.com/github/105062262/training-course_hw1/blob/master/training_course_hw1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## **Environment Setup**

In [0]:
!pip3 install torch torchvision tqdm matplotlib scikit-image

## **Download**

In [0]:
!rm -rf ./ccpd5000/ && rm ccpd5000.tar.gz
!wget https://github.com/amoshyc/cvlab-2019w-project/releases/download/v0.1/ccpd5000.tar.gz
!tar zxvf ccpd5000.tar.gz
!ls ccpd5000/**/*.jpg | wc -l # expected 6000 (5000 train/valid + 1000 test)

In [0]:
from pathlib import Path

img_dir = Path('./ccpd5000/train/')
img_paths = img_dir.glob('*.jpg')
img_paths = sorted(list(img_paths))

print(len(img_paths))

name = img_paths[0].name
print(name)

token = name.split('-')[3]
print(token)

token = token.replace('&', '_')
print(token)

values = token.split('_')
print(values)

values = [float(val) for val in values]
print(values) 

4000
00307112068966-90_87-326&463_432&500-422&494_328&494_327&467_421&467-0_0_17_6_33_24_33-120-9.jpg
422&494_328&494_327&467_421&467
422_494_328_494_327_467_421_467
['422', '494', '328', '494', '327', '467', '421', '467']
[422.0, 494.0, 328.0, 494.0, 327.0, 467.0, 421.0, 467.0]


## **Util**

In [0]:
import warnings

import torch
import numpy as np
from PIL import Image, ImageDraw
from skimage import util
from skimage.transform import ProjectiveTransform, warp

def draw_kpts(img, kpts, c='red', r=2.0):
    '''Draw keypoints on image.
    Args:
        img: (PIL.Image) will be modified
        kpts: (FloatTensor) keypoints in xy format, sized [8,]
        c: (PIL.Color) color of keypoints, default to 'red'
        r: (float) radius of keypoints, default to 2.0
    Return:
        img: (PIL.Image) modified image
    '''
    draw = ImageDraw.Draw(img)
    kpts = kpts.view(4, 2)
    kpts = kpts * torch.FloatTensor(img.size)
    kpts = kpts.numpy().tolist()
    for (x, y) in kpts:
        draw.ellipse([x - r, y - r, x + r, y + r], fill=c)
    return img


def draw_plate(img, kpts):
    '''Perspective tranform and draw the plate indicated by kpts to a 96x30 rectangle.
    Args:
        img: (PIL.Image) will be modified
        kpts: (FloatTensor) keypoints in xy format, sized [8,]
    Return:
        img: (PIL.Image) modified image
    Reference: http://scikit-image.org/docs/dev/auto_examples/xx_applications/plot_geometric.html
    '''
    src = np.float32([[96, 30], [0, 30], [0, 0], [96, 0]])
    dst = kpts.view(4, 2).numpy()
    dst = dst * np.float32(img.size)

    transform = ProjectiveTransform()
    transform.estimate(src, dst)
    with warnings.catch_warnings(): # surpress skimage warning
        warnings.simplefilter("ignore")
        warped = warp(np.array(img), transform, output_shape=(30, 96))
        warped = util.img_as_ubyte(warped)
    plate = Image.fromarray(warped)
    img.paste(plate)
    return img

## **Data**

In [0]:
!ls

ccpd5000  ccpd5000.tar.gz  sample_data


In [0]:
from PIL import Image
from tqdm import tqdm
from pathlib import Path

import torch
from torch.utils.data import DataLoader
from torchvision.transforms import functional as tf

class CCPD5000:
  def __init__(self, img_dir):
    self.img_dir = Path(img_dir)
    self.img_paths = self.img_dir.glob('*.jpg')
    self.img_paths = sorted(list(self.img_paths))
    
  def __len__(self):
    return len(self.img_paths)
  
  def __getitem__(self, idx):
    img_path = self.img_paths[idx]
    
    # load image
    img = Image.open(img_path)
    W, H = img.size
    img = img.convert('RGB')
    img = img.resize((192, 320))
    img = tf.to_tensor(img)
    
    # parse annotation
    name = img_path.name
    token = name.split('-')[3]
    token = token.replace('&', '_')
    kpt = [float(val) for val in token.split('_')]
    kpt = torch.tensor(kpt) # [8,]
    kpt = kpt.view(4, 2) # [4, 2]
    kpt = kpt / torch.FloatTensor([W, H])
    kpt = kpt.view(-1) # [8,]
    
    return img, kpt
  

train_set = CCPD5000('./ccpd5000/train')
print(len(train_set))

img, kpt = train_set[-1]
print(img.size())
print(kpt.size())

4000
torch.Size([3, 320, 192])
torch.Size([8])


In [0]:
import torch
from torch import nn
from torch.nn import functional as F


class ConvBlock(nn.Module):
    def __init__(self, cin, cout):
        super().__init__()
        self.conv1 = nn.Conv2d(cin, cout, (3, 3), padding=1)
        self.conv2 = nn.Conv2d(cout, cout, (3, 3), padding=1)
        self.bn1 = nn.BatchNorm2d(cout)
        self.bn2 = nn.BatchNorm2d(cout)
        self.act1 = nn.ReLU() #self.act1 = nn.LeakyReLU()
        self.act2 = nn.ReLU() #self.act2 = nn.LeakyReLU()
    
    def forward(self, x):
        x = self.act1(self.bn1(self.conv1(x)))
        x = self.act2(self.bn2(self.conv2(x)))
        return x

class CCPDRegressor(nn.Module):
    def __init__(self):
        super().__init__()
        self.features = nn.Sequential(
            ConvBlock(3, 32),
            nn.MaxPool2d((8, 8)),
            ConvBlock(32, 32),
            nn.MaxPool2d((4, 4)),
            ConvBlock(32, 64),
            nn.MaxPool2d((2, 2)),
            ConvBlock(64, 64),
            nn.MaxPool2d((2, 2)),
        )
        self.regressor = nn.Sequential(
            nn.Linear(128, 32),
            nn.LeakyReLU(),
            nn.Linear(32, 8),
            nn.Sigmoid(),
        )

    def forward(self, x):
        N = x.size(0)
        x = self.features(x)
        x = x.view(N, -1) # i.e. Flatten
        x = self.regressor(x)
        return x
      
# Check
device = 'cuda'
model = CCPDRegressor().to(device)
img_b = torch.rand(16, 3, 192, 320).to(device)
out_b = model(img_b)
print(out_b.size()) # expected [16, 8]

torch.Size([16, 8])


## **Train**

In [0]:
!nvidia-smi

Thu Feb 21 11:13:07 2019       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 410.79       Driver Version: 410.79       CUDA Version: 10.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  Tesla K80           Off  | 00000000:00:04.0 Off |                    0 |
| N/A   36C    P0    70W / 149W |   1102MiB / 11441MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID   Type   Process name                             Usage      |
+-------

In [0]:
#!rm -rf ./log/'2019.02.21-10:27:57'

In [0]:
import json
import random
import numpy as np
from tqdm import tqdm
from pathlib import Path
from datetime import datetime

import pandas as pd
import matplotlib.pyplot as plt
plt.style.use('seaborn')

import torch
from torch import nn
from torch.nn import functional as F
from torchvision.utils import save_image
from torch.utils.data import Subset, ConcatDataset, DataLoader
from torchvision.transforms import functional as tf

# For reproducibility
# Set before loading model and dataset
seed = 999
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.backends.cudnn.deterministic = True

train_set = CCPD5000('./ccpd5000/train/')
valid_set = CCPD5000('./ccpd5000/valid/')
visul_set = ConcatDataset([
    Subset(train_set, random.sample(range(len(train_set)), 32)),
    Subset(valid_set, random.sample(range(len(valid_set)), 32)),
])
train_loader = DataLoader(train_set, 32, shuffle=True, num_workers=3)
valid_loader = DataLoader(valid_set, 32, shuffle=False, num_workers=1)
visul_loader = DataLoader(visul_set, 32, shuffle=False, num_workers=1)


learning_rate = 8e-4 #change lr
device = 'cuda'
model = CCPDRegressor().to(device)
criterion = nn.L1Loss().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

log_dir = Path('./log/') / f'{datetime.now():%Y.%m.%d-%H:%M:%S}'
log_dir.mkdir(parents=True)
print(log_dir)
history = {
    'train_mae': [],
    'valid_mae': [],
    'train_mse': [],
    'valid_mse': [],
}


def train(pbar):
    model.train()
    mae_steps = []
    mse_steps = []

    for img_b, kpt_b in iter(train_loader):
        img_b = img_b.to(device)
        kpt_b = kpt_b.to(device)

        optimizer.zero_grad()
        pred_b = model(img_b)
        loss = criterion(pred_b, kpt_b)
        loss.backward()
        optimizer.step()

        mae = loss.detach().item()
        mse = F.mse_loss(pred_b.detach(), kpt_b.detach()).item()
        mae_steps.append(mae)
        mse_steps.append(mse)

        pbar.set_postfix(mae=mae, mse=mse)
        pbar.update(img_b.size(0))

    avg_mae = sum(mae_steps) / len(mae_steps)
    avg_mse = sum(mse_steps) / len(mse_steps)
    pbar.set_postfix(avg_mae=f'{avg_mae:.5f}', avg_mse=f'{avg_mse:.5f}')
    history['train_mae'].append(avg_mae)
    history['train_mse'].append(avg_mse)


def valid(pbar):
    model.eval()
    mae_steps = []
    mse_steps = []

    for img_b, kpt_b in iter(valid_loader):
        img_b = img_b.to(device)
        kpt_b = kpt_b.to(device)
        pred_b = model(img_b)
        loss = criterion(pred_b, kpt_b)
        mae = loss.detach().item()

        mse = F.mse_loss(pred_b.detach(), kpt_b.detach()).item()
        mae_steps.append(mae)
        mse_steps.append(mse)

        pbar.set_postfix(mae=mae, mse=mse)
        pbar.update(img_b.size(0))

    avg_mae = sum(mae_steps) / len(mae_steps)
    avg_mse = sum(mse_steps) / len(mse_steps)
    pbar.set_postfix(avg_mae=f'{avg_mae:.5f}', avg_mse=f'{avg_mse:.5f}')
    history['valid_mae'].append(avg_mae)
    history['valid_mse'].append(avg_mse)


def visul(pbar, epoch):
    model.eval()
    epoch_dir = log_dir / f'{epoch:03d}'
    epoch_dir.mkdir()
    for img_b, kpt_b in iter(visul_loader):
        pred_b = model(img_b.to(device)).cpu()
        for img, pred_kpt, true_kpt in zip(img_b, pred_b, kpt_b):
            img = tf.to_pil_image(img)
            vis = draw_plate(img, pred_kpt)
            vis = draw_kpts(vis, true_kpt, c='orange')
            vis = draw_kpts(vis, pred_kpt, c='red')
            vis.save(epoch_dir / f'{pbar.n:03d}.jpg')
            pbar.update()


def log(epoch):
    with (log_dir / 'metrics.json').open('w') as f:
        json.dump(history, f)

    fig, ax = plt.subplots(2, 1, figsize=(6, 6), dpi=100)
    ax[0].set_title('MAE')
    ax[0].plot(range(epoch + 1), history['train_mae'], label='Train')
    ax[0].plot(range(epoch + 1), history['valid_mae'], label='Valid')
    ax[0].legend()
    ax[1].set_title('MSE')
    ax[1].plot(range(epoch + 1), history['train_mse'], label='Train')
    ax[1].plot(range(epoch + 1), history['valid_mse'], label='Valid')
    ax[1].legend()
    fig.savefig(str(log_dir / 'metrics.jpg'))
    plt.close()
    
    #save
    if torch.tensor(history['valid_mse']).argmin() == epoch:
      torch.save(model.state_dict(), str(log_dir/'model.pth'))

    
#update lr
def update_lr(optimizer, lr):    
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
    
curr_lr = learning_rate
for epoch in range(20): #10->20
    print('Epoch', epoch, flush=True)
    with tqdm(total=len(train_set), desc='  Train') as pbar:
        train(pbar)

    with torch.no_grad():
        with tqdm(total=len(valid_set), desc='  Valid') as pbar:
            valid(pbar)
        with tqdm(total=len(visul_set), desc='  Visul') as pbar:
            visul(pbar, epoch)
        log(epoch)
    
    #lr decay
    if (epoch+1) % 7 == 0:
        curr_lr *= 0.5
        update_lr(optimizer, curr_lr)

log/2019.02.21-11:13:10
Epoch 0


  Train: 100%|██████████| 4000/4000 [01:48<00:00, 46.70it/s, avg_mae=0.03519, avg_mse=0.00317]
  Valid: 100%|██████████| 1000/1000 [00:35<00:00, 30.70it/s, avg_mae=0.01969, avg_mse=0.00081]
  Visul: 100%|██████████| 64/64 [00:03<00:00, 20.05it/s]


Epoch 1


  Train: 100%|██████████| 4000/4000 [01:49<00:00, 49.86it/s, avg_mae=0.01647, avg_mse=0.00055]
  Valid: 100%|██████████| 1000/1000 [00:34<00:00, 32.17it/s, avg_mae=0.01398, avg_mse=0.00043]
  Visul: 100%|██████████| 64/64 [00:03<00:00, 20.14it/s]


Epoch 2


  Train: 100%|██████████| 4000/4000 [01:50<00:00, 46.94it/s, avg_mae=0.01281, avg_mse=0.00033]
  Valid: 100%|██████████| 1000/1000 [00:34<00:00, 31.95it/s, avg_mae=0.01181, avg_mse=0.00031]
  Visul: 100%|██████████| 64/64 [00:03<00:00, 18.61it/s]


Epoch 3


  Train: 100%|██████████| 4000/4000 [01:51<00:00, 44.94it/s, avg_mae=0.01170, avg_mse=0.00027]
  Valid: 100%|██████████| 1000/1000 [00:34<00:00, 30.98it/s, avg_mae=0.01355, avg_mse=0.00037]
  Visul: 100%|██████████| 64/64 [00:03<00:00, 18.95it/s]


Epoch 4


  Train: 100%|██████████| 4000/4000 [01:51<00:00, 43.30it/s, avg_mae=0.01084, avg_mse=0.00023]
  Valid: 100%|██████████| 1000/1000 [00:34<00:00, 31.52it/s, avg_mae=0.01090, avg_mse=0.00023]
  Visul: 100%|██████████| 64/64 [00:03<00:00, 20.30it/s]


Epoch 5


  Train: 100%|██████████| 4000/4000 [01:50<00:00, 49.12it/s, avg_mae=0.00993, avg_mse=0.00019]
  Valid: 100%|██████████| 1000/1000 [00:34<00:00, 30.88it/s, avg_mae=0.01067, avg_mse=0.00022]
  Visul: 100%|██████████| 64/64 [00:03<00:00, 21.91it/s]


Epoch 6


  Train: 100%|██████████| 4000/4000 [01:50<00:00, 49.81it/s, avg_mae=0.00959, avg_mse=0.00018]
  Valid: 100%|██████████| 1000/1000 [00:34<00:00, 31.44it/s, avg_mae=0.00989, avg_mse=0.00020]
  Visul: 100%|██████████| 64/64 [00:03<00:00, 20.29it/s]


Epoch 7


  Train: 100%|██████████| 4000/4000 [01:50<00:00, 51.69it/s, avg_mae=0.00796, avg_mse=0.00012]
  Valid: 100%|██████████| 1000/1000 [00:34<00:00, 31.88it/s, avg_mae=0.00889, avg_mse=0.00016]
  Visul: 100%|██████████| 64/64 [00:03<00:00, 20.21it/s]


Epoch 8


  Train: 100%|██████████| 4000/4000 [01:51<00:00, 44.33it/s, avg_mae=0.00777, avg_mse=0.00012]
  Valid: 100%|██████████| 1000/1000 [00:34<00:00, 30.67it/s, avg_mae=0.00912, avg_mse=0.00016]
  Visul: 100%|██████████| 64/64 [00:03<00:00, 19.68it/s]


Epoch 9


  Train: 100%|██████████| 4000/4000 [01:50<00:00, 52.61it/s, avg_mae=0.00739, avg_mse=0.00011]
  Valid: 100%|██████████| 1000/1000 [00:34<00:00, 31.19it/s, avg_mae=0.00830, avg_mse=0.00014]
  Visul: 100%|██████████| 64/64 [00:03<00:00, 20.65it/s]


Epoch 10


  Train: 100%|██████████| 4000/4000 [01:50<00:00, 53.16it/s, avg_mae=0.00734, avg_mse=0.00010]
  Valid: 100%|██████████| 1000/1000 [00:35<00:00, 29.05it/s, avg_mae=0.00816, avg_mse=0.00014]
  Visul: 100%|██████████| 64/64 [00:03<00:00, 19.14it/s]


Epoch 11


  Train: 100%|██████████| 4000/4000 [01:51<00:00, 49.23it/s, avg_mae=0.00695, avg_mse=0.00010]
  Valid: 100%|██████████| 1000/1000 [00:35<00:00, 29.71it/s, avg_mae=0.00821, avg_mse=0.00014]
  Visul: 100%|██████████| 64/64 [00:03<00:00, 19.31it/s]


Epoch 12


  Train: 100%|██████████| 4000/4000 [01:50<00:00, 44.52it/s, avg_mae=0.00685, avg_mse=0.00009]
  Valid: 100%|██████████| 1000/1000 [00:34<00:00, 31.95it/s, avg_mae=0.00833, avg_mse=0.00015]
  Visul: 100%|██████████| 64/64 [00:03<00:00, 20.18it/s]


Epoch 13


  Train: 100%|██████████| 4000/4000 [01:51<00:00, 50.42it/s, avg_mae=0.00697, avg_mse=0.00009]
  Valid: 100%|██████████| 1000/1000 [00:34<00:00, 31.06it/s, avg_mae=0.00808, avg_mse=0.00014]
  Visul: 100%|██████████| 64/64 [00:03<00:00, 19.71it/s]


Epoch 14


  Train: 100%|██████████| 4000/4000 [01:51<00:00, 47.31it/s, avg_mae=0.00626, avg_mse=0.00008]
  Valid: 100%|██████████| 1000/1000 [00:34<00:00, 31.47it/s, avg_mae=0.00784, avg_mse=0.00013]
  Visul: 100%|██████████| 64/64 [00:03<00:00, 17.86it/s]


Epoch 15


  Train: 100%|██████████| 4000/4000 [01:52<00:00, 46.00it/s, avg_mae=0.00596, avg_mse=0.00007]
  Valid: 100%|██████████| 1000/1000 [00:34<00:00, 30.13it/s, avg_mae=0.00796, avg_mse=0.00013]
  Visul: 100%|██████████| 64/64 [00:03<00:00, 17.05it/s]


Epoch 16


  Train: 100%|██████████| 4000/4000 [01:45<00:00, 54.73it/s, avg_mae=0.00589, avg_mse=0.00007]
  Valid: 100%|██████████| 1000/1000 [00:31<00:00, 34.35it/s, avg_mae=0.00786, avg_mse=0.00013]
  Visul: 100%|██████████| 64/64 [00:02<00:00, 17.68it/s]


Epoch 17


  Train: 100%|██████████| 4000/4000 [01:44<00:00, 45.32it/s, avg_mae=0.00580, avg_mse=0.00007]
  Valid: 100%|██████████| 1000/1000 [00:33<00:00, 33.35it/s, avg_mae=0.00819, avg_mse=0.00014]
  Visul: 100%|██████████| 64/64 [00:03<00:00, 21.26it/s]


Epoch 18


  Train: 100%|██████████| 4000/4000 [01:41<00:00, 51.24it/s, avg_mae=0.00583, avg_mse=0.00007]
  Valid: 100%|██████████| 1000/1000 [00:31<00:00, 34.62it/s, avg_mae=0.00818, avg_mse=0.00013]
  Visul: 100%|██████████| 64/64 [00:02<00:00, 21.95it/s]


Epoch 19


  Train: 100%|██████████| 4000/4000 [01:40<00:00, 61.64it/s, avg_mae=0.00568, avg_mse=0.00006]
  Valid: 100%|██████████| 1000/1000 [00:30<00:00, 35.17it/s, avg_mae=0.00800, avg_mse=0.00013]
  Visul: 100%|██████████| 64/64 [00:02<00:00, 22.61it/s]


In [0]:
display.Image(str(log_dir / 'metrics.jpg'))

In [0]:
display.Image(str(log_dir / '009' / '000.jpg')) # 0 ~ 31 are training images' visualization

In [0]:
display.Image(str(log_dir / '009' / '032.jpg')) # 32 ~ 63 are validation images' visualization

## **Test**

In [0]:
#!rm -rf ./log/result/'2019.02.21-09:17:19'
#!rm -rf ./log/'2019.02.21-08:10:12'

In [0]:
from PIL import Image
from tqdm import tqdm
from pathlib import Path
from datetime import datetime
from PIL import Image
import pandas as pd

import torch
from torch.utils.data import DataLoader
from torchvision.transforms import functional as tf

class CCPD5000Test:
  def __init__(self, img_dir):
    self.img_dir = Path(img_dir)
    self.img_paths = self.img_dir.glob('*.jpg')
    self.img_paths = sorted(list(self.img_paths))
    
  def __len__(self):
    return len(self.img_paths)
  
  def __getitem__(self, idx):
    img_path = self.img_paths[idx]
    
    # load image
    img = Image.open(img_path)
    img = img.convert('RGB')
    img = img.resize((192, 320))
    img = tf.to_tensor(img)
    
    return img

test_set = CCPD5000Test('./ccpd5000/test/')
test_loader = DataLoader(test_set, 32, shuffle=False, num_workers=3)

device = 'cuda'
model = CCPDRegressor().to(device)
model.load_state_dict(torch.load('log/2019.02.21-11:13:10/model.pth'))    #resume

log_dir = Path('./log/') / 'result' / f'{datetime.now():%Y.%m.%d-%H:%M:%S}'
log_dir.mkdir(parents=True)
print(log_dir)

def test(pbar):
    model.eval()
    corner = []
    for img_b in iter(test_loader):
        kpt_b = model(img_b.to(device)).cpu()
        for img, kpt in zip(img_b, kpt_b):
            img = tf.to_pil_image(img)
            vis = draw_plate(img, kpt)
            vis = draw_kpts(vis, kpt, c='red')
            vis.save(log_dir / f'{pbar.n:03d}_test.jpg')
            corner.append([f'{pbar.n:03d}.jpg', *kpt.numpy().tolist()])
            pbar.update()
    return pd.DataFrame(corner)


with torch.no_grad():
    with tqdm(total=len(test_set)) as pbar:
        df_pred = test(pbar)
    df_pred.columns = ['name', 'BR_x', 'BR_y', 'BL_x', 'BL_y', 'TL_x', 'TL_y', 'TR_x', 'TR_y']
    df_pred.to_csv(log_dir / 'test_pred_Cheong.csv', float_format='%.5f', index=False)

  0%|          | 0/1000 [00:00<?, ?it/s]

log/result/2019.02.21-12:04:48


100%|██████████| 1000/1000 [00:40<00:00, 24.59it/s]
