<a href="https://colab.research.google.com/github/Denev6/CapStone/blob/main/tdcn_demo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# TDCN 이미지 예측 모델 구현

수정 
- optimizer zero_grad 위치 수정
- DilatedConvBlock의 In/Out channels 수정
```
The feature dimensions of Dilated Convolutional Blocks are
256, 256, 128, 64, 64 for 2D facial landmark features, and
128, 64, 256, 128, 64 for the head-pose features.
```
- Flatten layer 추가

아래 미해결 항목은 코드에서 `TODO`로 검색하면 찾을 수 있습니다.
- DilatedConvBlock 내부에 있는 Conv2d의 In/Out channels (현재는 임시로 설정)
- 최종 분류층(classifier)에 있는 Linear layer (현재는 임시로 설정) 

In [None]:
!pip install torchsummary

In [None]:
import os
import gc
import pickle
import random
import warnings
from google.colab import drive

import numpy as np
import pandas as pd
from tqdm.auto import tqdm, trange
import torch
from torch import nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchsummary import summary
from sklearn.metrics import (
    f1_score,
    accuracy_score,
    recall_score,
    precision_score,
    confusion_matrix,
)

In [None]:
# Mount Google Drive so the files under /content/drive are reachable.
drive.mount("/content/drive")
# Suppress every Python warning from here on.
warnings.simplefilter("ignore")


def clear() -> None:
    """Run the garbage collector, then release PyTorch's cached CUDA memory."""
    # Collect first so tensors that are no longer referenced are freed before
    # the CUDA cache is emptied.
    gc.collect()
    torch.cuda.empty_cache()


def join_path(*args):
    """Build a path below the project's Google Drive folder.

    Args:
        *args: Path components appended, in order, to the project root.

    Returns:
        The joined path as a string.
    """
    project_root = "/content/drive/MyDrive/Capstone"
    return os.path.join(project_root, *args)


# NOTE: despite the ``*_csv`` names these paths point at pickle files.
train_csv = join_path("data", "train.pk")
dev_csv = join_path("data", "dev.pk")
test_csv = join_path("data", "test.pk")

# SECURITY: ``pickle.load`` can execute arbitrary code while loading, so only
# load files that you produced yourself.
# Each file is opened in a ``with`` block so its handle is closed as soon as
# the data has been read instead of being left to the garbage collector.
with open(train_csv, "rb") as _fh:
    train_data = pickle.load(_fh)
with open(dev_csv, "rb") as _fh:
    dev_data = pickle.load(_fh)
with open(test_csv, "rb") as _fh:
    test_data = pickle.load(_fh)

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
RAND_SEED = 42  # seed handed to seed_everything()
BATCH_SIZE = 8  # batch size of every DataLoader
EPOCHS = 100  # upper bound on training epochs (early stopping may end sooner)
LEARNING_RATE = 2e-5  # learning rate of the SGD optimizer
SMOOTHING = 0.0  # label-smoothing factor of the loss
MODEL_PATH = join_path("tdcn.pth")  # where model weights are written

In [None]:
def seed_everything(seed):
    """Seed every random number generator used here and make cuDNN deterministic.

    Seeds Python's ``random``, NumPy and PyTorch (CPU and all CUDA devices),
    sets ``PYTHONHASHSEED`` in the environment, and configures cuDNN for
    reproducible runs.

    Args:
        seed: Integer seed applied to all generators.
    """
    random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # benchmark=True lets cuDNN auto-tune and possibly select a different
    # convolution algorithm on each run, which defeats reproducibility; it
    # must be off when deterministic results are wanted.
    torch.backends.cudnn.benchmark = False

# Seed all generators once, before the datasets and the model below are built.
seed_everything(RAND_SEED)

In [None]:
class EarlyStopping:
    """Watch a loss value and report when training should stop.

    Whenever a new minimum loss is seen the model's ``state_dict`` is written
    to ``save_path`` and the patience counter is reset. A loss that exceeds
    the minimum by more than ``eps`` increments the counter; a loss that lies
    between the minimum and ``minimum + eps`` leaves the counter untouched.

    Args:
        patience: Number of counted non-improving calls that triggers a stop.
        save_path: File the best ``state_dict`` is saved to.
        eps: Tolerance above the minimum loss before a call is counted.
    """

    def __init__(self, patience=2, save_path="model.pth", eps=1e-6):
        self._path = save_path
        self._patience = patience
        self._eps = eps
        self._min_loss = np.inf
        self.__counter = 0

    def should_stop(self, model, loss):
        """Record ``loss`` and return ``True`` once patience is exhausted.

        Args:
            model: Module whose ``state_dict`` is saved on improvement.
            loss: Latest loss value to compare against the minimum so far.

        Returns:
            ``True`` if training should stop, otherwise ``False``.
        """
        if loss < self._min_loss:
            # New best: remember it, reset patience and checkpoint the model.
            self._min_loss = loss
            self.__counter = 0
            torch.save(model.state_dict(), self._path)
            return False

        if loss > self._min_loss + self._eps:
            self.__counter += 1
            return self.__counter >= self._patience

        # Within ``eps`` of the best loss: neither an improvement nor a strike.
        return False

    def load(self, model):
        """Load the saved best ``state_dict`` into ``model`` and return it."""
        best_state = torch.load(self._path)
        model.load_state_dict(best_state)
        return model

    @property
    def counter(self):
        """Number of counted non-improving calls since the last improvement."""
        return self.__counter

# Dataset

In [None]:
class CustomDataset(Dataset):
    """Dataset that serves random fixed-length windows cut from the records.

    Args:
        data: Mapping of record id to a record with ``'pose'`` and
            ``'features'`` entries (objects supporting
            ``.astype('float').values``, presumably pandas DataFrames with
            one row per time step -- confirm against the pickle contents)
            and a scalar label ``'y'``.
        up_count: How many samples each record contributes to ``len()``.
        data_size: Number of consecutive rows in every returned window.
    """

    def __init__(self, data, up_count=500, data_size=5000):
        # Convert every record. The previous version left the loop after the
        # first record (a stray ``break``), so only one record was ever used.
        self.data = {}
        for k, v in data.items():
            self.data[k] = {
                'pose': torch.FloatTensor(v['pose'].astype('float').values),
                'features': torch.FloatTensor(v['features'].astype('float').values),
                'y': torch.IntTensor([v['y']]),
            }
        # Cached once so __getitem__ does not rebuild the list on every call.
        self._records = list(self.data.values())
        self.up_count = up_count
        self.data_size = data_size

    def __len__(self):
        """Return the nominal epoch size: ``up_count`` samples per record."""
        return len(self.data) * self.up_count

    def __getitem__(self, index):
        """Return ``((features, pose), y)`` for a randomly chosen window.

        ``index`` is not used: each call draws a random record and a random
        start row, so repeated calls with the same index differ.

        Raises:
            ValueError: If the chosen record has fewer than ``data_size`` rows.
        """
        target_data = random.choice(self._records)
        x1 = target_data['features']
        x2 = target_data['pose']  # assumed to have as many rows as x1

        last_start = len(x1) - self.data_size
        if last_start < 0:
            raise ValueError(
                f"record has {len(x1)} rows, fewer than data_size={self.data_size}"
            )
        # ``last_start`` itself is a valid start row, hence the +1. Without it
        # the final window is never drawn and a record of exactly
        # ``data_size`` rows raises.
        cut = random.randrange(last_start + 1)

        x1 = x1[cut: cut + self.data_size]
        x2 = x2[cut: cut + self.data_size]
        y = target_data['y']
        return (x1, x2), y

In [None]:
# Datasets: one per split. ``up_count`` scales the reported length of each
# dataset (500 by default for training, 1 for dev, 10 for test).
training_dataset = CustomDataset(train_data)
dev_dataset = CustomDataset(dev_data, up_count=1)
test_dataset = CustomDataset(test_data, up_count=10)

# No ``shuffle`` is passed; CustomDataset already draws its samples at random.
train_dataloader = DataLoader(training_dataset, batch_size=BATCH_SIZE)
dev_dataloader = DataLoader(dev_dataset, batch_size=BATCH_SIZE)
test_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE)

# Dilated Conv Block

In [None]:
class DilatedConvBlock(nn.Module):
    """DCN block: three stacked dilated 3x3 convolutions plus a 1x1 shortcut.

    The input passes through dilated convolutions with dilation 1, 2 and 4,
    each followed by ELU. The result is added to a 1x1 convolution of the
    input and optionally batch-normalised. ``padding="same"`` keeps the
    spatial size unchanged.

    Args:
        in_channels: Channels of the input tensor.
        out_channels: Channels produced by every convolution in the block.
        has_BN: Apply ``BatchNorm2d`` to the block output when ``True``.
        batch_size: Accepted for interface compatibility; not used.
    """

    def __init__(self, in_channels, out_channels, has_BN=False, batch_size=8):
        super(DilatedConvBlock, self).__init__()
        self._has_BN = has_BN
        cnn_args = {
            "kernel_size": (3, 3),
            "padding": "same",
        }
        # TODO: In/Out channels of the Conv2d layers inside DilatedConvBlock.
        # Only the first conv's in_channels and the last conv's out_channels
        # follow the paper for now; how the intermediate channel sizes should
        # be chosen is still undecided.
        self.dilated_conv1 = nn.Conv2d(
            in_channels, out_channels, dilation=1, **cnn_args
        )
        self.dilated_conv2 = nn.Conv2d(
            out_channels, out_channels, dilation=2, **cnn_args
        )
        self.dilated_conv3 = nn.Conv2d(
            out_channels, out_channels, dilation=4, **cnn_args
        )
        self.conv1d = nn.Conv2d(in_channels, out_channels, kernel_size=1)
        # Always created (even when has_BN is False) so state_dict keys stay
        # the same for every block.
        self.batch_norm = nn.BatchNorm2d(out_channels)

    def forward(self, x):
        """Apply the block to ``x`` (a 4-D tensor accepted by ``Conv2d``)."""
        x_1d = self.conv1d(x)

        x_2d = x
        for conv in (self.dilated_conv1, self.dilated_conv2, self.dilated_conv3):
            # The earlier code called the same layer twice on the same input
            # and summed the two identical results. Computing it once and
            # doubling it gives the same value and gradients at half the cost.
            # NOTE(review): if the paper intends two *different* parallel
            # convolutions here, separate layers would be needed -- confirm.
            branch = conv(x_2d)
            x_2d = F.elu(branch + branch)

        x = x_1d + x_2d

        if self._has_BN:
            x = self.batch_norm(x)
        return x

In [None]:
class TDCN(nn.Module):
    """Stack of five DilatedConvBlocks with max-pooling between them.

    The first four blocks use batch normalisation and are each followed by a
    ``MaxPool2d`` with kernel ``(2, 1)``; the fifth block has no batch
    normalisation and no pooling after it.

    Args:
        channels: Six channel sizes; block ``i`` maps ``channels[i]`` to
            ``channels[i + 1]``.
        batch_size: Forwarded to every DilatedConvBlock.
    """

    def __init__(self, channels, batch_size=8):
        super().__init__()

        stages = []
        for i in range(4):
            stages.append(
                DilatedConvBlock(
                    channels[i], channels[i + 1], has_BN=True, batch_size=batch_size
                )
            )
            stages.append(nn.MaxPool2d(kernel_size=(2, 1)))
        # Final block: no batch norm, no pooling afterwards.
        stages.append(
            DilatedConvBlock(
                channels[4], channels[5], batch_size=batch_size, has_BN=False
            )
        )
        self.tdcn = nn.Sequential(*stages)

    def forward(self, x):
        """Run ``x`` through the whole stack and return the result."""
        return self.tdcn(x)

In [None]:
class PredictionModel(nn.Module):
    """Two-stream TDCN classifier with a feature-wise attention gate.

    The facial-feature input and the head-pose input each go through their
    own TDCN. The two outputs are concatenated along the last axis,
    re-weighted by an attention gate and classified into two classes.

    Args:
        batch_size: Forwarded to both TDCN streams.
    """

    def __init__(self, batch_size=8):
        super(PredictionModel, self).__init__()
        self.feat_tdcn = TDCN(channels=[1, 256, 256, 128, 64, 64], batch_size=batch_size)
        self.pose_tdcn = TDCN(channels=[1, 128, 64, 256, 128, 64], batch_size=batch_size)
        # 142 is the width after concatenation, assuming feature width 136 and
        # pose width 6 as used elsewhere in this notebook.
        self.attention = nn.Sequential(
            nn.Linear(142, 142),
            nn.ReLU(inplace=True),
            nn.Linear(142, 142),
            nn.Sigmoid(),
        )
        # 142 * 312 * 64 matches 64 channels, width 142 and a height of 5000
        # halved four times by the (2, 1) poolings (5000 -> 312).
        # NOTE(review): the final Softmax makes the model emit probabilities,
        # not logits. Any loss applied to this output must expect
        # probabilities (nn.CrossEntropyLoss by itself expects raw logits).
        self.classifier = nn.Sequential(
            nn.Flatten(1, -1),
            nn.Linear(142 * 312 * 64, 142),
            nn.Linear(142, 4),
            nn.Linear(4, 2),
            nn.Softmax(-1)
        )
        # TODO: Linear layers of the final classifier.
        # 1. Does such a sharp drop from nn.Linear input to output size hurt
        #    model performance?
        # 2. The paper does not state an activation after the Linear layers;
        #    should one be added?
        # + nn.Linear(142 * 312 * 64, 142) may need to be revised.

    def fwa(self, x1, x2):
        """Concatenate both streams and scale them by the attention gate.

        Args:
            x1: Output of the feature stream.
            x2: Output of the pose stream; must match ``x1`` on every axis
                except the last.

        Returns:
            The concatenated tensor multiplied element-wise by the gate.
        """
        x = torch.cat((x1, x2), dim=-1)
        # Average over axis 2, feed the result to the attention MLP, then
        # restore the axis so the gate broadcasts over it.
        x_ = torch.mean(x, dim=2)
        x_ = self.attention(x_)
        x_ = torch.unsqueeze(x_, 2)
        x = torch.mul(x, x_)
        return x

    def forward(self, feat_x, pose_x):
        """Return the two-class output for a batch of (features, pose) inputs."""
        feat_x = self.feat_tdcn(feat_x)
        # Bug fix: the pose input must go through its own stream. It was
        # previously sent through ``feat_tdcn``, leaving ``pose_tdcn`` unused
        # and untrained.
        pose_x = self.pose_tdcn(pose_x)
        out = self.fwa(feat_x, pose_x)
        out = self.classifier(out)
        return out

# Train

In [None]:
class _ProbabilityCrossEntropy(nn.Module):
    """Cross-entropy loss for inputs that are already softmax probabilities.

    PredictionModel ends with ``nn.Softmax``, while ``nn.CrossEntropyLoss``
    expects raw logits and applies log-softmax itself. Using it directly
    therefore applies softmax twice, which flattens the loss and weakens the
    gradients. Taking the log of the probabilities first makes the internal
    log-softmax a no-op (the probabilities already sum to 1), so the result
    is the usual negative log-likelihood, with label smoothing still
    supported.

    If the Softmax layer is ever removed from the model, switch back to plain
    ``nn.CrossEntropyLoss``.

    Args:
        label_smoothing: Passed through to ``F.cross_entropy``.
        eps: Lower clamp applied before the log to avoid ``log(0)``.
    """

    def __init__(self, label_smoothing=0.0, eps=1e-12):
        super().__init__()
        self.label_smoothing = label_smoothing
        self.eps = eps

    def forward(self, probs, target):
        log_probs = torch.log(probs.clamp_min(self.eps))
        return F.cross_entropy(
            log_probs, target, label_smoothing=self.label_smoothing
        )


loss_fn = _ProbabilityCrossEntropy(label_smoothing=SMOOTHING)
model = PredictionModel(BATCH_SIZE)
optimizer = optim.SGD(model.parameters(), lr=LEARNING_RATE, momentum=0.9)

In [None]:
# Move the model to the selected device, then print a layer-by-layer summary
# for one feature input of size (1, 5000, 136) and one pose input of size
# (1, 5000, 6).
model.to(device)
summary(model, [(1, 5000, 136), (1, 5000, 6)], batch_size=BATCH_SIZE)

In [None]:
def train(train_loader, dev_loader, model, loss_fn, optimizer):
    """Train ``model`` with early stopping on the dev loss.

    Runs at most ``EPOCHS`` epochs. After each epoch the dev loss is computed
    with ``evaluate`` and handed to an ``EarlyStopping`` tracker (patience 3)
    that checkpoints the best weights to ``MODEL_PATH``.

    Args:
        train_loader: Yields ``((x1, x2), label)`` training batches.
        dev_loader: Loader passed to ``evaluate`` after every epoch.
        model: Module called as ``model(x1, x2)``.
        loss_fn: Loss module called as ``loss_fn(pred, label)``.
        optimizer: Optimizer over ``model``'s parameters.

    Returns:
        ``model`` with the best checkpointed weights loaded.
    """
    stopper = EarlyStopping(patience=3, save_path=MODEL_PATH)
    n_batches = len(train_loader)

    model.to(device)
    loss_fn.to(device)
    optimizer.zero_grad()

    progress = trange(1, EPOCHS + 1)
    tqdm.write("\nEpoch | Train Loss | Test Loss")
    tqdm.write("-" * 30)

    for epoch in progress:
        model.train()
        batch_losses = []
        for (feat, pose), target in train_loader:
            # Add a channel axis: feat -> [B, 1, H, W_feat], pose -> [B, 1, H, W_pose].
            feat = feat.unsqueeze(1).to(device)
            pose = pose.unsqueeze(1).to(device)
            # Flatten labels to shape [B] with an integer (long) dtype.
            target = target.to(device).long().reshape(-1)

            # Model output has shape [B, 2] (softmax already applied).
            pred = model(feat, pose)
            loss = loss_fn(pred, target)
            batch_losses.append(loss.item())

            loss.backward()
            optimizer.step()
            optimizer.zero_grad()

        train_loss = sum(batch_losses) / n_batches

        dev_loss = evaluate(dev_loader, model, loss_fn)
        tqdm.write(f"{epoch:5} | {train_loss:10.5f} | {dev_loss:9.5f}")

        if stopper.should_stop(model, dev_loss):
            tqdm.write(f"--EarlyStopping: [Epoch: {epoch - stopper.counter}]")
            break

    # Restore the best checkpoint written by the early stopper.
    return stopper.load(model)


def evaluate(test_loader, model, loss_fn, return_metrics=False):
    """Evaluate ``model`` on ``test_loader``.

    Args:
        test_loader: Yields ``((x1, x2), label)`` batches.
        model: Module called as ``model(x1, x2)``.
        loss_fn: Loss module called as ``loss_fn(pred, label)``.
        return_metrics: When ``False`` only the mean loss is returned; when
            ``True`` a dict of classification metrics is returned.

    Returns:
        The mean per-batch loss (float), or a dict with keys ``accuracy``,
        ``f1``, ``f1-macro``, ``recall``, ``precision``, ``loss`` (mean
        per-batch loss) and ``matrix`` (flattened 2x2 confusion matrix in
        ``tn, fp, fn, tp`` order).

    Raises:
        ZeroDivisionError: If ``test_loader`` yields no batches.
    """
    model.eval()
    num_batches = len(test_loader)
    test_loss = 0
    true_labels = list()
    pred_values = list()

    with torch.no_grad():
        for (x1, x2), label in test_loader:
            x1 = x1.unsqueeze(1).to(device)
            x2 = x2.unsqueeze(1).to(device)
            label = label.to(device).long().reshape(-1)

            pred = model(x1, x2)
            loss = loss_fn(pred, label)
            test_loss += loss.item()

            if return_metrics:
                true_labels += label.detach().cpu().numpy().tolist()
                pred_values += pred.argmax(-1).detach().cpu().numpy().tolist()

    # Average in both modes so the reported loss means the same thing. The
    # metrics dict used to carry the un-averaged sum.
    test_loss /= num_batches

    if not return_metrics:
        # During training only the loss value is needed.
        return test_loss

    # After training, report the classification metrics.
    accuracy = accuracy_score(true_labels, pred_values)
    f1 = f1_score(true_labels, pred_values)
    f1_macro = f1_score(true_labels, pred_values, average="macro")
    recall = recall_score(true_labels, pred_values)
    precision = precision_score(true_labels, pred_values)
    # Pin the label set so the matrix is always 2x2 (four values after
    # ravel), even when only one class occurs in the labels and predictions.
    matrix = confusion_matrix(true_labels, pred_values, labels=[0, 1]).ravel()

    return {
        "accuracy": accuracy,
        "f1": f1,
        "f1-macro": f1_macro,
        "recall": recall,
        "precision": precision,
        "loss": test_loss,
        "matrix": matrix,
    }

In [None]:
# Train the model; ``train`` returns it with the best checkpoint loaded.
model = train(train_dataloader, dev_dataloader, model, loss_fn, optimizer)

In [None]:
# Save the model together with the optimizer state.
# NOTE(review): MODEL_PATH is also the file EarlyStopping writes the best
# bare state_dict to inside train(); this call replaces it with a dict-style
# checkpoint. "epoch" stores the configured EPOCHS, not the epoch at which
# training actually stopped.
torch.save(
    {
        "epoch": EPOCHS,
        "learning_rate": LEARNING_RATE,
        "model_state": model.state_dict(),
        "optimizer_state": optimizer.state_dict(),
    },
    MODEL_PATH,
)

# Evaluation

In [None]:
# Free cached memory, then compute the full metric set on the test loader.
clear()
metrics = evaluate(test_dataloader, model, loss_fn, return_metrics=True)

print(f"Accuracy:  {metrics['accuracy']:.3f}")
print(f"F1-score:  {metrics['f1']:.3f}")
print(f"F1-macro:  {metrics['f1-macro']:.3f}")
print(f"Recall:    {metrics['recall']:.3f}")
print(f"Precision: {metrics['precision']:.3f}")

print("-" * 30)
# "matrix" is the flattened confusion matrix; unpacking it into four values
# assumes it is 2x2.
tn, fp, fn, tp = metrics["matrix"]
print(f"TN: {tn}")
print(f"FP: {fp}")
print(f"FN: {fn}")
print(f"TP: {tp}")

In [None]:
def show_probs(test_data, model, max=6):
    """Print the model's two output scores for up to ``max`` samples.

    Samples are drawn one at a time from a shuffled loader. At most
    ``max - max // 2`` samples with label 0 and ``max // 2`` samples with
    label 1 are scored; everything else is skipped. Each printed line has the
    form ``label: [score_0  score_1]``.

    Args:
        test_data: Dataset yielding ``((x1, x2), label)`` items.
        model: Module called as ``model(x1, x2)``.
        max: Total number of samples to print.
    """
    dataloader = DataLoader(test_data, batch_size=1, shuffle=True)

    model.eval()
    # Per-label quota and running count, keyed by the label value.
    quota = {0: max - max // 2, 1: max // 2}
    shown = {0: 0, 1: 0}
    with torch.no_grad():
        for (x1, x2), label in dataloader:
            label = label.item()
            if label in quota and shown[label] < quota[label]:
                shown[label] += 1
            elif shown[0] + shown[1] == max:
                break
            else:
                continue

            # Copy to the device only for samples that are actually scored.
            # Skipped samples used to be transferred as well.
            x1 = x1.unsqueeze(1).to(device)
            x2 = x2.unsqueeze(1).to(device)

            pred = model(x1, x2)
            normal, abnormal = pred.squeeze(0)
            print(f"{label}: [{normal:.3f}  {abnormal:.3f}]")


# Print the model output for up to 30 test samples.
show_probs(test_dataset, model, 30)