<a href="https://colab.research.google.com/github/TaeHoon-Jin/Dacon_Ex/blob/main/PyTorch_%EC%B4%88%EC%8B%AC%EC%9E%90_%EA%B0%80%EC%9D%B4%EB%93%9C_%EB%8D%B0%EC%9D%B4%ED%84%B0%EC%85%8B%EB%B6%80%ED%84%B0_%EB%AA%A8%EB%8D%B8_%ED%95%99%EC%8A%B5%EA%B9%8C%EC%A7%80.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Abstract**

해당 베이스라인은 저처럼 인공지능 대회를 처음 접하시는 분들이나 PyTorch를 처음 접하시는 분들과 지식을 공유하고자 준비하였습니다.
<br>
저 또한 책과 인터넷을 찾아가며 만든 지식이라 부족할 수 있고,
<br> 
코드의 가독성에 주의를 기울였으나 직관적이지 않은 부분, 잘 모르시겠거나 잘못된 부분은 지적해주시면 감사하겠습니다. 
<br>
이 베이스라인은 모델의 구체적인 내용을 다루지 않으며, 단순한 Conv와 upsampling을 바탕으로 전체적인 흐름을 이해하는 데 도움을 드리고자 작성한 코드입니다.
<br>



In [None]:
# Mount Google Drive inside the Colab runtime at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import os
from glob import glob
import copy

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import optim
from torch.utils.data import Dataset, DataLoader, random_split
from torch.optim.lr_scheduler import ReduceLROnPlateau

from torchvision import transforms, utils
from torchsummary import summary

# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using PyTorch version:', torch.__version__, ' Device:', device)

Using PyTorch version: 1.8.0+cu101  Device: cuda


# 데이터 확인하기

In [None]:
def sorted_list(path):
    """Return the paths matching a glob pattern, sorted in ascending order.

    Args:
        path: Glob pattern, e.g. ``"data/train/*.npy"``.

    Returns:
        A new list of matching path strings in lexicographic order; an empty
        list when nothing matches.
    """
    # This file imports the function itself (``from glob import glob``), so it
    # has to be called directly; ``glob.glob(...)`` raises AttributeError.
    return sorted(glob(path))

def read_csv(path):
    """Load the CSV file at ``path`` and return it as a pandas DataFrame."""
    return pd.read_csv(path)

def load_npy(path):
    """Return whatever ``np.load`` reads from the file at ``path``."""
    return np.load(path)

def show_data(npy):
    """Plot every channel of an array side by side in one matplotlib figure.

    Args:
        npy: Array indexed as ``npy[:, :, channel]``; the size of its last
            axis decides how many sub-plots are drawn.
    """
    channel_count = npy.shape[-1]
    # Figure width grows with the channel count (3 units per panel).
    plt.figure(figsize=(3 * channel_count, 5))
    for position in range(1, channel_count + 1):
        # Sub-plot positions are 1-based, channel indices are 0-based.
        plt.subplot(1, channel_count, position)
        plt.imshow(npy[:, :, position - 1])
    plt.tight_layout()
    plt.show()
    plt.close()

# **PyTorch Computer Vision cookbook** - Michael Avendi
이 책의 내용이 다수 포함되어 있으며, 파이토치를 처음 접하신다면 적극추천하는 책입니다.
영어로 되어있지만 코드가 매우 이해하기 쉽게 설명되어 있습니다

# **Creating Dataset**

저의 경우, 482장의 데이터파일을 8개씩 묶어 6장을 통해 뒤의 2장을 예측할 수 있도록 분리하여 저장하도록 하였습니다. 

In [None]:
data_path

'./drive/MyDrive/Colab Notebooks/Dacon/Monthly Dacon/Monthly Dacon 13/data'

In [None]:
# Load the sample submission file to inspect the expected output layout.
submission = pd.read_csv('./drive/MyDrive/Colab Notebooks/Dacon/Monthly Dacon/Monthly Dacon 13/data/sample_submission.csv')

In [None]:
submission.shape

(24, 136193)

In [None]:
# Root folder of the competition data; training files sit in its "train" sub-folder.
data_path = './drive/MyDrive/Colab Notebooks/Dacon/Monthly Dacon/Monthly Dacon 13/data'
train_data_path = os.path.join(data_path, "train")
print(train_data_path)
# Sorted listing of the entries found in the training folder.
file_list = sorted(os.listdir(train_data_path))


./drive/MyDrive/Colab Notebooks/Dacon/Monthly Dacon/Monthly Dacon 13/data/train


In [None]:
pwd

'/content'

In [None]:
## Custom Dataset which return x_frames, y_frames

torch.set_printoptions(threshold=10000) # print tensors of up to 10000 elements in full instead of summarizing them

class SeaIceDataset(Dataset):
    """Sliding-window dataset over a folder of ``.npy`` frames.

    Every item is built from ``frame_num + predict_num`` files taken every
    ``stride`` entries of the sorted folder listing. The first ``frame_num``
    frames form the input ``x``; the remaining ``predict_num`` frames form the
    target ``y``.
    """

    def __init__(self, data_dir, transform, data_type="train", frame_num=6, predict_num=2, stride=1):
        """Collect the sorted file paths and store the window settings.

        Args:
            data_dir: Folder that contains the ``data_type`` sub-folder.
            transform: Callable applied to each 2-D frame (a numpy array) that
                returns a tensor.
            data_type: Name of the sub-folder to read (train / valid / test).
            frame_num: Number of frames used as model input.
            predict_num: Number of frames to predict.
            stride: Step between consecutive frames of one window, counted in
                files. The original author's example: with ``stride=2`` a
                window reads 197811.npy, 198001.npy, 198003.npy, ...; use
                ``stride=12`` to compare the same month across years.

        Raises:
            ValueError: If ``stride`` is smaller than 1.
        """
        super().__init__()
        if stride < 1:
            raise ValueError("stride must be a positive integer")

        data_to_path = os.path.join(data_dir, data_type)
        self.filepaths = [
            os.path.join(data_to_path, filename)
            for filename in sorted(os.listdir(data_to_path))
        ]

        self.transform = transform
        self.frame_num = frame_num
        self.predict_num = predict_num
        self.stride = stride

    def __len__(self):
        """Return how many start indices yield a complete window."""
        # A window that starts at i touches files up to i + span.
        span = (self.frame_num + self.predict_num - 1) * self.stride
        # Never report a negative length when the folder holds too few files.
        return max(0, len(self.filepaths) - span)

    def __getitem__(self, idx):
        """Return ``(x, y)`` for the window that starts at file ``idx``.

        With the notebook's ``ToTensor`` transform, ``frame_num=6`` and
        ``predict_num=2`` the shapes are ``(1, 6, 448, 304)`` and
        ``(1, 2, 448, 304)``, i.e. [channel, frames, height, width].
        Both tensors are returned as float32.

        Raises:
            IndexError: If ``idx`` is outside ``[-len(self), len(self))``.
        """
        length = len(self)
        if idx < 0:
            idx += length
        if not 0 <= idx < length:
            raise IndexError(f"index {idx} is out of range for {length} windows")

        window = self.frame_num + self.predict_num
        frames = []
        # The stop bound must scale with the stride; otherwise strides larger
        # than 1 produce fewer than `window` frames and an empty target.
        for file_idx in range(idx, idx + window * self.stride, self.stride):
            # Only channel 0 is used; dividing by 250 turns the frame into a
            # float array before the transform is applied.
            frame = np.load(self.filepaths[file_idx])[:, :, 0] / 250
            frames.append(self.transform(frame))

        # Stack along a new leading axis, then swap it with the channel axis.
        x = torch.stack(frames[:self.frame_num]).transpose(0, 1).to(dtype=torch.float)
        # The target uses the same dtype as the input so tensor-based losses
        # do not hit a float/double mismatch.
        y = torch.stack(frames[self.frame_num:]).transpose(0, 1).to(dtype=torch.float)
        return x, y

def getTransform():
    """Build the preprocessing pipeline: a ``Compose`` holding one ``ToTensor`` step."""
    steps = [transforms.ToTensor()]
    return transforms.Compose(steps)



# Build the transform and the training dataset (6 input frames, 2 target frames, stride 1).
transform = getTransform()

ice_dataset = SeaIceDataset(data_path, transform, "train", 6, 2, 1)

a,b = ice_dataset[1]        # fetch one sample to inspect
print(len(ice_dataset))     # number of windows in the dataset (475 in the recorded run, each grouping 8 frames)
print(a.shape, b.shape)


475
torch.Size([1, 6, 448, 304]) torch.Size([1, 2, 448, 304])


In [None]:
# Split the dataset into train (80%) and validation (remaining 20%) subsets.
# NOTE(review): random_split is called without a seeded generator, so the split
# differs between runs. The windows also overlap in time, so a random split
# presumably puts shared frames in both subsets — confirm this is acceptable.

len_ice_dataset = len(ice_dataset)
len_ice_train = int(0.8*len_ice_dataset)
len_ice_valid = len_ice_dataset - len_ice_train


train_dataset, valid_dataset = random_split(ice_dataset, [len_ice_train, len_ice_valid])
print(f"train dataset length : {len(train_dataset)}")
print(f"valid dataset length : {len(valid_dataset)}")

# Show the shapes of the first training sample only.
for x, y in train_dataset:
    print(x.shape, y.shape)
    break

train dataset length : 380
valid dataset length : 95
torch.Size([1, 6, 448, 304]) torch.Size([1, 2, 448, 304])


In [None]:
# Creating DataLoader
# too slow now...

BATCH_SIZE = 12

# A DataLoader draws BATCH_SIZE samples at a time from the dataset and serves them as one batch.
# NOTE(review): shuffle=False keeps the same batch order every epoch, also for training — confirm this is intended.
train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=False)
valid_dataloader = DataLoader(valid_dataset, batch_size=BATCH_SIZE, shuffle=False)

# Show the shapes of the first batch only.
# train_dataloader length => 32
for x, y in train_dataloader:
    print(x.shape, y.shape)
    break

torch.Size([12, 1, 6, 448, 304]) torch.Size([12, 1, 2, 448, 304])


# **Building Model**

학습시킬 모델의 구조에 대해 알아보려고 합니다
<br>
모델의 경우, 사용될 레이어를 init에 초기화하고 forward에서 적용하는 방식으로 진행됩니다
<br>

In [None]:
# Hyper-parameters read by CustomNet.__init__.
MODEL_PARAMS = dict(
    shape=(6, 1, 448, 304),  # unpacked as (frames, channel, height, width)
    init_filters=8,
    dropout_rate=0.5,
)

In [None]:
# Creating Model

class CustomNet(nn.Module):
    """Minimal 3-D convolutional baseline that maps input frames to predicted frames.

    Pipeline: Conv3d -> ReLU -> max-pool (halves frames/height/width) ->
    Conv3d -> ReLU -> ConvTranspose3d -> ReLU -> resize to the output size.
    """

    def __init__(self, params):
        """Create the layers.

        Args:
            params: Mapping with the keys
                ``"shape"``: ``(frames, channel, height, width)`` of one input,
                ``"init_filters"``: output channels of the first convolution,
                ``"dropout_rate"``: stored on the instance; no dropout layer
                is applied by this model,
                ``"predict_num"`` (optional, default 2): frames to output,
                ``"debug"`` (optional, default False): print input/output
                shapes on every forward pass.
        """
        super().__init__()
        _input_frames, input_channel, input_height, input_width = params["shape"]
        init_filters = params["init_filters"]
        self.dropout_rate = params["dropout_rate"]
        # The output resolution follows the configured input shape instead of
        # a hard-coded (2, 448, 304).
        self.output_size = (params.get("predict_num", 2), input_height, input_width)
        self.debug = params.get("debug", False)

        self.conv1 = nn.Conv3d(input_channel, init_filters, kernel_size=3, padding=1)
        self.conv2 = nn.Conv3d(init_filters, init_filters*2, kernel_size=3, padding=1)
        self.conv3 = nn.ConvTranspose3d(init_filters*2, 1, kernel_size=3, padding=1)

    def forward(self, x):
        """Run the network on ``x`` of layout (batch, channel, frames, height, width).

        Returns:
            Tensor of shape ``(batch, 1, *self.output_size)``.
        """
        features = F.relu(self.conv1(x))
        features = F.max_pool3d(features, 2, 2)
        features = F.relu(self.conv2(features))
        features = F.relu(self.conv3(features))
        # F.upsample is deprecated; F.interpolate is its replacement and uses
        # the same default ('nearest') mode.
        out = F.interpolate(features, size=self.output_size)
        if self.debug:
            print("input: ", x.shape)
            print("output: ", out.shape)
        return out


In [None]:
my_model = CustomNet(MODEL_PARAMS).to(device) # the model must be moved to the device, otherwise an error occurs
print(my_model)
summary(my_model, input_size=(1, 6, 448, 304), device=device.type) # summary feeds an input of the given size to report the structure and parameter counts

CustomNet(
  (conv1): Conv3d(1, 8, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
  (conv2): Conv3d(8, 16, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
  (conv3): ConvTranspose3d(16, 1, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
)
input:  torch.Size([2, 1, 6, 448, 304])
output:  torch.Size([2, 1, 2, 448, 304])
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv3d-1       [-1, 8, 6, 448, 304]             224
            Conv3d-2      [-1, 16, 3, 224, 152]           3,472
   ConvTranspose3d-3       [-1, 1, 3, 224, 152]             433
Total params: 4,129
Trainable params: 4,129
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 3.12
Forward/backward pass size (MB): 63.12
Params size (MB): 0.02
Estimated Total Size (MB): 66.26
----------------------------------------------------------------




# **Loss Function & Metric Function**

손실함수로는 mae_over_f1을, 메트릭 함수로는 mae_score와 f1_score를 사용했습니다.
이 함수들은 numpy로 계산하므로, 입력으로 들어온 tensor를 먼저 numpy 배열로 변환해주어야 합니다

<br>
이전의 결과값이 250\*0.05 < y < 250\*0.5 였지만  
데이터셋에서 250을 나눠주는 과정을 겪었기 때문에 여기서도 250대신 1을 이용하였습니다.  

In [None]:
# Loss Function && Metric Function

# metrics
def mae_score(true, pred):
    """Return the mean absolute error between two tensors, computed in numpy.

    Both inputs go through ``numpy_to_tensor`` first, so the result carries no
    autograd history.
    """
    true_np, pred_np = numpy_to_tensor(true, pred)
    return np.abs(true_np - pred_np).mean()

# metrics
def f1_score(true, pred):
    """Return an F1 score over the pixels whose true value lies in (0.05, 0.5).

    Inside that band both arrays are binarised at 0.15 (below -> 0, otherwise
    1). The result is a numpy value without autograd history.
    """
    true_np, pred_np = numpy_to_tensor(true, pred)

    # Keep only the pixels whose ground truth falls strictly inside the band.
    in_band = (true_np > 1*0.05) & (true_np < 1*0.5)
    true_bin = np.where(true_np[in_band] < 1*0.15, 0, 1)
    pred_bin = np.where(pred_np[in_band] < 1*0.15, 0, 1)

    hits = np.sum(true_bin * pred_bin == 1)
    # hits / actual positives and hits / predicted positives; F1 is symmetric
    # in the two ratios. The epsilon is added to every element before summing.
    per_actual = hits / np.sum(true_bin+1e-8)
    per_predicted = hits / np.sum(pred_bin+1e-8)
    return 2 * per_actual*per_predicted/(per_actual+per_predicted+1e-8)
    
# loss function
def mae_over_f1(true, pred):
    """Return ``mae_score / (f1_score + 1e-8)`` for the given tensors.

    Both terms are computed in numpy on detached copies, so the value cannot
    be back-propagated.
    """
    return mae_score(true, pred) / (f1_score(true, pred) + 1e-8)

def numpy_to_tensor(true, pred):
    """Convert two tensors to numpy arrays (despite the function's name).

    Each tensor is moved to the CPU and detached from the autograd graph
    before conversion. Returns the pair ``(true_array, pred_array)``.
    """
    true_array = true.cpu().detach().numpy()
    pred_array = pred.cpu().detach().numpy()
    return true_array, pred_array


# **Optimizer**

최적화는 아담을 이용하였습니다. 
<br>
시간이 지남에 따라 학습률을 감소시킴으로써 정확도를 높일 수 있을 것 같습니다

In [None]:
# Optimizer

# Adam optimizer over all parameters of my_model, starting at a learning rate of 3e-4.
opt_adam = optim.Adam(my_model.parameters(), lr=3e-4)

def get_lr(opt):
    """Return the learning rate of the optimizer's first parameter group.

    Returns ``None`` when ``opt.param_groups`` is empty.
    """
    return next((group["lr"] for group in opt.param_groups), None)

# check our learning rate
current_lr = get_lr(opt_adam)
print(f"current_lr = {current_lr}")



# Learning-rate scheduler: halves the rate once the monitored value has not improved for 20 steps.
lr_scheduler = ReduceLROnPlateau(opt_adam, mode="min", factor=0.5, patience=20, verbose=1)

# Example only: feeding a constant metric shows when the scheduler cuts the rate.
# This really lowers opt_adam's learning rate; the optimizer and scheduler are
# re-created in the cell that defines TRAIN_PARAMS before training starts.
for i in range(100):
    lr_scheduler.step(1)

current_lr = 0.0003
Epoch    22: reducing learning rate of group 0 to 1.5000e-04.
Epoch    43: reducing learning rate of group 0 to 7.5000e-05.
Epoch    64: reducing learning rate of group 0 to 3.7500e-05.
Epoch    85: reducing learning rate of group 0 to 1.8750e-05.


# **Training Setting**

에폭 한번할때마다 loss_epoch함수를 실행하는데
<br>
dataloader의 x,y 마다 
<br>
metrics_batch를 통해 metric 값을 계산하고
<br>
loss_batch를 통해 loss 값을 계산합니다
<br>


In [None]:
# Training 

def metrics_batch(pred, true, metrics=None):
    """Evaluate metric functions on one batch.

    Args:
        pred: Model output for the batch.
        true: Ground truth for the batch.
        metrics: Optional iterable of callables invoked as
            ``metric(true, pred)``. When empty or ``None``, ``mae_score`` and
            ``f1_score`` are used.

    Returns:
        A list with one value per given metric, or the tuple ``(mae, f1)``
        when the defaults are used.
    """
    if metrics:
        return [metric(true, pred) for metric in metrics]
    # The results need their own names: assigning to `mae_score` / `f1_score`
    # here would turn them into locals and raise UnboundLocalError on the call.
    mae = mae_score(true, pred)
    f1 = f1_score(true, pred)
    return (mae, f1)

def loss_batch(loss_func, pred, true, opt=None):
    """Compute loss and metrics for one batch and, when training, update the weights.

    Args:
        loss_func: Callable invoked as ``loss_func(true, pred)``
            (``mae_over_f1`` in this notebook).
        pred: Model output for the batch.
        true: Ground truth for the batch.
        opt: Optimizer. ``None`` means evaluation only (no weight update).

    Returns:
        ``(loss, metrics)``. A scalar tensor loss is returned as a Python
        float so callers can accumulate it without keeping the autograd graph;
        any other loss value is returned unchanged. ``metrics`` is the result
        of ``metrics_batch`` for ``mae_score`` and ``f1_score``.
    """
    import warnings

    loss = loss_func(true, pred)
    with torch.no_grad():
        metrics = metrics_batch(pred, true, [mae_score, f1_score])

    if opt is not None:
        opt.zero_grad()
        if torch.is_tensor(loss) and loss.requires_grad:
            # Back-propagate so that opt.step() has gradients to apply.
            loss.backward()
        else:
            # A loss computed outside autograd (e.g. via numpy) cannot drive
            # learning; make that visible instead of silently doing nothing.
            warnings.warn(
                "loss_func returned a value without gradient information; "
                "the optimizer step will not change the weights",
                RuntimeWarning,
            )
        opt.step()

    if torch.is_tensor(loss):
        loss = loss.item() if loss.numel() == 1 else loss.detach()
    return loss, metrics

def loss_epoch(model, loss_func, dataset_dataloader, sanity_check=False, opt=None):
    """Run the model over a dataloader and return the average loss and metrics.

    Args:
        model: Callable applied to each input batch.
        loss_func: Loss callable forwarded to ``loss_batch``.
        dataset_dataloader: Iterable of ``(x, y)`` batches.
        sanity_check: When ``True``, stop after the first batch.
        opt: Optimizer forwarded to ``loss_batch``; ``None`` for evaluation.

    Returns:
        ``(loss, metrics)``: the loss and each metric averaged over the
        batches that were processed. With no batches the loss is ``0.0`` and
        the metrics are ``[0.0, 0.0]``.
    """
    running_loss = 0.0
    running_metric = [0.0, 0.0]
    num_batches = 0

    for x, y in dataset_dataloader:
        x = x.to(device)
        y = y.to(device)
        pred = model(x)
        loss, batch_metrics = loss_batch(loss_func, pred, y, opt)

        # float() keeps the running total free of any autograd graph.
        running_loss += float(loss)
        if batch_metrics is not None:
            for idx, metric_value in enumerate(batch_metrics):
                if idx == len(running_metric):
                    running_metric.append(0.0)
                running_metric[idx] += metric_value
        num_batches += 1

        # A sanity check only exercises a single batch.
        if sanity_check is True:
            break

    # Loss and metrics are per-batch values, so they are averaged over the
    # number of batches (not the number of samples), and the averages use the
    # accumulated totals rather than the last batch's values.
    denominator = max(num_batches, 1)
    loss = running_loss / denominator
    metrics = [total / denominator for total in running_metric]
    print(loss, metrics)
    return loss, metrics

In [None]:
# Fresh optimizer and scheduler for the actual training run.
loss_func = mae_over_f1
opt_adam = optim.Adam(my_model.parameters(), lr=3e-4)
lr_scheduler = ReduceLROnPlateau(opt_adam, mode="min", factor=0.5, patience=20, verbose=1)

# Settings read by train().
TRAIN_PARAMS = {
    "num_epochs" : 10,
    "loss_func" : loss_func,
    "optimizer" : opt_adam,
    "train_dataloader" : train_dataloader,
    "valid_dataloader" : valid_dataloader,
    "sanity_check" : True,  # True makes loss_epoch stop after the first batch
    "lr_scheduler" : lr_scheduler,
    "save_path" : "./weights.pt"
}


In [None]:
def train(model, params):
    """Train ``model`` for several epochs and keep the best weights.

    Each epoch runs one training pass and, when a validation dataloader is
    given, one validation pass without gradients. The monitored loss
    (validation loss, or training loss when there is no validation data)
    drives the learning-rate scheduler and decides which weights count as
    best. The best weights are written to ``save_path`` and loaded back into
    the model before returning.

    Args:
        model: ``nn.Module`` to train.
        params: Mapping with the keys ``num_epochs``, ``loss_func``,
            ``optimizer``, ``train_dataloader``, ``valid_dataloader``,
            ``sanity_check``, ``lr_scheduler`` and ``save_path``. The values
            of ``valid_dataloader``, ``lr_scheduler`` and ``save_path`` may be
            ``None`` to skip the corresponding step.

    Returns:
        ``(model, loss_hist, metrics_hist)`` where both histories are dicts
        with ``"train"`` and ``"valid"`` lists holding one entry per epoch.
    """
    num_epochs = params['num_epochs']
    loss_func = params['loss_func']
    opt = params["optimizer"]
    train_dataloader = params['train_dataloader']
    valid_dataloader = params['valid_dataloader']
    sanity_check = params['sanity_check']
    lr_scheduler = params['lr_scheduler']
    save_path = params['save_path']

    loss_hist = {"train": [], "valid": []}
    metrics_hist = {"train": [], "valid": []}

    # Start from the current weights so there is always something to restore.
    best_model_weights = copy.deepcopy(model.state_dict())
    best_loss = float("inf")

    for epoch in range(num_epochs):
        current_lr = get_lr(opt)
        print(f'Epoch:{epoch}/{num_epochs-1}, current lr:{current_lr}')

        model.train()
        train_loss, train_metrics = loss_epoch(model, loss_func, train_dataloader, sanity_check, opt)
        loss_hist["train"].append(train_loss)
        metrics_hist["train"].append(train_metrics)
        monitored_loss = train_loss

        if valid_dataloader is not None:
            model.eval()
            # No optimizer is passed and gradients are disabled for validation.
            with torch.no_grad():
                valid_loss, valid_metrics = loss_epoch(model, loss_func, valid_dataloader, sanity_check)
            loss_hist["valid"].append(valid_loss)
            metrics_hist["valid"].append(valid_metrics)
            monitored_loss = valid_loss

        if monitored_loss < best_loss:
            best_loss = monitored_loss
            best_model_weights = copy.deepcopy(model.state_dict())
            if save_path:
                torch.save(best_model_weights, save_path)

        if lr_scheduler is not None:
            lr_scheduler.step(float(monitored_loss))

    model.load_state_dict(best_model_weights)
    return model, loss_hist, metrics_hist



In [None]:
# Run the training loop with the settings in TRAIN_PARAMS and show the loss history.
my_model, loss_hist, metrics_hist = train(my_model, TRAIN_PARAMS)

print(loss_hist)

Epoch:0/9, current lr:0.0003




input:  torch.Size([12, 1, 6, 448, 304])
output:  torch.Size([12, 1, 2, 448, 304])
0.0008948173459509091 [0.00047206722502364897, 0.0013883080755960155]
Epoch:1/9, current lr:0.0003
input:  torch.Size([12, 1, 6, 448, 304])
output:  torch.Size([12, 1, 2, 448, 304])
0.0008948173459191517 [0.0004720672250068951, 0.0013883080755960155]
Epoch:2/9, current lr:0.0003
input:  torch.Size([12, 1, 6, 448, 304])
output:  torch.Size([12, 1, 2, 448, 304])
0.0008948173458064715 [0.00047206722494744983, 0.0013883080755960155]
Epoch:3/9, current lr:0.0003
input:  torch.Size([12, 1, 6, 448, 304])
output:  torch.Size([12, 1, 2, 448, 304])
0.0008948173457354976 [0.00047206722491000713, 0.0013883080755960155]
Epoch:4/9, current lr:0.0003
input:  torch.Size([12, 1, 6, 448, 304])
output:  torch.Size([12, 1, 2, 448, 304])
0.0008948173459106694 [0.0004720672250024202, 0.0013883080755960155]
Epoch:5/9, current lr:0.0003
input:  torch.Size([12, 1, 6, 448, 304])
output:  torch.Size([12, 1, 2, 448, 304])
0.0008948