In [None]:
!pip install wandb  # install the Weights & Biases client (no version is pinned here)

In [None]:
import wandb

In [None]:
import pandas as pd
import numpy as np

import seaborn as sns
from matplotlib import pyplot as plt
%matplotlib inline

from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import StratifiedKFold, StratifiedShuffleSplit
from sklearn.metrics import accuracy_score

from tensorflow.keras.initializers import HeNormal
from tensorflow.keras.layers import *
from tensorflow.keras.models import *
from tensorflow.keras.optimizers import *
from tensorflow.keras.callbacks import *

import warnings
# Suppress every Python warning for the rest of the session.
warnings.filterwarnings('ignore')

# Connect to Google Colab storage: mount Google Drive under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.init as init
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
from torch.nn.modules.loss import _Loss
from torch.optim.lr_scheduler import _LRScheduler


from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

from copy import deepcopy
from tqdm import tqdm
import time
import os
import math
import random

In [None]:
# Select the GPU to use.
# CUDA_VISIBLE_DEVICES has to be exported BEFORE the first CUDA query: once the
# CUDA runtime has enumerated the devices, changing the variable has no effect.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
print("number of GPUs: ", torch.cuda.device_count())  # counts the *visible* devices
use_cuda = torch.cuda.is_available()
print("Does GPU exist? : ", use_cuda)
# Fall back to the CPU when no CUDA device is available.
DEVICE = torch.device("cuda" if use_cuda else "cpu")

In [None]:
# Fix every random seed used by the notebook.
def seed_everything(seed):
    """Seed the Python, NumPy and PyTorch (CPU + CUDA) RNGs with ``seed``.

    Also switches cuDNN to deterministic mode and disables its auto-tuner.
    """
    # Plain Python / NumPy generators.
    random.seed(seed)
    np.random.seed(seed)

    # PyTorch generators: CPU, current GPU, and all GPUs (multi-GPU setups).
    for torch_seed_fn in (torch.manual_seed,
                          torch.cuda.manual_seed,
                          torch.cuda.manual_seed_all):
        torch_seed_fn(seed)

    # cuDNN flags.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True


seed_everything(2022)

In [None]:
# Read the competition data from Google Drive.
# Row/column counts below are the figures recorded in the original notes.
train, test, submission = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/content/drive/MyDrive/AI_individual/Dacon_hand_gesture/data/train.csv',  # 2335 rows 34 columns
        '/content/drive/MyDrive/AI_individual/Dacon_hand_gesture/data/test.csv',   # 9343 rows 33 columns
        '/content/drive/MyDrive/AI_individual/Dacon_hand_gesture/data/sample_submission.csv',
    )
)

In [None]:
# The MLP is used to validate the data: test rows on which every earlier
# submission agrees are collected as pseudo-labelled candidates (`new_train`).

sub2 = pd.read_csv('/content/drive/MyDrive/AI_individual/Dacon_hand_gesture/submission/submit_2.csv')
sub3 = pd.read_csv('/content/drive/MyDrive/AI_individual/Dacon_hand_gesture/submission/submit_3.csv')
sub4 = pd.read_csv('/content/drive/MyDrive/AI_individual/Dacon_hand_gesture/submission/submit_4.csv')
sub5 = pd.read_csv('/content/drive/MyDrive/AI_individual/Dacon_hand_gesture/submission/submit_5.csv')
sub6 = pd.read_csv('/content/drive/MyDrive/AI_individual/Dacon_hand_gesture/submission/submit_6.csv')
sub7 = pd.read_csv('/content/drive/MyDrive/AI_individual/Dacon_hand_gesture/submission/submit_7.csv')
sub8 = pd.read_csv('/content/drive/MyDrive/AI_individual/Dacon_hand_gesture/submission/submit_8.csv')
sub9 = pd.read_csv('/content/drive/MyDrive/AI_individual/Dacon_hand_gesture/submission/submit_9.csv')
sub10 = pd.read_csv('/content/drive/MyDrive/AI_individual/Dacon_hand_gesture/submission/submit_10.csv')
sub11 = pd.read_csv('/content/drive/MyDrive/AI_individual/Dacon_hand_gesture/submission/submit_11.csv')
sub12 = pd.read_csv('/content/drive/MyDrive/AI_individual/Dacon_hand_gesture/submission/submit_12.csv')
sub13 = pd.read_csv('/content/drive/MyDrive/AI_individual/Dacon_hand_gesture/submission/submit_13.csv')
sub17 = pd.read_csv('/content/drive/MyDrive/AI_individual/Dacon_hand_gesture/submission/submit_17.csv')

# Keep only the rows where every other submission predicts the same target as sub2.
agree_mask = pd.Series(True, index=sub2.index)
for other_sub in (sub3, sub4, sub5, sub6, sub7, sub8, sub9, sub10, sub11, sub12, sub13, sub17):
    agree_mask &= (sub2['target'] == other_sub['target'])
same_inference = sub2[agree_mask]

# Submission ids are used as 1-based positions into `test` (hence the -1).
same_idx = list(same_inference['id']-1)
# .copy() so that adding the target column does not write into a view of `test`.
new_train = test.loc[same_idx].copy()
new_train['target'] = list(same_inference['target'])
# NOTE(review): `new_train` is built but never concatenated into `train` in this
# notebook. If pseudo-labelling is intended, merge it into `train` before the
# scaling/split below (and before the targets are encoded).

# Stack train + test feature rows so both are scaled with the same statistics.
# Column 0 is skipped in both frames and the last train column is the target.
all_data = pd.concat([train.iloc[:,1:-1], test.iloc[:,1:]])

# min max scaling
mins = all_data.min()
maxs = all_data.max()
# A constant column has zero range; divide by 1 there instead of producing NaN.
value_range = (maxs - mins).replace(0, 1)

all_minmax = (all_data - mins) / value_range

# Split back into train X, y / test X.
# The split point is derived from `train` instead of a hard-coded 9280 so that
# X always has exactly one row per label and test_X one row per test sample.
n_train = len(train)
X = all_minmax.iloc[:n_train, :]
y = train.iloc[:, -1]

test_X = all_minmax.iloc[n_train:, :]

In [None]:
class my_dataset(Dataset):
    """Pairs a feature container with a label container, indexed in lockstep."""

    def __init__(self, X_data, y_data):
        """Store the features (``X_data``) and their labels (``y_data``)."""
        self.X_data, self.y_data = X_data, y_data

    def __len__(self):
        """Number of samples, taken from the feature container."""
        n_samples = len(self.X_data)
        return n_samples

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair stored at ``index``."""
        features = self.X_data[index]
        label = self.y_data[index]
        return features, label

In [None]:
class My_Model(nn.Module):
    """MLP classifier: three hidden Linear layers followed by a Linear output head.

    The first two hidden layers are each followed by BatchNorm1d and ReLU, the
    third by ReLU only. ``forward`` returns raw scores (no softmax applied).
    """

    def __init__(self, num_features, num_classes):
        """Create the layers for ``num_features`` inputs and ``num_classes`` outputs."""
        super().__init__()

        wide, narrow = 64, 32

        # Sub-modules are created in the same order as before so parameter
        # ordering and random initialisation stay unchanged.
        self.Layer_1 = nn.Linear(num_features, wide)
        self.Layer_2 = nn.Linear(wide, wide)
        self.Layer_3 = nn.Linear(wide, narrow)

        self.BatchNorm_1 = nn.BatchNorm1d(wide)
        self.BatchNorm_2 = nn.BatchNorm1d(wide)

        self.Layer_out = nn.Linear(narrow, num_classes)

        self.relu = nn.ReLU()

    def forward(self, inputs):
        """Run ``inputs`` through the network and return the output scores."""
        hidden = self.relu(self.BatchNorm_1(self.Layer_1(inputs)))
        hidden = self.relu(self.BatchNorm_2(self.Layer_2(hidden)))
        hidden = self.relu(self.Layer_3(hidden))
        return self.Layer_out(hidden)

In [None]:
class CosineAnnealingWarmupRestarts(_LRScheduler):
    """
    Learning-rate schedule made of repeated cycles: a linear warmup from
    ``min_lr`` up to the cycle's max learning rate, followed by a cosine decay
    back down to ``min_lr``.

    Args:
        optimizer (Optimizer): Wrapped optimizer.
        first_cycle_steps (int): First cycle step size.
        cycle_mult(float): Cycle steps magnification. Default: 1.
        max_lr(float): First cycle's max learning rate. Default: 0.1.
        min_lr(float): Min learning rate. Default: 0.001.
        warmup_steps(int): Linear warmup step size. Default: 0.
        gamma(float): Decrease rate of max learning rate by cycle. Default: 1.
        last_epoch (int): The index of last epoch. Default: -1.
    """

    def __init__(self,
                 optimizer: torch.optim.Optimizer,
                 first_cycle_steps: int,
                 cycle_mult: float = 1.,
                 max_lr: float = 0.1,
                 min_lr: float = 0.001,
                 warmup_steps: int = 0,
                 gamma: float = 1.,
                 last_epoch: int = -1
                 ):
        """Store the schedule parameters and reset every param group to ``min_lr``."""
        # The warmup must fit strictly inside the first cycle.
        assert warmup_steps < first_cycle_steps

        self.first_cycle_steps = first_cycle_steps  # first cycle step size
        self.cycle_mult = cycle_mult  # cycle steps magnification
        self.base_max_lr = max_lr  # first max learning rate
        self.max_lr = max_lr  # max learning rate in the current cycle
        self.min_lr = min_lr  # min learning rate
        self.warmup_steps = warmup_steps  # warmup step size
        self.gamma = gamma  # decrease rate of max learning rate by cycle

        self.cur_cycle_steps = first_cycle_steps  # length of the current cycle
        self.cycle = 0  # cycle count
        self.step_in_cycle = last_epoch  # position inside the current cycle

        # The attributes above are set before the parent constructor runs because
        # this class's step()/get_lr() read them.
        # NOTE(review): presumably the parent constructor triggers an initial step() - confirm for the installed torch version.
        super(CosineAnnealingWarmupRestarts, self).__init__(optimizer, last_epoch)

        # set learning rate min_lr
        self.init_lr()

    def init_lr(self):
        """Set every param group's lr to ``min_lr`` and use ``min_lr`` as each base lr."""
        self.base_lrs = []
        for param_group in self.optimizer.param_groups:
            param_group['lr'] = self.min_lr
            self.base_lrs.append(self.min_lr)

    def get_lr(self):
        """Return one learning rate per param group for the current ``step_in_cycle``."""
        if self.step_in_cycle == -1:
            # Not stepped yet: report the base learning rates unchanged.
            return self.base_lrs
        elif self.step_in_cycle < self.warmup_steps:
            # Warmup: linear interpolation from base_lr towards max_lr.
            return [(self.max_lr - base_lr) * self.step_in_cycle / self.warmup_steps + base_lr for base_lr in
                    self.base_lrs]
        else:
            # After warmup: half-cosine going from max_lr (at the end of the warmup)
            # down to base_lr (at the end of the cycle).
            return [base_lr + (self.max_lr - base_lr) \
                    * (1 + math.cos(math.pi * (self.step_in_cycle - self.warmup_steps) \
                                    / (self.cur_cycle_steps - self.warmup_steps))) / 2
                    for base_lr in self.base_lrs]

    def step(self, epoch=None):
        """Advance the schedule and write the new learning rates into the optimizer.

        With ``epoch=None`` the position moves forward by one step; with an explicit
        ``epoch`` the cycle index and the position inside it are recomputed from it.
        """
        if epoch is None:
            epoch = self.last_epoch + 1
            self.step_in_cycle = self.step_in_cycle + 1
            if self.step_in_cycle >= self.cur_cycle_steps:
                # Cycle finished: start the next one and scale its non-warmup
                # part by cycle_mult.
                self.cycle += 1
                self.step_in_cycle = self.step_in_cycle - self.cur_cycle_steps
                self.cur_cycle_steps = int(
                    (self.cur_cycle_steps - self.warmup_steps) * self.cycle_mult) + self.warmup_steps
        else:
            if epoch >= self.first_cycle_steps:
                if self.cycle_mult == 1.:
                    # All cycles have the same length: plain modulo / integer division.
                    self.step_in_cycle = epoch % self.first_cycle_steps
                    self.cycle = epoch // self.first_cycle_steps
                else:
                    # Cycle lengths grow geometrically: n is the number of completed
                    # cycles, obtained from the geometric-series sum of their lengths.
                    n = int(math.log((epoch / self.first_cycle_steps * (self.cycle_mult - 1) + 1), self.cycle_mult))
                    self.cycle = n
                    self.step_in_cycle = epoch - int(
                        self.first_cycle_steps * (self.cycle_mult ** n - 1) / (self.cycle_mult - 1))
                    # NOTE(review): unlike the epoch=None branch, this scales the whole
                    # cycle (warmup included) and is not truncated to int - confirm intended.
                    self.cur_cycle_steps = self.first_cycle_steps * self.cycle_mult ** (n)
            else:
                # Still inside the first cycle.
                self.cur_cycle_steps = self.first_cycle_steps
                self.step_in_cycle = epoch

        # The peak learning rate is multiplied by gamma once per completed cycle.
        self.max_lr = self.base_max_lr * (self.gamma ** self.cycle)
        self.last_epoch = math.floor(epoch)
        for param_group, lr in zip(self.optimizer.param_groups, self.get_lr()):
            param_group['lr'] = lr

In [None]:
def init_weight(model, kind='xavier'):
    """Re-initialise the multi-dimensional parameters of ``model`` in place.

    Only parameters with at least two dimensions are touched; 1-D parameters
    (for example ``nn.Linear`` biases and BatchNorm scale/shift vectors) keep
    their existing values. Parameters whose name contains ``'weight'`` get the
    normal variant of the chosen scheme, parameters whose name contains
    ``'bias'`` the uniform variant, and any other parameter is left unchanged.

    Args:
        model: ``nn.Module`` whose ``named_parameters()`` are initialised.
        kind: ``'xavier'`` (gain 1.0) or ``'kaiming'``.

    Raises:
        ValueError: if ``kind`` is not a supported scheme. Previously an unknown
            value was silently ignored and the model kept its default
            initialisation, which hid typos in the configuration.
    """
    schemes = {
        'xavier': (lambda tensor: init.xavier_normal_(tensor, gain=1.0),
                   lambda tensor: init.xavier_uniform_(tensor, gain=1.0)),
        'kaiming': (init.kaiming_normal_, init.kaiming_uniform_),
    }
    if kind not in schemes:
        raise ValueError(
            f"unknown initialization kind {kind!r}; expected one of {sorted(schemes)}")
    init_weight_, init_bias_ = schemes[kind]

    for name, param in model.named_parameters():
        # Xavier/Kaiming need fan-in and fan-out, which do not exist for 1-D tensors.
        if param.dim() < 2:
            continue
        if 'weight' in name:
            init_weight_(param)
        elif 'bias' in name:
            init_bias_(param)

In [None]:
# One-hot encode the target column.
# The encoder is created with its default (sparse) output and densified with
# .toarray(): the `sparse=` keyword was renamed to `sparse_output=` in newer
# scikit-learn releases, so this form works on both old and new versions.
ohe = OneHotEncoder()
y = ohe.fit_transform(train[['target']]).toarray()

# Label smoothing: add `smoothing` to every class, then renormalise so each row
# still sums to 1. With 4 classes the denominator is 1 + 4 * 0.05 = 1.2.
smoothing = 0.05
new_y = (y + smoothing) / (1.0 + smoothing * y.shape[1])
y = new_y

In [None]:
# Log in to Weights & Biases from the shell; --relogin is passed to the wandb CLI.
!wandb login --relogin

In [None]:
# Experiment configuration shared by the training cells.
CFG = {
    'seed':2022,
    'fold':5, # fold to train
    'n_split':5, # number of folds
    'batch_size': 1024, # training mini-batch size
    'num_classes': 4,
    'epoch': 100,
    'model_name': "MLP", 
    'initialization': "kaiming", # one of: kaiming, xavier
    'color': 'rgb'  # NOTE(review): not read anywhere in the visible cells - confirm it is still needed
}

# NOTE(review): this rebinds the module attribute `wandb.config` to a plain dict;
# confirm the runs actually record it (passing `config=` to `wandb.init` is the
# documented way to attach a configuration to a run).
wandb.config = CFG
experiment_name = 'label_smooth_enhance_training'

In [None]:
# K-fold training of the MLP.
# For every fold: train for CFG['epoch'] epochs, keep the weights with the best
# validation accuracy, then predict the test set with those weights. The test
# outputs (raw scores) are averaged over the folds into `mlp_pred`.

# Output buffer sized from the actual test matrix instead of a hard-coded 9343 x 4.
mlp_pred = np.zeros([len(test_X), CFG['num_classes']])
mlp_acc = list()            # best validation accuracy of every fold
accuracy_stats_list = list()
loss_stats_list = list()
loss_flag = False

X_test_tensor = torch.FloatTensor(test_X.to_numpy()).to(DEVICE)

# `y` holds one row of (smoothed) class probabilities per sample, which
# StratifiedKFold cannot stratify on; use the most probable class instead.
y = np.asarray(y)
stratify_labels = y.argmax(axis=1)
skf = StratifiedKFold(n_splits=CFG['n_split'], shuffle=True, random_state=CFG['seed'])

for fold, (tr_idx, val_idx) in enumerate(skf.split(X, stratify_labels)):

    # Start a wandb run for this fold and attach the configuration to it.
    run = wandb.init(project=f"{CFG['model_name']}", settings=wandb.Settings(start_method="thread"), name=f"{experiment_name}_{CFG['n_split']}split_{fold}", config=CFG)

    print(f'\n --------------- Begin  Fold {fold+1} Training !! ---------------- \n')

    loss_list = []   # per-batch training loss of this fold
    acc_list = []    # per-batch training accuracy of this fold

    X_train = X.iloc[tr_idx,:]
    y_train = y[tr_idx]

    X_val = X.iloc[val_idx,:]
    y_val = y[val_idx]

    train_dataset = my_dataset(torch.FloatTensor(X_train.to_numpy()), torch.FloatTensor(y_train))
    valid_dataset = my_dataset(torch.FloatTensor(X_val.to_numpy()), torch.FloatTensor(y_val))

    # The scheduler is stepped once per batch, so its lengths are expressed in
    # batches. Same floor-based count as before, but kept an integer and at
    # least 1 so a training set smaller than one batch cannot give 0-length cycles.
    steps_per_epoch = max(1, len(train_dataset) // CFG['batch_size'])
    cosine_annealing_scheduler_arg = dict(
        first_cycle_steps=steps_per_epoch * CFG['epoch'] // 2,
        cycle_mult=2.0,
        max_lr=4e-03,
        min_lr=1e-06,
        warmup_steps=steps_per_epoch * 3,
        gamma=0.7
    )

    train_loader = DataLoader(dataset=train_dataset, batch_size=CFG['batch_size'], shuffle=True, pin_memory=True)
    # Validation runs in eval mode without gradients, so it can use full batches;
    # the metrics below are weighted per sample and therefore do not depend on
    # the batch size.
    valid_loader = DataLoader(dataset=valid_dataset, batch_size=CFG['batch_size'], shuffle=False, pin_memory=True)

    model = My_Model(num_features=X.shape[1], num_classes=CFG['num_classes'])
    init_weight(model, kind=CFG['initialization'])
    model = model.to(DEVICE)
    # Targets are class probabilities (smoothed one-hot rows).
    criterion = nn.CrossEntropyLoss()
    # The lr given here is overwritten by the scheduler (it starts from min_lr).
    optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=0)
    scheduler = CosineAnnealingWarmupRestarts(optimizer, **cosine_annealing_scheduler_arg)

    best_acc = 0
    # Always defined, even if no epoch ever improves on best_acc.
    best_model = deepcopy(model.state_dict())

    for epoch in range(CFG['epoch']):

        train_loss_sum = 0.0
        train_acc_sum = 0.0

        model.train()
        for i, (X_train_batch, y_train_batch) in enumerate(train_loader):

            X_train_batch, y_train_batch = X_train_batch.to(DEVICE), y_train_batch.to(DEVICE)

            # Forward
            y_train_output = model(X_train_batch)
            train_loss = criterion(y_train_output, y_train_batch)

            # argmax of the raw scores equals argmax of their (log-)softmax.
            y_train_pred_tags = y_train_output.argmax(dim=1)
            train_acc = (y_train_pred_tags == y_train_batch.argmax(dim=1)).float().mean().item()

            # Accumulate plain floats so the autograd graph of each batch is released.
            train_loss_sum += train_loss.item()
            train_acc_sum += train_acc

            loss_list.append(train_loss.item())
            acc_list.append(train_acc)

            # Backward
            optimizer.zero_grad()
            train_loss.backward()
            optimizer.step()
            scheduler.step()

        # Validation: per-sample weighted loss and accuracy over the whole fold.
        valid_loss_sum = 0.0
        valid_correct = 0

        model.eval()
        with torch.no_grad():
            for X_valid_batch, y_valid_batch in valid_loader:

                X_valid_batch, y_valid_batch = X_valid_batch.to(DEVICE), y_valid_batch.to(DEVICE)

                y_valid_output = model(X_valid_batch)
                batch_size = X_valid_batch.size(0)

                valid_loss_sum += criterion(y_valid_output, y_valid_batch).item() * batch_size
                valid_correct += (y_valid_output.argmax(dim=1) == y_valid_batch.argmax(dim=1)).sum().item()

        valid_loss = valid_loss_sum / len(valid_dataset)
        valid_acc = valid_correct / len(valid_dataset)

        # Keep a copy of the weights with the best validation accuracy so far.
        if best_acc < valid_acc:
            print(f'best model changed! val_acc = {valid_acc} train_acc = {train_acc}')
            best_model = deepcopy(model.state_dict())
            best_acc = valid_acc

        if (epoch+1) % 10 == 0:
            print(f'Fold [{fold+1}/{skf.n_splits}] Epoch [{epoch+1}/{CFG["epoch"]}] Step [{i+1}/{len(train_loader)}] Loss: [{train_loss.item():.4f}] Train ACC [{train_acc*100:.2f}%] Valid ACC: [{valid_acc*100:.2f}%]')

        # Log the epoch metrics to wandb.
        wandb_dict = {
            'train loss': train_loss_sum / len(train_loader),
            'train acc': train_acc_sum / len(train_loader),
            'valid loss': valid_loss,
            'valid acc': valid_acc,
            'learning rate': scheduler.get_lr()[0]
            }

        wandb.log(wandb_dict)

    print(f'@@@ {fold + 1} best model prediction !! @@@')
    # Predict with the best checkpoint of this fold, not the last-epoch weights.
    model.load_state_dict(best_model)
    model.eval()
    with torch.no_grad():
        test_output = model(X_test_tensor)
    mlp_pred += (test_output.cpu().numpy()/skf.n_splits)
    mlp_acc.append(best_acc)

    # Close this fold's wandb run before the next fold opens a new one.
    run.finish()

In [None]:
# Check the result.
# `mlp_pred` holds the fold-averaged raw scores. Softmax is monotonic within a
# row, so the arg-max of the scores is the same class the softmax would pick;
# staying in NumPy also guarantees a plain integer array for the DataFrame column.
predicted_labels = np.argmax(mlp_pred, axis=1)

submission['target'] = predicted_labels
submission.target.value_counts()

In [None]:
# Compare with an earlier submission: display the rows whose target differs.
sub_compare = pd.read_csv('/content/drive/MyDrive/AI_individual/Dacon_hand_gesture/submission/submit_12.csv')
target_differs = submission['target'] != sub_compare['target']
submission.loc[target_differs]

In [None]:
# Write the submission file (the DataFrame index is not saved).
submit_path = '/content/drive/MyDrive/AI_individual/Dacon_hand_gesture/submission/submit_19.csv'
submission.to_csv(submit_path, index = False)