In [None]:
import pandas as pd
import numpy as np

import seaborn as sns
from matplotlib import pyplot as plt
%matplotlib inline

from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import StratifiedKFold, StratifiedShuffleSplit
from sklearn.metrics import accuracy_score

from tensorflow.keras.initializers import HeNormal
from tensorflow.keras.layers import *
from tensorflow.keras.models import *
from tensorflow.keras.optimizers import *
from tensorflow.keras.callbacks import *

import warnings
warnings.filterwarnings('ignore')

# Connect to Google Colab: mount Google Drive at /content/drive so files stored there
# can be read by path. This import only exists inside the Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Fix the random seeds for reproducibility
import torch
import random

def seed_everything(seed):
    """Seed every random number generator this notebook can touch.

    Seeds Python's ``random``, NumPy's legacy global RNG, PyTorch (CPU and
    all CUDA devices) and, when it is installed, TensorFlow. The TensorFlow
    step matters most here: the models in this notebook are Keras models, so
    without it weight initialisation and batch shuffling are not controlled
    by ``seed`` at all.

    Args:
        seed: Integer seed applied to every generator.
    """
    random.seed(seed)
    np.random.seed(seed)

    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # if use multi-GPU
    # Trade cuDNN autotuning for deterministic kernel selection.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    # Imported lazily so the helper still works in an environment that only
    # has PyTorch; in this notebook TensorFlow is always present.
    try:
        import tensorflow as tf
    except ImportError:
        pass
    else:
        tf.random.set_seed(seed)
seed_everything(2022)

In [None]:
# Directory on the mounted Google Drive that holds the competition CSV files.
# Kept in one place so relocating the data means editing a single line.
DATA_DIR = '/content/drive/MyDrive/AI_individual/Dacon_hand_gesture/data'

train = pd.read_csv(f'{DATA_DIR}/train.csv')   # author's note: 2335 rows 34 columns
test = pd.read_csv(f'{DATA_DIR}/test.csv')     # author's note: 9343 rows 33 columns
submission = pd.read_csv(f'{DATA_DIR}/sample_submission.csv')

In [None]:
def get_post(x_in):
    """Apply the post-convolution stage: LeakyReLU, then batch normalisation.

    Args:
        x_in: Tensor to post-process.

    Returns:
        The tensor produced by ``BatchNormalization`` applied to the
        LeakyReLU-activated input.
    """
    activated = LeakyReLU()(x_in)
    return BatchNormalization()(activated)

def get_block(x_in, ch_in, ch_out):
    """Build one convolutional block on top of ``x_in``.

    Layer sequence (every convolution is bias-free, uses ``'same'`` padding
    and is followed by ``get_post``):

    1. 1x1 ``Conv2D`` with ``ch_in`` filters
    2. ``DepthwiseConv2D`` with a (1, 3) kernel
    3. ``MaxPool2D`` with window (2, 1) and stride (2, 1)
    4. ``DepthwiseConv2D`` with a (3, 1) kernel
    5. ``Conv2D`` with ``ch_out`` filters, kernel (2, 1) and stride (1, 2)

    Args:
        x_in: Input tensor.
        ch_in: Number of filters of the opening 1x1 convolution.
        ch_out: Number of filters of the closing strided convolution.

    Returns:
        The output tensor of the block.
    """
    # Options shared by all four convolution layers.
    conv_opts = dict(padding='same', use_bias=False)

    # Pointwise projection to ch_in channels.
    h = get_post(Conv2D(ch_in, kernel_size=(1, 1), **conv_opts)(x_in))

    # Depthwise convolution along the second spatial axis, then pooling along the first.
    h = get_post(DepthwiseConv2D(kernel_size=(1, 3), **conv_opts)(h))
    h = MaxPool2D(pool_size=(2, 1), strides=(2, 1))(h)  # Separable pooling

    # Depthwise convolution along the first spatial axis.
    h = get_post(DepthwiseConv2D(kernel_size=(3, 1), **conv_opts)(h))

    # Strided convolution producing ch_out channels.
    closing_conv = Conv2D(ch_out, kernel_size=(2, 1), strides=(1, 2), **conv_opts)
    return get_post(closing_conv(h))


def Effnet(input_shape, nb_classes, include_top=True, weights=None):
    """Assemble the three-block network and optionally load weights into it.

    Args:
        input_shape: Shape of one input sample, passed to ``Input``.
        nb_classes: Number of units in the final softmax ``Dense`` layer.
        include_top: When true, append ``Flatten`` + softmax ``Dense`` on top
            of the last block; otherwise the model outputs the last block's
            feature map.
        weights: Optional path handed to ``model.load_weights(..., by_name=True)``.
            ``None`` leaves the model with its freshly initialised weights.

    Returns:
        The constructed ``Model``.
    """
    inputs = Input(shape=input_shape)

    # Three stacked blocks; the channel width doubles at every stage.
    features = inputs
    for width_in, width_out in ((32, 64), (64, 128), (128, 256)):
        features = get_block(features, width_in, width_out)

    outputs = features
    if include_top:
        flat = Flatten()(features)
        outputs = Dense(nb_classes, activation='softmax')(flat)

    net = Model(inputs=inputs, outputs=outputs)

    if weights is None:
        return net

    net.load_weights(weights, by_name=True)
    return net

In [None]:
# Feature tables, selected by column position: every column except the first
# (and, for `train`, except the last one as well).
# NOTE(review): presumably column 0 is a row id and the last train column is the label —
# confirm against the CSV headers.
# NOTE: despite its name, `target` holds the *test-set features*, not labels.
X = train.iloc[:, 1:-1]     # author's note: sensor_1 ~ sensor_32 / [2335, 32]
target = test.iloc[:, 1:]   # author's note: sensor_1 ~ sensor_32 / [9343, 32]

In [None]:
# Dense one-hot encoder for the class labels. The keyword that requests dense output was
# renamed from `sparse` to `sparse_output` in scikit-learn 1.2 and the old spelling was
# removed in 1.4, so try the new name first and fall back for older installations.
try:
    ohe = OneHotEncoder(sparse_output=False)
except TypeError:
    ohe = OneHotEncoder(sparse=False)
y = ohe.fit_transform(train[['target']])    # author's note: [2335, 4]

# Ten independent stratified 90/10 shuffle splits (validation sets may overlap between
# splits; this is not a K-fold partition even though the variable is called `skf`).
skf = StratifiedShuffleSplit(n_splits=10, train_size=0.9, test_size=0.1, random_state=2022)

# Stop a run once validation accuracy has not improved for 10 consecutive epochs.
es = EarlyStopping(monitor = 'val_acc', patience = 10, mode = 'max', verbose = 1)

In [None]:
eff_acc = []
n_classes = y.shape[1]                              # width of the one-hot label matrix
eff_pred = np.zeros((target.shape[0], n_classes))   # running average of test-set class probabilities

# The network consumes 4-D image-like tensors, while X / target are flat feature tables.
# Convert both to float arrays once and fold the feature axis into an
# (8, n_features // 8, 1) grid: 32 features -> (8, 4, 1), 64 features -> (8, 8, 1).
# Working on NumPy arrays also makes `X_img[tr_idx]` a positional *row* selection;
# indexing a DataFrame with `X[tr_idx]` would be a column lookup and raise KeyError.
n_features = int(np.prod(X.shape[1:]))
if n_features % 8 != 0:
    raise ValueError(f'expected a feature count divisible by 8, got {n_features}')
input_shape = (8, n_features // 8, 1)
X_img = np.asarray(X, dtype='float32').reshape((-1,) + input_shape)
target_img = np.asarray(target, dtype='float32').reshape((-1,) + input_shape)

for i, (tr_idx, val_idx) in enumerate(skf.split(X, train.target)):
    print(f'{i + 1} Fold Training.....')
    tr_x, tr_y = X_img[tr_idx], y[tr_idx]
    val_x, val_y = X_img[val_idx], y[val_idx]

    ### Effnet model (built from scratch for every split)
    eff = Effnet(input_shape=input_shape, nb_classes=n_classes, include_top=True, weights=None)

    ### ModelCheckpoint is re-created for every split so each one writes its own file;
    ### only the epoch with the best validation accuracy is kept.
    mc = ModelCheckpoint(f'model_{i + 1}.h5', save_best_only = True, monitor = 'val_acc', mode = 'max', verbose = 0)

    ### Compile the model
    eff.compile(optimizer = RMSprop(learning_rate = 0.0005), loss = 'categorical_crossentropy', metrics = ['acc'])

    eff.fit(tr_x, tr_y, validation_data = (val_x, val_y), epochs = 100, batch_size = 32, callbacks = [es, mc], verbose = 1)

    ### Load the best-scoring checkpoint of this split
    best = load_model(f'model_{i + 1}.h5')
    ### Predict on the validation part
    val_pred = best.predict(val_x)
    ### Map each probability row to the class with the highest probability
    val_cls = np.argmax(val_pred, axis = 1)
    ### Validation accuracy of this split
    fold_eff_acc = accuracy_score(np.argmax(val_y, axis = 1), val_cls)
    eff_acc.append(fold_eff_acc)
    print(f'{i + 1} Fold ACC of CNN = {fold_eff_acc}\n')

    ### Predict the test set and add this split's share to the ensemble average
    fold_pred = best.predict(target_img) / skf.n_splits
    eff_pred += fold_pred

In [None]:
# Inspect the results
pred_cls = np.argmax(eff_pred, axis = 1)   # most probable class per test row
submission['target'] = pred_cls
print(pred_cls)
print(np.mean(eff_acc))                    # mean validation accuracy over all splits
print(submission.target.value_counts())    # distribution of predicted classes
eff_pred