In [None]:
import pandas as pd
import numpy as np

import seaborn as sns
from matplotlib import pyplot as plt
%matplotlib inline

from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import StratifiedKFold, StratifiedShuffleSplit
from sklearn.metrics import accuracy_score

from tensorflow.keras.initializers import HeNormal
from tensorflow.keras.layers import *
from tensorflow.keras.models import *
from tensorflow.keras.optimizers import *
from tensorflow.keras.callbacks import *

import warnings
warnings.filterwarnings('ignore')

# Connect to Google Colab: mount Google Drive at /content/drive, the location the
# data-loading cell reads its CSV files from.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Fix the random seeds
import torch
import random

def seed_everything(seed):
    """Seed every random number generator this notebook can draw from.

    Seeds Python's ``random``, NumPy's legacy global generator, PyTorch
    (CPU and all CUDA devices) and, when it is importable, TensorFlow.

    Parameters
    ----------
    seed : int
        Value handed to each library's seeding function.
    """
    random.seed(seed)
    np.random.seed(seed)

    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # if use multi-GPU
    # Ask cuDNN for deterministic kernels and disable its auto-tuner.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    # The models in this notebook are built with tensorflow.keras, so the
    # TensorFlow generator must be seeded as well; seeding torch alone leaves
    # weight initialisation and batch shuffling unseeded.
    try:
        import tensorflow as tf
    except ImportError:
        tf = None
    if tf is not None:
        tf.random.set_seed(seed)
seed_everything(2022)

In [None]:
# Load the competition CSV files from the mounted Google Drive folder.
# The row/column counts in the trailing comments are the author's notes.
train = pd.read_csv('/content/drive/MyDrive/AI_individual/Dacon_hand_gesture/data/train.csv') # 2335 rows 34 columns
test = pd.read_csv('/content/drive/MyDrive/AI_individual/Dacon_hand_gesture/data/test.csv')   # 9343 rows 33 columns
submission = pd.read_csv('/content/drive/MyDrive/AI_individual/Dacon_hand_gesture/data/sample_submission.csv')

In [None]:
# Original sensor columns + copies of the same columns (even positions first, then odd)
def feat_transform_8x8(train, test):
    """Double the feature columns by appending reordered copies of them.

    The feature columns are ``train.columns[1:-1]`` (everything between the
    first and the last column of ``train``). Each returned frame keeps its
    original feature columns and gains one ``copy_<name>`` column per feature:
    first the features at even positions (0, 2, 4, ...), then those at odd
    positions (1, 3, 5, ...). With 32 features this yields 64 columns.

    Parameters
    ----------
    train : pandas.DataFrame
        First column and last column are dropped; the rest are features.
    test : pandas.DataFrame
        First column is dropped. Must contain every feature column of
        ``train`` under the same name.

    Returns
    -------
    (pandas.DataFrame, pandas.DataFrame)
        ``(X, target)`` built from ``train`` and ``test`` respectively.
        Both are new frames; the inputs are not modified.
    """
    col_list = list(train.columns[1:-1])
    # Order in which the copies are appended: even positions, then odd ones.
    copy_order = col_list[0::2] + col_list[1::2]

    def _with_copies(features):
        # Build a new frame instead of assigning columns onto an .iloc slice,
        # which is chained assignment on the caller's DataFrame.
        copies = features[copy_order].add_prefix('copy_')
        return pd.concat([features, copies], axis=1)

    X = _with_copies(train.iloc[:, 1:-1])     # sensor_1 ~ sensor_32 + copies
    target = _with_copies(test.iloc[:, 1:])   # sensor_1 ~ sensor_32 + copies

    return X, target

In [None]:
# Add 4 fake sensors so that every row has 36 values (6 * 6 layout)

def feat_transform_6x6(train, test, fill_value=float(-150)):
    """Pad the 32 sensor columns with four constant "fake sensor" columns.

    The sensor columns are ``train.columns[1:-1]``. The returned frames have
    36 columns in this order::

        fake_sensor_1, sensors[0:4],
        fake_sensor_2, sensors[4:28],
        fake_sensor_3, sensors[28:32],
        fake_sensor_4

    Parameters
    ----------
    train : pandas.DataFrame
        First and last columns are ignored; the rest are sensor columns.
    test : pandas.DataFrame
        Must contain the same sensor column names as ``train``.
    fill_value : float, optional
        Constant written into the fake sensor columns (default -150.0).

    Returns
    -------
    (pandas.DataFrame, pandas.DataFrame)
        ``(X, target)`` built from ``train`` and ``test``. Each has as many
        rows as its own input frame and keeps that frame's index.
    """
    col_list = list(train.columns[1:-1])
    # Each group of real sensors is preceded by one fake sensor; a final fake
    # sensor closes the layout.
    sensor_groups = [col_list[0:4], col_list[4:28], col_list[28:32]]

    def _pad(frame):
        columns = {}
        for number, group in enumerate(sensor_groups, start=1):
            columns[f'fake_sensor_{number}'] = fill_value
            for name in group:
                columns[name] = frame[name]
        columns[f'fake_sensor_{len(sensor_groups) + 1}'] = fill_value
        # Sizing by the frame's own index (not a hard-coded row count) keeps
        # every row: with a fixed 2335-row filler the longer test set was
        # silently truncated by index alignment.
        return pd.DataFrame(columns, index=frame.index)

    X = _pad(train)
    target = _pad(test)

    return X, target

In [None]:
# Build the 64-column feature frames here so the cell works on a fresh kernel
# (X and target were not assigned anywhere before this point), then reshape
# every row into an 8x8 single-channel image for the CNN.
X, target = feat_transform_8x8(train, test)
X = np.array(X).reshape(-1, 8, 8, 1)                # [2335, 8, 8, 1]
target = np.array(target).reshape(-1, 8, 8, 1)      # [9343, 8, 8, 1]

In [None]:
# One-hot encode the labels. The dense array is obtained with .toarray() on the
# encoder's default sparse output instead of the `sparse=False` argument, which
# newer scikit-learn releases renamed to `sparse_output` and then removed.
ohe = OneHotEncoder()
y = ohe.fit_transform(train[['target']]).toarray()    # [2335, 4]
# Ten stratified random 90/10 train/validation splits with a fixed seed.
skf = StratifiedShuffleSplit(n_splits=10, train_size=0.9, test_size=0.1, random_state=2022)
# Stop a fit once validation accuracy has not improved for 10 epochs.
es = EarlyStopping(monitor = 'val_acc', patience = 10, mode = 'max', verbose = 1)

In [None]:
# Imported here only to release Keras' global state between model builds.
from tensorflow.keras import backend as K

# One ensembled test-probability array per repeat.
record_list = list()

# Width of the one-hot labels = number of classes the softmax must output.
n_classes = y.shape[1]

dim_3_3 = 64    # filters of the 2x2 / 3x3 / 4x4 convolutions
dim_1_1 = 16    # filters of the 1x1 convolutions in between

# Repeat the whole split-wise training 10 times. The loop variables are named
# `repeat` and `fold` so the inner loop no longer reuses the outer loop's name.
for repeat in range(10):
    # Both are re-created per repeat, so after the loop they describe the
    # last repeat only; every repeat's predictions are kept in record_list.
    cnn_acc = []
    cnn_pred = np.zeros((target.shape[0], n_classes))   # [9343, 4]

    for fold, (tr_idx, val_idx) in enumerate(skf.split(X, train.target)):
        print(f'{fold + 1} Fold Training.....')
        tr_x, tr_y = X[tr_idx], y[tr_idx]
        val_x, val_y = X[val_idx], y[val_idx]

        # Many models are built in this loop; drop the global state left by
        # the previous ones so memory use does not keep growing.
        K.clear_session()

        ### CNN model
        cnn = Sequential([
                        Conv2D(dim_3_3, (2, 2), padding = "same", activation = 'relu', input_shape = (8, 8, 1)),
                        BatchNormalization(),
                        Conv2D(dim_1_1, (1, 1), padding = "same", activation = 'relu'),
                        BatchNormalization(),
                        Conv2D(dim_3_3, (3, 3), padding = "same", activation = 'relu'),

                        BatchNormalization(),
                        Conv2D(dim_1_1, (1, 1), padding = "same", activation = 'relu'),
                        BatchNormalization(),
                        Conv2D(dim_3_3, (4, 4), padding = "same", activation = 'relu'),
                        BatchNormalization(),
                        GlobalAveragePooling2D(),
                        Dense(32, activation = 'relu'),
                        Dense(n_classes, activation = 'softmax')
                        ])

        ### ModelCheckpoint is re-created for every fold; it keeps the epoch
        ### with the best validation accuracy in model_<fold>.h5
        mc = ModelCheckpoint(f'model_{fold + 1}.h5', save_best_only = True, monitor = 'val_acc', mode = 'auto', verbose = 0)

        ### Compile the model
        cnn.compile(optimizer = RMSprop(learning_rate = 0.0004), loss = 'categorical_crossentropy', metrics = ['acc'])

        cnn.fit(tr_x, tr_y, validation_data = (val_x, val_y), epochs = 100, batch_size = 32, callbacks = [es, mc], verbose = 0)

        ### Load the best checkpoint saved for this fold
        best = load_model(f'model_{fold + 1}.h5')
        ### Predict on the validation split
        val_pred = best.predict(val_x)
        ### The class with the highest probability is the predicted class
        val_cls = np.argmax(val_pred, axis = 1)
        ### Validation accuracy of this fold
        fold_cnn_acc = accuracy_score(np.argmax(val_y, axis = 1), val_cls)
        cnn_acc.append(fold_cnn_acc)
        print(f'{fold + 1} Fold ACC of CNN = {fold_cnn_acc}\n')

        ### Test-set probabilities of this fold, divided by the number of
        ### splits so the running sum ends up as the ensemble average
        fold_pred = best.predict(target) / skf.n_splits
        cnn_pred += fold_pred
    record_list.append(cnn_pred)

In [None]:
# Check the results.
# cnn_acc and cnn_pred are re-created at the start of every repeat in the
# training cell, so the values shown here belong to the last repeat only.
# Mean validation accuracy over the folds:
print(np.mean(cnn_acc)) 
# Predicted class index for each test row (argmax of the averaged probabilities):
print(np.argmax(cnn_pred, axis = 1))
# Averaged class probabilities themselves (displayed as the cell output):
cnn_pred