In [None]:
import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import torch
import random
%matplotlib inline

import xgboost
from sklearn.tree import DecisionTreeClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from sklearn.ensemble import RandomForestClassifier,StackingClassifier
from sklearn.model_selection import train_test_split, RepeatedStratifiedKFold, GridSearchCV, StratifiedKFold
import sklearn.metrics as metrics
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, roc_curve, auc, confusion_matrix, classification_report
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import RFECV

In [None]:
# Fix the random seeds so that runs are reproducible
import torch
import random

def seed_everything(seed):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch
    (including the CUDA seeding calls), then sets the cuDNN backend flags to
    ``deterministic = True`` and ``benchmark = False``.

    Args:
        seed: Seed value handed unchanged to each seeding function.
    """
    # Python and NumPy global generators.
    random.seed(seed)
    np.random.seed(seed)

    # PyTorch seeding calls; manual_seed_all is the one relevant for multi-GPU.
    for seed_fn in (torch.manual_seed, torch.cuda.manual_seed, torch.cuda.manual_seed_all):
        seed_fn(seed)

    # cuDNN flags: request deterministic behaviour and turn benchmarking off.
    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False


seed_everything(2022)

In [None]:
# Single place to configure where the competition CSVs live; previously the
# same absolute Drive path was repeated for every file.
DATA_DIR = '/content/drive/MyDrive/AI_individual/Dacon_hand_gesture/data'

# Training data: every column except 'id' and 'target' is used as a feature,
# and 'target' is the label.
train = pd.read_csv(f'{DATA_DIR}/train.csv')
train_X = train.drop(columns=['id', 'target'])
train_y = train['target']

# Test data: only 'id' is dropped; the remaining columns are the features.
test = pd.read_csv(f'{DATA_DIR}/test.csv')
test_X = test.drop(columns=['id'])

In [None]:
# Outer splitter for the training loop: 10 stratified folds, shuffled with a fixed seed.
skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=2022)

In [None]:
# Base learners of the stacked ensemble; all use the same random_state.
rnd_clf = RandomForestClassifier(random_state=2022)  # random forest
lgbm_clf = LGBMClassifier(random_state=2022)
xgb_clf = XGBClassifier(random_state=2022)

# Ingredients of the stacking model: the named base learners, the inner
# 5-fold stratified splitter handed to StackingClassifier as `cv`, and the
# XGBoost model used as the final estimator.
base_estimators = [('rnd', rnd_clf), ('lgbm', lgbm_clf), ('xgb', xgb_clf)]
inner_cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=2022)
meta_clf = XGBClassifier(random_state=2022)

stacking_clf = StackingClassifier(
    estimators=base_estimators,
    final_estimator=meta_clf,
    cv=inner_cv,
)

In [None]:
# Convert the pandas objects to NumPy arrays: the training loop selects rows
# with the integer index arrays produced by skf.split().
train_X, train_y, test_X = (np.array(table) for table in (train_X, train_y, test_X))

In [None]:
# Cross-validated training with test-time averaging: every fold refits the
# stacked model, scores it on the held-out rows, and adds its test-set class
# probabilities (divided by the number of folds) to a running mean.
acc_mean = 0.0

# Size the probability accumulator from the data rather than hard-coding
# (9343, 4), so the cell still works when the test set size or the number of
# classes changes.
n_classes = len(np.unique(train_y))
pred_ensemble = np.zeros((len(test_X), n_classes))

for i, (train_idx, valid_idx) in enumerate(skf.split(train_X, train_y)):
    print(f'{i+1} Fold Trainign...')
    tr_x, tr_y = train_X[train_idx], train_y[train_idx]
    val_x, val_y = train_X[valid_idx], train_y[valid_idx]

    # Fit on this fold's training rows and evaluate on its validation rows.
    stacking_clf.fit(tr_x, tr_y)
    val_pred = stacking_clf.predict(val_x)
    acc_tmp = accuracy_score(val_y, val_pred)
    print(f'{i+1} Fold Acc : {acc_tmp}')

    # Each fold contributes 1 / n_splits of the final accuracy and of the
    # final test probabilities, so both end up as plain means over the folds.
    pred = stacking_clf.predict_proba(test_X)
    acc_mean += (acc_tmp / skf.n_splits)
    pred_ensemble += (pred / skf.n_splits)

In [None]:
# Inspect the results: the arg-max column per test row, the accuracy averaged
# over the folds, and finally the averaged probability matrix itself.
pred_labels = np.argmax(pred_ensemble, axis=1)
print(pred_labels)
print(acc_mean)
pred_ensemble

In [None]:
# Build the submission frame. `submission` is not created in any earlier cell,
# so assigning a column to it failed with NameError on a fresh kernel; it is
# now started from the ids of the test set.
# NOTE(review): if the competition ships a sample submission file, loading
# that instead is an equally valid starting point — confirm the expected format.
submission = pd.DataFrame({'id': test['id']})

# NOTE(review): argmax returns the column position of the largest probability;
# this equals the class label only if the targets are 0..n_classes-1 — confirm.
submission['target'] = np.argmax(pred_ensemble, axis=1)

# Check the distribution of the predicted classes.
submission.target.value_counts()