## XGBデフォルト

In [1]:
import numpy as np
import pandas as pd

import xgboost
from xgboost import XGBClassifier
from sklearn.metrics import mean_squared_error, make_scorer, confusion_matrix, accuracy_score
from sklearn.model_selection import train_test_split


In [3]:
# Load the training data from a pickle file.
# NOTE(review): unpickling can execute arbitrary code, so only load pickle
# files that come from a trusted source.
train_pkl = pd.read_pickle('./train.pk3')

In [4]:
# Check the dimensions (rows, columns) of the loaded training frame.
train_pkl.shape

(850, 10)

In [7]:
# List the column names; 'disease' is the column used as the target further down.
train_pkl.columns

Index(['Age', 'T_Bil', 'D_Bil', 'ALP', 'ALT_GPT', 'AST_GOT', 'TP', 'Alb',
       'AG_ratio', 'disease'],
      dtype='object')

## 訓練データとテストデータに分割

In [5]:
# Split into training and test data: 20% of the rows are held out as the test
# set, and random_state is fixed so the same split is produced on every run.
train, test = train_test_split(train_pkl, test_size=0.2, random_state=42)

In [6]:
# Split into features and target.
# The target column is removed by name rather than by position, so the target
# can never end up among the features if the column order of `train` changes.
train_X = train.drop(columns=['disease'])
train_y = train['disease'].values

In [7]:
# Preview the first five rows of the training features.
train_X.head(5)

Unnamed: 0,Age,T_Bil,D_Bil,ALP,ALT_GPT,AST_GOT,TP,Alb,AG_ratio
332,44,2.066406,0.631348,213.935623,14.563273,47.154297,7.503906,2.302734,0.759766
383,36,0.817871,0.197021,214.644638,15.622564,21.059477,7.511719,3.630859,1.291016
281,20,0.791992,0.082642,358.339508,12.924613,25.77248,8.648438,4.324219,1.319336
2,65,0.657227,0.081299,320.770538,12.625011,30.61318,5.949219,2.488281,0.774902
231,29,1.714844,0.51123,215.885971,27.66971,60.709866,5.953125,3.117188,1.21875


## 訓練データで訓練

In [8]:
# Create the classifier without passing any arguments, i.e. with the library's
# default hyperparameters.
XGB = XGBClassifier()

In [9]:
# Train on the training features and labels; the value returned by fit() is
# bound back to the same name.
XGB = XGB.fit(train_X, train_y)

In [10]:
# Show the raw importance score of each feature for the fitted model.
XGB.feature_importances_

array([0.03163151, 0.2974341 , 0.09086896, 0.08416744, 0.11689862,
       0.17965345, 0.06029121, 0.04966712, 0.08938762], dtype=float32)

In [11]:
# Pair every importance score (rounded to three decimals) with its feature
# name and list the pairs from the most to the least important feature.
sorted(
    [
        (round(importance, 3), column)
        for importance, column in zip(XGB.feature_importances_, train.iloc[:, :-1].columns)
    ],
    reverse=True,
)

[(0.297, 'T_Bil'),
 (0.18, 'AST_GOT'),
 (0.117, 'ALT_GPT'),
 (0.091, 'D_Bil'),
 (0.089, 'AG_ratio'),
 (0.084, 'ALP'),
 (0.06, 'TP'),
 (0.05, 'Alb'),
 (0.032, 'Age')]

## テストデータで実行

In [12]:
# Split into features and target.
# The target column is removed by name rather than by position, so the target
# can never end up among the features if the column order of `test` changes.
test_x = test.drop(columns=['disease'])
test_y = test['disease'].values

In [13]:
# Confirm that the test features and the test labels have the same number of rows.
test_x.shape, test_y.shape

((170, 9), (170,))

In [14]:
# Predict class labels for the held-out test features.
pred_y = XGB.predict(test_x)

In [15]:
# Confusion matrix on the held-out test set (true labels first, predictions second).
confusion_matrix(test_y, pred_y)

array([[88,  5],
       [18, 59]], dtype=int64)

In [16]:
# Fraction of held-out test rows whose label was predicted correctly.
accuracy_score(test_y, pred_y)

0.8647058823529412

## 検証データで実行

In [17]:
# Load the validation data (the rows to be predicted below) from a pickle file.
# NOTE(review): unpickling can execute arbitrary code, so only load pickle
# files that come from a trusted source.
valid = pd.read_pickle('./test.pk3')

In [18]:
# Check the dimensions (rows, columns) of the validation frame.
valid.shape

(350, 9)

In [19]:
# Confirm that the unpickled object is a pandas DataFrame.
type(valid)

pandas.core.frame.DataFrame

In [20]:
# Save the IDs.
# Disabled: presumably `valid` no longer carries an `id` column — TODO confirm.
# valid_pass = valid.id.values

In [21]:
# Earlier version, which skipped the first column (presumably an ID column — TODO confirm):
# valid_X = valid.iloc[:, 1:]
# Use every column of `valid` as a feature; a copy is taken so that `valid`
# itself is left untouched.
valid_X = valid.copy()

In [22]:
# Compare shapes: the validation and training feature frames should have the
# same number of columns.
valid_X.shape, train_X.shape

((350, 9), (680, 9))

In [23]:
# Predict class labels for the validation features with the trained model.
pred_valid_y = XGB.predict(valid_X)

In [24]:
# One prediction per validation row is expected.
pred_valid_y.shape

(350,)

In [25]:
# Disabled debugging check. `valid_pass` is only assigned in commented-out
# code, so this line would raise NameError if it were re-enabled as is.
# type(valid_pass), type(pred_valid_y)

In [26]:
# Wrap the predictions in a DataFrame and label each row with the index of the
# validation row it was predicted from. Without an explicit index the frame
# would get a fresh 0..n-1 index, and any row identifiers carried by `valid_X`
# would be lost when the result is saved.
result_df = pd.DataFrame(pred_valid_y, index=valid_X.index)

In [27]:
# Write the predictions to a CSV file without a header row; the DataFrame
# index is written as the first column (pandas' default for to_csv).
result_df.to_csv("./XGB_4.csv", header=False)