In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import GridSearchCV 
# XGBoost
import xgboost as xgb
from xgboost import XGBRegressor
from xgboost import XGBClassifier

## 訓練データを読み込み、目的変数と説明変数に分ける

In [3]:
# Read the tab-separated training file, then take independent copies of the
# six feature columns and of the target column, so later edits to X_train /
# Y_train never touch `train` itself.
train = pd.read_table("data/train.tsv")
X_train = train[["buying", "maint", "doors", "persons", "lug_boot", "safety"]].copy()
Y_train = train["class"].copy()

### doorsとpersonsの非数値を数値に変換する
### replaceした後も列はobject型(文字列)のままなので、astype(int)で整数型に変換する

In [4]:
# Swap the single non-numeric level of each column for the digit string "5",
# then cast the column (still object-typed after replace) to int.
X_train = X_train.assign(
    doors=X_train["doors"].replace({"5more": "5"}).astype(int),
    persons=X_train["persons"].replace({"more": "5"}).astype(int),
)

### class, buying, maint, lug_boot, safetyを、カテゴリ変数として数値に変換する

In [6]:
# Encode the target and the four categorical features as ordered integers.
#
# The source values are read from the untouched `train` frame rather than from
# Y_train / X_train themselves: Series.map returns NaN for every value missing
# from its dict, so mapping the already-encoded integers again (i.e. re-running
# this cell) would silently turn every entry into NaN.
Y_train = train['class'].map({'unacc': 1, 'acc': 2, 'good': 3, 'vgood': 4})
X_train['buying'] = train['buying'].map({'low': 1, 'med': 2, 'high': 3, 'vhigh': 4})
X_train['maint'] = train['maint'].map({'low': 1, 'med': 2, 'high': 3, 'vhigh': 4})
X_train['lug_boot'] = train['lug_boot'].map({'small': 1, 'med': 2, 'big': 3})
X_train['safety'] = train['safety'].map({'low': 1, 'med': 2, 'high': 3})

# Fail loudly if any label was not covered by the dictionaries above instead
# of carrying NaN into model fitting.
assert Y_train.notna().all(), "unmapped value in 'class'"
assert X_train[['buying', 'maint', 'lug_boot', 'safety']].notna().all().all(), \
    "unmapped value in a categorical feature column"

### 以前決めたハイパーパラメタでXGBoostを使う

In [7]:
# Hyperparameters settled on earlier; forwarded to the classifier as keyword
# arguments.
params = dict(
    n_estimators=600,
    max_depth=4,
    min_child_weight=9,
    gamma=0,
    subsample=1.0,
    colsample_bytree=0.6,
    learning_rate=0.1,
)
# NOTE(review): newer XGBoost releases may expect class labels 0..n-1 and
# `random_state` instead of `seed` -- confirm against the installed version.
xgboost_opt = XGBClassifier(seed=42, **params)
# Kept as the last expression so the fitted estimator is shown as cell output.
xgboost_opt.fit(X_train, Y_train)

XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
       colsample_bytree=0.6, gamma=0, learning_rate=0.1, max_delta_step=0,
       max_depth=4, min_child_weight=9, missing=None, n_estimators=600,
       n_jobs=1, nthread=None, objective='multi:softprob', random_state=0,
       reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=42, silent=True,
       subsample=1.0)

## テストデータを読み、訓練データと同じ加工をする

In [8]:
# Load the test set and apply the same feature preparation as for training.
test = pd.read_table("data/test.tsv")
# Explicit copy (as done for the training features) so the column assignments
# below act on an independent frame, never on a slice of `test`.
X_test = test.loc[:, ["buying", "maint", 'doors', 'persons', "lug_boot", "safety"]].copy()

# Non-numeric levels -> digit string, then cast the object column to int.
X_test['doors'] = X_test['doors'].replace({"5more": "5"}).astype(int)
X_test['persons'] = X_test['persons'].replace({"more": '5'}).astype(int)

# Ordered integer encoding of the categorical features.
X_test['buying'] = X_test['buying'].map({'low': 1, 'med': 2, 'high': 3, 'vhigh': 4})
X_test['maint'] = X_test['maint'].map({'low': 1, 'med': 2, 'high': 3, 'vhigh': 4})
X_test['lug_boot'] = X_test['lug_boot'].map({'small': 1, 'med': 2, 'big': 3})
X_test['safety'] = X_test['safety'].map({'low': 1, 'med': 2, 'high': 3})

# Series.map yields NaN for any value missing from its dict; stop here rather
# than silently predicting on NaN features.
assert X_test.notna().all().all(), "unmapped or missing value in test features"

In [9]:
# Predict a class label for every row of the prepared test features using
# the classifier fitted above.
Y_test = xgboost_opt.predict(X_test)

In [11]:
# Use the sample submission as the template (headerless file: id, result),
# overwrite its result column with the predictions decoded back to their
# class names, and write it out without header or index.
submit = pd.read_csv('data/sample_submit.csv', names=['id', 'result'])
submit['result'] = pd.Series(Y_test, index=submit.index).map(
    {1: 'unacc', 2: 'acc', 3: 'good', 4: 'vgood'}
)
submit.to_csv('output/submit3_python_impl.csv', header=False, index=False)