In [1]:
cd ..

/Users/satouwataru/Desktop/DiveIntoCode/git/diveintocode-ml/GraduationWork/v4/codes/model


In [2]:
cd ..

/Users/satouwataru/Desktop/DiveIntoCode/git/diveintocode-ml/GraduationWork/v4/codes


In [3]:
cd ..

/Users/satouwataru/Desktop/DiveIntoCode/git/diveintocode-ml/GraduationWork/v4


In [5]:
import numpy as np
import pandas as pd
import codes.common as c
from sklearn.model_selection import train_test_split
import xgboost as xgb
from sklearn.metrics import mean_squared_error
import pickle
import optuna

'''
model : XGboost
description:試合結果の3値分類（勝分敗）
''' 
class model():
    def __init__(self):
        self.common = c.common()
        self.common.PY_NAME = 'model_1'
        self.y_col = 'y_H_result'
        
        self.x_train, self.x_val, self.x_test = None, None, None
        self.y_train, self.y_val = None, None
        self.model = None
        self.f_model_name = None
        
    def get_y_pred(self):
        
        self.preprocessing()
        
        self.set_model()
        
        y_pred = self.predict()
        
        df_y = pd.DataFrame(y_pred, columns = ['pred_m7'])
        
        return df_y
        
    def predict(self):
        dtest = xgb.DMatrix(self.x_test)
        
        y_pred = self.model.predict(dtest)
        return y_pred
    
    def set_model(self):
        year = str(self.x_test[:1]['年月日'].values[0])[:4]
        self.f_model_name = 'data/model/base_models/model_7/model_for_' + year +'.sav'
        try:
            self.model = pickle.load(open(self.f_model_name, 'rb'))
        except:
            self.fit()
            
    def fit(self):
        self.dtrain = xgb.DMatrix(self.x_train, label=self.y_train)
        dval = xgb.DMatrix(self.x_val, label=self.y_val)
        study = optuna.create_study()
        study.optimize(self.objective, n_trials=2)#50)
        
        trial = study.best_trial
        
        self.params["max_depth"] = trial.params["max_depth"]
        self.params["eta"] = trial.params["eta"]

        model = xgb.train(params=self.params,
                  dtrain=self.dtrain,
                  num_boost_round=1000,
                  early_stopping_rounds=5,
                  evals=[(dval, "test")])

        self.model = model
        # 保存
        pickle.dump(self.model, open(self.f_model_name, 'wb'))
        # Accuracy の計算
        y_pred = self.model.predict(dval)
        
        accuracy = sum(self.y_val == y_pred) / len(self.y_val)
        print('accuracy:', accuracy)
        
    def objective(self, trial):
        self.params = {
            "silent": 1,
            "max_depth": trial.suggest_int("max_depth", 1, 9),
            "min_child_weight": 1,
            "eta": trial.suggest_loguniform("eta", 0.01, 1.0),
            "tree_method": "exact",
            "objective": "multi:softmax",
            "num_class": 3,
            "predictor": "cpu_predictor"  
        }
        cv_results = xgb.cv(
            self.params,
            self.dtrain,
            num_boost_round=1000,
            seed=0,
            nfold=5, # CVの分割数
#             metrics={"rmse"},
            early_stopping_rounds=200
        )
        return cv_results["test-merror-mean"].min()

    def preprocessing(self):
        # 読み込み
        df = pd.read_csv("data/model/base_models/preprocessing/preprocessed_1.csv", index_col=0)
        # 目的変数のデータ数を揃える
        train = df[df['train_test']=='train'].drop(columns = ['train_test'])
        train = train.sort_values('年月日', ascending=False)
        train_0 = train[train['y_H_result'] == 0]
        train_1 = train[train['y_H_result'] == 1]
        train_2 = train[train['y_H_result'] == 2]
        n_row_0 = train_0.shape[0]
        n_row_1 = train_1.shape[0]
        n_row_2 = train_2.shape[0]
        n_row = min(n_row_0, n_row_1, n_row_2)
        train = pd.concat([train_0.iloc[:n_row], train_1.iloc[:n_row], train_2.iloc[:n_row]])
        # 不要な列を削除
        train = self.common.drop_y_col(train, self.y_col)
        # train, val, testに分割
        x_train = train.drop(columns = self.y_col)
        y_train = train[self.y_col]
        self.x_train, self.x_val, self.y_train, self.y_val = train_test_split(x_train, y_train, stratify = y_train)
        
        test = df[df['train_test']=='test'].drop(columns = ['train_test'])
        test = self.common.drop_y_col(test, self.y_col)
        self.x_test = test.drop(columns = self.y_col)

In [6]:
m = model()
df = m.get_y_pred()

[32m[I 2021-06-09 02:20:10,564][0m A new study created in memory with name: no-name-063bcdf3-6d1d-4a50-bbe8-3ba160a93b36[0m


Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoo

[32m[I 2021-06-09 02:20:21,727][0m Trial 0 finished with value: 0.5080222 and parameters: {'max_depth': 9, 'eta': 0.21101822490675054}. Best is trial 0 with value: 0.5080222.[0m


Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoo

[32m[I 2021-06-09 02:20:35,727][0m Trial 1 finished with value: 0.5060148 and parameters: {'max_depth': 9, 'eta': 0.11781449114852187}. Best is trial 1 with value: 0.5060148.[0m


Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoo

[32m[I 2021-06-09 02:20:46,439][0m Trial 2 finished with value: 0.509627 and parameters: {'max_depth': 8, 'eta': 0.32009345563147196}. Best is trial 1 with value: 0.5060148.[0m


Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoo

[32m[I 2021-06-09 02:21:01,369][0m Trial 3 finished with value: 0.49699 and parameters: {'max_depth': 7, 'eta': 0.01983124053081527}. Best is trial 3 with value: 0.49699.[0m


Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoo

[32m[I 2021-06-09 02:21:15,495][0m Trial 4 finished with value: 0.5048142 and parameters: {'max_depth': 9, 'eta': 0.08651860544376543}. Best is trial 3 with value: 0.49699.[0m


Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[0]	test-merror:0.53008
Will train until test-merror hasn't improved in 5 rounds.
[1]	test-merror:0.53008
[2]	test-merror:0.53069
[3]	test-merror:0.52768
[4]	test-merror:0.53069
[5]	test-merror:0.53069
[6]	test-merror:0.52768
[7]	test-merror:0.52768
[8]	test-merror:0.52407
[9]	test-merror:0.52527
[10]	test-merror:0.52527
[11]	test-merror:0.52347
[12]	test-merror:0.52166
[13]	test-merror:0.51986
[14]	test-merror:0.52046
[15]	test-merror:0.51624
[16]	test-merror:0.51263
[17]	test-merror:0.51685
[18]	test-merror:0.51865
[19]	test-merror:0.51986
[20]	test-merror:0.52106
[21]	test-merror:0.51925
Stopping. Best iteration:
[16]	test-merror:0.51263

1.0
1.0
1.0
2.0
2.0
0.0
1.0
0.0
2.0
1.0
0.0
1.0
1.0
2.0
1.0
1.0
1.0


In [7]:
df

Unnamed: 0,pred_m7
0,2.0
1,1.0
2,2.0
