# Imports & Load Data
作業に必要なライブラリをインポートして、 以下のデータを読み込みます。

* stock_price : 株価情報
* stock_list : 銘柄情報
* stock_fin : 財務諸表
* stock_labels : 目的変数

In [2]:
!pip install catboost 0.24.4 

Collecting catboost
[?25l  Downloading https://files.pythonhosted.org/packages/96/3b/bb419654adcf7efff42ed8a3f84e50c8f236424b7ed1cc8ccd290852e003/catboost-0.24.4-cp37-none-manylinux1_x86_64.whl (65.7MB)
[K    100% |████████████████████████████████| 65.7MB 298kB/s eta 0:00:01    15% |████▉                           | 10.0MB 10.4MB/s eta 0:00:06    20% |██████▋                         | 13.5MB 13.3MB/s eta 0:00:04    21% |██████▉                         | 14.0MB 12.3MB/s eta 0:00:05    24% |███████▊                        | 15.8MB 14.2MB/s eta 0:00:04    24% |████████                        | 16.4MB 11.3MB/s eta 0:00:05    25% |████████▎                       | 17.0MB 10.4MB/s eta 0:00:05    26% |████████▋                       | 17.6MB 13.4MB/s eta 0:00:04
Collecting plotly (from catboost)
[?25l  Downloading https://files.pythonhosted.org/packages/1f/f6/bd3c17c8003b6641df1228e80e1acac97ed8402635e46c2571f8e1ef63af/plotly-4.14.3-py2.py3-none-any.whl (13.2MB)
[K    100% |██████████████

In [3]:
import os
import pickle
import sys
import warnings
from glob import glob

import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import shap
import  xgboost as xgb
from scipy.stats import spearmanr
from sklearn.ensemble import (
    ExtraTreesRegressor,
    GradientBoostingRegressor,
    RandomForestRegressor,
)
from sklearn.metrics import accuracy_score, mean_squared_error
from tqdm.auto import tqdm
from catboost import CatBoostRegressor


# 表示用の設定を変更します
%matplotlib inline
pd.options.display.max_rows = 100
pd.options.display.max_columns = 100
pd.options.display.width = 120

In [4]:
# python 3.7.3であることを確認します
import sys
print(sys.version)

3.7.3 (default, Mar 27 2019, 22:11:17) 
[GCC 7.3.0]


In [5]:
# データセット保存先ディレクトリ（""の中身はご自身の環境に合わせて定義してください。）
dataset_dir="/path/to"

In [6]:
# 読み込むファイルを定義します。
inputs = {
    "stock_list": f"{dataset_dir}/stock_list.csv.gz",
    "stock_price": f"{dataset_dir}/stock_price.csv.gz",
    "stock_fin": f"{dataset_dir}/stock_fin.csv.gz",
    # 本チュートリアルでは使用しないため、コメントアウトしています。
    # "stock_fin_price": f"{dataset_dir}/stock_fin_price.csv.gz",
    "stock_labels": f"{dataset_dir}/stock_labels.csv.gz",
}

# ファイルを読み込みます
dfs = {}
for k, v in inputs.items():
    print(k)
    dfs[k] = pd.read_csv(v)

stock_list
stock_price
stock_fin
stock_labels


# 特徴量の生成

In [7]:
def get_feature_columns(dfs, train_X, column_group="fundamental+technical"):
    # 特徴量グループを定義
    # ファンダメンタル
    fundamental_cols = dfs["stock_fin"].select_dtypes("float64").columns
    fundamental_cols = fundamental_cols[
        fundamental_cols != "Result_Dividend DividendPayableDate"
        ]
    fundamental_cols = fundamental_cols[fundamental_cols != "Local Code"]
    # 価格変化率
    returns_cols = [x for x in train_X.columns if "return" in x]
    # テクニカル
    technical_cols = [
        x for x in train_X.columns if
        (x not in fundamental_cols) and (x != "code")
    ]
    columns = {
        "fundamental_only": fundamental_cols,
        "return_only": returns_cols,
        "technical_only": technical_cols,
        "fundamental+technical": list(fundamental_cols) + list(
            technical_cols),
    }
    return columns[column_group]

In [8]:
model_path = os.path.join(os.path.dirname("__file__"), "../model")
test_X_path = os.path.join(os.path.dirname("__file__"), "../model/proceed_datas/test_X")
test_y_path = os.path.join(os.path.dirname("__file__"), "../model/proceed_datas/test_y")
val_X_path = os.path.join(os.path.dirname("__file__"), "../model/proceed_datas/val_X")
val_y_path = os.path.join(os.path.dirname("__file__"), "../model/proceed_datas/val_y")
train_X_path = os.path.join(os.path.dirname("__file__"), "../model/proceed_datas/train_X")
train_y_path = os.path.join(os.path.dirname("__file__"), "../model/proceed_datas/train_y")

In [9]:
# 対象の目的変数を定義
labels = {
#    "label_high_5",
#    "label_high_10",
    "label_high_20",
#    "label_low_5",
#    "label_low_10",
    "label_low_20",
}

In [10]:
# 特徴量追加済みデータ
proceed_datas = {
    "train_X",
    "train_y",
    "val_X",
    "val_y",
    "test_X",
    "test_y"
}

In [13]:
# ライブラリインポート
from sklearn.model_selection import GridSearchCV

'''
reg_cv = GridSearchCV(xgb, {
    "eta": [0.01, 0.05, 0.1], 
    "gamma": [0.1,0.2,0.3,0.4,0.5],
    "n_estimators": [50, 100, 200], 
    "max_depth": [5, 7, 9,10,20,30],
    "subsample":[0.6,0.8,1],
    "colsample_bytree": [0.5,0.7,0.9],
}, verbose=1)
'''
reg_cv = GridSearchCV(CatBoostRegressor(), {'iterations': [222], 'depth': [9], 'learning_rate': [0.18831273426065617],               
        'random_strength': [33], 'bagging_temperature': [0.06584346890760226], 
        'od_type': ['Iter'], 'od_wait': [21]
    }, verbose=1)


In [14]:
for label in tqdm(labels):
    data_X = os.path.join(train_X_path, f"train_X_{label}.pkl")
    with open(data_X , "rb") as f:
        train_X = pickle.load(f)
    data_y = os.path.join(train_y_path, f"train_y_{label}.pkl")
    with open(data_y , "rb") as f:
        train_y = pickle.load(f)
        
    feature_columns = get_feature_columns(dfs, train_X, column_group='fundamental+technical')
    # 訓練実施
    reg_cv.fit(train_X[feature_columns].values, train_y.values)

HBox(children=(IntProgress(value=0, max=2), HTML(value='')))

Fitting 3 folds for each of 1 candidates, totalling 3 fits


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


0:	learn: 0.1599492	total: 113ms	remaining: 24.9s
1:	learn: 0.1592691	total: 140ms	remaining: 15.4s
2:	learn: 0.1584998	total: 166ms	remaining: 12.1s
3:	learn: 0.1575706	total: 192ms	remaining: 10.5s
4:	learn: 0.1571170	total: 218ms	remaining: 9.46s
5:	learn: 0.1560890	total: 243ms	remaining: 8.73s
6:	learn: 0.1548959	total: 270ms	remaining: 8.3s
7:	learn: 0.1545646	total: 299ms	remaining: 8.01s
8:	learn: 0.1541441	total: 340ms	remaining: 8.04s
9:	learn: 0.1534789	total: 366ms	remaining: 7.75s
10:	learn: 0.1532661	total: 394ms	remaining: 7.56s
11:	learn: 0.1529457	total: 419ms	remaining: 7.34s
12:	learn: 0.1526601	total: 446ms	remaining: 7.18s
13:	learn: 0.1525200	total: 474ms	remaining: 7.05s
14:	learn: 0.1519625	total: 501ms	remaining: 6.92s
15:	learn: 0.1514368	total: 528ms	remaining: 6.8s
16:	learn: 0.1513033	total: 552ms	remaining: 6.66s
17:	learn: 0.1511497	total: 584ms	remaining: 6.62s
18:	learn: 0.1507912	total: 611ms	remaining: 6.53s
19:	learn: 0.1506301	total: 636ms	remaining

164:	learn: 0.1012757	total: 4.27s	remaining: 1.48s
165:	learn: 0.1010187	total: 4.29s	remaining: 1.45s
166:	learn: 0.1007683	total: 4.32s	remaining: 1.42s
167:	learn: 0.1005756	total: 4.34s	remaining: 1.4s
168:	learn: 0.1002738	total: 4.36s	remaining: 1.37s
169:	learn: 0.1001125	total: 4.38s	remaining: 1.34s
170:	learn: 0.0998389	total: 4.41s	remaining: 1.31s
171:	learn: 0.0995705	total: 4.43s	remaining: 1.29s
172:	learn: 0.0992575	total: 4.45s	remaining: 1.26s
173:	learn: 0.0988312	total: 4.48s	remaining: 1.24s
174:	learn: 0.0985210	total: 4.5s	remaining: 1.21s
175:	learn: 0.0982179	total: 4.53s	remaining: 1.18s
176:	learn: 0.0980136	total: 4.55s	remaining: 1.16s
177:	learn: 0.0978817	total: 4.58s	remaining: 1.13s
178:	learn: 0.0974666	total: 4.6s	remaining: 1.1s
179:	learn: 0.0972661	total: 4.62s	remaining: 1.08s
180:	learn: 0.0969581	total: 4.65s	remaining: 1.05s
181:	learn: 0.0967952	total: 4.67s	remaining: 1.02s
182:	learn: 0.0964346	total: 4.69s	remaining: 1s
183:	learn: 0.09611

103:	learn: 0.1349514	total: 2.45s	remaining: 2.77s
104:	learn: 0.1342016	total: 2.47s	remaining: 2.75s
105:	learn: 0.1339169	total: 2.5s	remaining: 2.73s
106:	learn: 0.1334818	total: 2.52s	remaining: 2.71s
107:	learn: 0.1330659	total: 2.55s	remaining: 2.69s
108:	learn: 0.1325110	total: 2.58s	remaining: 2.67s
109:	learn: 0.1317955	total: 2.61s	remaining: 2.66s
110:	learn: 0.1313788	total: 2.64s	remaining: 2.64s
111:	learn: 0.1309829	total: 2.67s	remaining: 2.62s
112:	learn: 0.1304687	total: 2.69s	remaining: 2.6s
113:	learn: 0.1298108	total: 2.71s	remaining: 2.57s
114:	learn: 0.1294132	total: 2.74s	remaining: 2.55s
115:	learn: 0.1290041	total: 2.76s	remaining: 2.52s
116:	learn: 0.1286349	total: 2.78s	remaining: 2.5s
117:	learn: 0.1284152	total: 2.81s	remaining: 2.47s
118:	learn: 0.1282067	total: 2.83s	remaining: 2.45s
119:	learn: 0.1280042	total: 2.86s	remaining: 2.43s
120:	learn: 0.1278155	total: 2.88s	remaining: 2.4s
121:	learn: 0.1275722	total: 2.9s	remaining: 2.38s
122:	learn: 0.127

48:	learn: 0.1719525	total: 1.31s	remaining: 4.64s
49:	learn: 0.1718423	total: 1.34s	remaining: 4.61s
50:	learn: 0.1715176	total: 1.36s	remaining: 4.58s
51:	learn: 0.1713102	total: 1.39s	remaining: 4.55s
52:	learn: 0.1710311	total: 1.43s	remaining: 4.55s
53:	learn: 0.1707338	total: 1.47s	remaining: 4.57s
54:	learn: 0.1701729	total: 1.5s	remaining: 4.55s
55:	learn: 0.1699700	total: 1.54s	remaining: 4.56s
56:	learn: 0.1695899	total: 1.56s	remaining: 4.52s
57:	learn: 0.1691924	total: 1.59s	remaining: 4.49s
58:	learn: 0.1689132	total: 1.62s	remaining: 4.47s
59:	learn: 0.1686919	total: 1.65s	remaining: 4.44s
60:	learn: 0.1684579	total: 1.67s	remaining: 4.41s
61:	learn: 0.1680196	total: 1.7s	remaining: 4.38s
62:	learn: 0.1669326	total: 1.73s	remaining: 4.37s
63:	learn: 0.1665677	total: 1.77s	remaining: 4.37s
64:	learn: 0.1660109	total: 1.81s	remaining: 4.36s
65:	learn: 0.1650247	total: 1.85s	remaining: 4.37s
66:	learn: 0.1641077	total: 1.89s	remaining: 4.37s
67:	learn: 0.1630693	total: 1.92s

210:	learn: 0.1102902	total: 5.87s	remaining: 306ms
211:	learn: 0.1100649	total: 5.91s	remaining: 279ms
212:	learn: 0.1098300	total: 5.94s	remaining: 251ms
213:	learn: 0.1094233	total: 5.96s	remaining: 223ms
214:	learn: 0.1092571	total: 5.99s	remaining: 195ms
215:	learn: 0.1090529	total: 6.02s	remaining: 167ms
216:	learn: 0.1088700	total: 6.05s	remaining: 139ms
217:	learn: 0.1087253	total: 6.08s	remaining: 112ms
218:	learn: 0.1084454	total: 6.11s	remaining: 83.8ms
219:	learn: 0.1081685	total: 6.14s	remaining: 55.8ms
220:	learn: 0.1079985	total: 6.16s	remaining: 27.9ms
221:	learn: 0.1076874	total: 6.19s	remaining: 0us


[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:   21.0s finished


0:	learn: 0.1798723	total: 43.8ms	remaining: 9.69s
1:	learn: 0.1789859	total: 71.1ms	remaining: 7.82s
2:	learn: 0.1780471	total: 96.1ms	remaining: 7.01s
3:	learn: 0.1768942	total: 126ms	remaining: 6.87s
4:	learn: 0.1763451	total: 154ms	remaining: 6.67s
5:	learn: 0.1752273	total: 186ms	remaining: 6.7s
6:	learn: 0.1738233	total: 217ms	remaining: 6.66s
7:	learn: 0.1734764	total: 244ms	remaining: 6.51s
8:	learn: 0.1730545	total: 272ms	remaining: 6.45s
9:	learn: 0.1722565	total: 296ms	remaining: 6.28s
10:	learn: 0.1718921	total: 320ms	remaining: 6.13s
11:	learn: 0.1714188	total: 344ms	remaining: 6.02s
12:	learn: 0.1710984	total: 367ms	remaining: 5.9s
13:	learn: 0.1709429	total: 390ms	remaining: 5.79s
14:	learn: 0.1698921	total: 413ms	remaining: 5.7s
15:	learn: 0.1695016	total: 442ms	remaining: 5.7s
16:	learn: 0.1693713	total: 469ms	remaining: 5.66s
17:	learn: 0.1691785	total: 494ms	remaining: 5.59s
18:	learn: 0.1688542	total: 518ms	remaining: 5.53s
19:	learn: 0.1686782	total: 541ms	remainin

165:	learn: 0.1233248	total: 4.32s	remaining: 1.46s
166:	learn: 0.1231069	total: 4.35s	remaining: 1.43s
167:	learn: 0.1228482	total: 4.37s	remaining: 1.41s
168:	learn: 0.1225678	total: 4.4s	remaining: 1.38s
169:	learn: 0.1223274	total: 4.42s	remaining: 1.35s
170:	learn: 0.1220301	total: 4.45s	remaining: 1.33s
171:	learn: 0.1217327	total: 4.47s	remaining: 1.3s
172:	learn: 0.1214646	total: 4.5s	remaining: 1.27s
173:	learn: 0.1212456	total: 4.53s	remaining: 1.25s
174:	learn: 0.1210554	total: 4.56s	remaining: 1.22s
175:	learn: 0.1207165	total: 4.59s	remaining: 1.2s
176:	learn: 0.1205105	total: 4.61s	remaining: 1.17s
177:	learn: 0.1196474	total: 4.64s	remaining: 1.15s
178:	learn: 0.1194184	total: 4.67s	remaining: 1.12s
179:	learn: 0.1192060	total: 4.69s	remaining: 1.09s
180:	learn: 0.1189593	total: 4.71s	remaining: 1.07s
181:	learn: 0.1186881	total: 4.74s	remaining: 1.04s
182:	learn: 0.1184924	total: 4.77s	remaining: 1.02s
183:	learn: 0.1183427	total: 4.79s	remaining: 990ms
184:	learn: 0.11

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


0:	learn: 0.0715550	total: 34.4ms	remaining: 7.61s
1:	learn: 0.0707008	total: 65.8ms	remaining: 7.24s
2:	learn: 0.0694192	total: 94.6ms	remaining: 6.9s
3:	learn: 0.0686060	total: 120ms	remaining: 6.54s
4:	learn: 0.0682548	total: 146ms	remaining: 6.36s
5:	learn: 0.0679019	total: 172ms	remaining: 6.2s
6:	learn: 0.0669789	total: 200ms	remaining: 6.15s
7:	learn: 0.0668095	total: 229ms	remaining: 6.14s
8:	learn: 0.0663844	total: 257ms	remaining: 6.09s
9:	learn: 0.0658959	total: 285ms	remaining: 6.04s
10:	learn: 0.0657639	total: 312ms	remaining: 5.99s
11:	learn: 0.0655412	total: 340ms	remaining: 5.95s
12:	learn: 0.0653097	total: 367ms	remaining: 5.9s
13:	learn: 0.0651447	total: 391ms	remaining: 5.81s
14:	learn: 0.0649209	total: 416ms	remaining: 5.74s
15:	learn: 0.0646543	total: 443ms	remaining: 5.7s
16:	learn: 0.0645369	total: 473ms	remaining: 5.7s
17:	learn: 0.0644499	total: 500ms	remaining: 5.67s
18:	learn: 0.0642366	total: 527ms	remaining: 5.63s
19:	learn: 0.0641029	total: 555ms	remaining

164:	learn: 0.0455560	total: 4.44s	remaining: 1.53s
165:	learn: 0.0454876	total: 4.47s	remaining: 1.51s
166:	learn: 0.0453900	total: 4.49s	remaining: 1.48s
167:	learn: 0.0452890	total: 4.52s	remaining: 1.45s
168:	learn: 0.0451416	total: 4.55s	remaining: 1.43s
169:	learn: 0.0450216	total: 4.58s	remaining: 1.4s
170:	learn: 0.0449788	total: 4.61s	remaining: 1.37s
171:	learn: 0.0449022	total: 4.64s	remaining: 1.35s
172:	learn: 0.0448317	total: 4.67s	remaining: 1.32s
173:	learn: 0.0447487	total: 4.7s	remaining: 1.29s
174:	learn: 0.0446520	total: 4.72s	remaining: 1.27s
175:	learn: 0.0445793	total: 4.75s	remaining: 1.24s
176:	learn: 0.0445143	total: 4.78s	remaining: 1.21s
177:	learn: 0.0444016	total: 4.8s	remaining: 1.19s
178:	learn: 0.0443171	total: 4.83s	remaining: 1.16s
179:	learn: 0.0442611	total: 4.86s	remaining: 1.13s
180:	learn: 0.0441759	total: 4.89s	remaining: 1.11s
181:	learn: 0.0441289	total: 4.92s	remaining: 1.08s
182:	learn: 0.0440486	total: 4.95s	remaining: 1.05s
183:	learn: 0.0

104:	learn: 0.0532833	total: 2.59s	remaining: 2.89s
105:	learn: 0.0531896	total: 2.62s	remaining: 2.87s
106:	learn: 0.0530384	total: 2.64s	remaining: 2.84s
107:	learn: 0.0529034	total: 2.67s	remaining: 2.81s
108:	learn: 0.0527508	total: 2.69s	remaining: 2.79s
109:	learn: 0.0526326	total: 2.71s	remaining: 2.76s
110:	learn: 0.0525419	total: 2.73s	remaining: 2.73s
111:	learn: 0.0524500	total: 2.75s	remaining: 2.7s
112:	learn: 0.0522687	total: 2.77s	remaining: 2.67s
113:	learn: 0.0520999	total: 2.8s	remaining: 2.65s
114:	learn: 0.0519530	total: 2.82s	remaining: 2.63s
115:	learn: 0.0518446	total: 2.84s	remaining: 2.6s
116:	learn: 0.0516847	total: 2.87s	remaining: 2.57s
117:	learn: 0.0515906	total: 2.89s	remaining: 2.55s
118:	learn: 0.0514938	total: 2.91s	remaining: 2.52s
119:	learn: 0.0513878	total: 2.93s	remaining: 2.49s
120:	learn: 0.0512505	total: 2.95s	remaining: 2.46s
121:	learn: 0.0511290	total: 2.98s	remaining: 2.44s
122:	learn: 0.0510434	total: 3s	remaining: 2.42s
123:	learn: 0.0509

42:	learn: 0.0685209	total: 1.03s	remaining: 4.3s
43:	learn: 0.0684091	total: 1.06s	remaining: 4.27s
44:	learn: 0.0682855	total: 1.08s	remaining: 4.24s
45:	learn: 0.0681105	total: 1.1s	remaining: 4.21s
46:	learn: 0.0680561	total: 1.12s	remaining: 4.18s
47:	learn: 0.0679079	total: 1.15s	remaining: 4.15s
48:	learn: 0.0678399	total: 1.17s	remaining: 4.12s
49:	learn: 0.0677428	total: 1.19s	remaining: 4.11s
50:	learn: 0.0675860	total: 1.22s	remaining: 4.09s
51:	learn: 0.0674523	total: 1.24s	remaining: 4.07s
52:	learn: 0.0672812	total: 1.27s	remaining: 4.04s
53:	learn: 0.0671737	total: 1.29s	remaining: 4.02s
54:	learn: 0.0669566	total: 1.31s	remaining: 3.99s
55:	learn: 0.0668699	total: 1.33s	remaining: 3.96s
56:	learn: 0.0667596	total: 1.36s	remaining: 3.93s
57:	learn: 0.0666633	total: 1.38s	remaining: 3.9s
58:	learn: 0.0665675	total: 1.4s	remaining: 3.88s
59:	learn: 0.0664596	total: 1.43s	remaining: 3.86s
60:	learn: 0.0663841	total: 1.45s	remaining: 3.83s
61:	learn: 0.0661840	total: 1.47s	r

202:	learn: 0.0474820	total: 4.87s	remaining: 456ms
203:	learn: 0.0473814	total: 4.9s	remaining: 432ms
204:	learn: 0.0472945	total: 4.93s	remaining: 409ms
205:	learn: 0.0472248	total: 4.95s	remaining: 385ms
206:	learn: 0.0471514	total: 4.98s	remaining: 361ms
207:	learn: 0.0470528	total: 5s	remaining: 337ms
208:	learn: 0.0469798	total: 5.03s	remaining: 313ms
209:	learn: 0.0468892	total: 5.05s	remaining: 288ms
210:	learn: 0.0468083	total: 5.07s	remaining: 264ms
211:	learn: 0.0467162	total: 5.1s	remaining: 240ms
212:	learn: 0.0466171	total: 5.12s	remaining: 216ms
213:	learn: 0.0465442	total: 5.14s	remaining: 192ms
214:	learn: 0.0464597	total: 5.17s	remaining: 168ms
215:	learn: 0.0463763	total: 5.19s	remaining: 144ms
216:	learn: 0.0462922	total: 5.21s	remaining: 120ms
217:	learn: 0.0461695	total: 5.23s	remaining: 96ms
218:	learn: 0.0460700	total: 5.26s	remaining: 72ms
219:	learn: 0.0459130	total: 5.29s	remaining: 48ms
220:	learn: 0.0458248	total: 5.31s	remaining: 24ms
221:	learn: 0.0457403

[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:   20.9s finished


0:	learn: 0.0745855	total: 26.3ms	remaining: 5.81s
1:	learn: 0.0737869	total: 53.8ms	remaining: 5.92s
2:	learn: 0.0726931	total: 79.9ms	remaining: 5.83s
3:	learn: 0.0719675	total: 107ms	remaining: 5.84s
4:	learn: 0.0715352	total: 134ms	remaining: 5.83s
5:	learn: 0.0709408	total: 164ms	remaining: 5.9s
6:	learn: 0.0701650	total: 191ms	remaining: 5.86s
7:	learn: 0.0700019	total: 216ms	remaining: 5.78s
8:	learn: 0.0696849	total: 246ms	remaining: 5.82s
9:	learn: 0.0692217	total: 269ms	remaining: 5.71s
10:	learn: 0.0691179	total: 295ms	remaining: 5.65s
11:	learn: 0.0688891	total: 318ms	remaining: 5.56s
12:	learn: 0.0686722	total: 340ms	remaining: 5.47s
13:	learn: 0.0685426	total: 364ms	remaining: 5.4s
14:	learn: 0.0683412	total: 387ms	remaining: 5.34s
15:	learn: 0.0681261	total: 412ms	remaining: 5.3s
16:	learn: 0.0680161	total: 437ms	remaining: 5.27s
17:	learn: 0.0679222	total: 461ms	remaining: 5.22s
18:	learn: 0.0676946	total: 485ms	remaining: 5.18s
19:	learn: 0.0675730	total: 508ms	remaini

167:	learn: 0.0513863	total: 4.13s	remaining: 1.33s
168:	learn: 0.0513429	total: 4.16s	remaining: 1.3s
169:	learn: 0.0512405	total: 4.19s	remaining: 1.28s
170:	learn: 0.0511768	total: 4.21s	remaining: 1.26s
171:	learn: 0.0511116	total: 4.24s	remaining: 1.23s
172:	learn: 0.0510634	total: 4.26s	remaining: 1.21s
173:	learn: 0.0509744	total: 4.29s	remaining: 1.18s
174:	learn: 0.0508799	total: 4.31s	remaining: 1.16s
175:	learn: 0.0507829	total: 4.34s	remaining: 1.13s
176:	learn: 0.0506718	total: 4.37s	remaining: 1.11s
177:	learn: 0.0506306	total: 4.39s	remaining: 1.08s
178:	learn: 0.0505023	total: 4.42s	remaining: 1.06s
179:	learn: 0.0504325	total: 4.44s	remaining: 1.04s
180:	learn: 0.0503590	total: 4.47s	remaining: 1.01s
181:	learn: 0.0503080	total: 4.49s	remaining: 987ms
182:	learn: 0.0502623	total: 4.51s	remaining: 962ms
183:	learn: 0.0501903	total: 4.54s	remaining: 938ms
184:	learn: 0.0501263	total: 4.57s	remaining: 914ms
185:	learn: 0.0500301	total: 4.6s	remaining: 890ms
186:	learn: 0.

In [15]:
# 結果を表示
print(reg_cv.best_params_)
print(reg_cv.best_score_)

{'bagging_temperature': 0.06584346890760226, 'depth': 9, 'iterations': 222, 'learning_rate': 0.18831273426065617, 'od_type': 'Iter', 'od_wait': 21, 'random_strength': 33}
0.2037709004629042


In [16]:
best_model = reg_cv.best_estimator_

In [17]:
SELECT_FIN_DATA_COLUMNS = ['Result_FinancialStatement FiscalYear', 'Result_FinancialStatement NetSales',
       'Result_FinancialStatement OperatingIncome', 'Result_FinancialStatement OrdinaryIncome',
       'Result_FinancialStatement NetIncome', 'Result_FinancialStatement TotalAssets',
       'Result_FinancialStatement NetAssets', 'Result_FinancialStatement CashFlowsFromOperatingActivities',
       'Result_FinancialStatement CashFlowsFromFinancingActivities',
       'Result_FinancialStatement CashFlowsFromInvestingActivities', 'Forecast_FinancialStatement FiscalYear',
       'Forecast_FinancialStatement NetSales', 'Forecast_FinancialStatement OperatingIncome',
       'Forecast_FinancialStatement OrdinaryIncome', 'Forecast_FinancialStatement NetIncome',
       'Result_Dividend FiscalYear', 'Result_Dividend QuarterlyDividendPerShare',
       'Result_Dividend AnnualDividendPerShare', 'Forecast_Dividend FiscalYear',
       'Forecast_Dividend QuarterlyDividendPerShare', 'Forecast_Dividend AnnualDividendPerShare',
       'IssuedShareEquityQuote IssuedShare','Section/Products', '33 Sector(Code)', '17 Sector(Code)']

In [18]:
# 学習用データセット定義
# ファンダメンタル情報
#fundamental_cols = dfs["stock_fin"].select_dtypes("float64").columns
fundamental_cols = pd.Index(SELECT_FIN_DATA_COLUMNS)
fundamental_cols = fundamental_cols[fundamental_cols != "Result_Dividend DividendPayableDate"]
fundamental_cols = fundamental_cols[fundamental_cols != "Local Code"]
# 価格変化率
returns_cols = [x for x in train_X.columns if "return" in x]
# テクニカル
technical_cols = [x for x in train_X.columns if (x not in fundamental_cols) and (x != "code")]

In [19]:
columns = {
    "fundamental_only": fundamental_cols,
    "return_only": returns_cols,
    "technical_only": technical_cols,
    "fundamental+technical": list(fundamental_cols) + list(technical_cols),
}

In [20]:
# 結果保存用
all_results = dict()
all_results['XGB'] = dict()

In [21]:
for label in labels:
    data_X = os.path.join(test_X_path, f"test_X_{label}.pkl")
    with open(data_X , "rb") as f:
        test_X = pickle.load(f)
    data_y = os.path.join(test_y_path, f"test_y_{label}.pkl")
    with open(data_y , "rb") as f:
        test_y = pickle.load(f)
    # データセット毎に処理
    for col in columns.keys():
        result = dict()
        # 目的変数毎に処理
        for label in tqdm(labels):
            if len(test_X[columns[col]]) > 0:
                # モデル取得
                pred_model = best_model
                # 学習
                pred_model.fit(train_X[columns[col]].values, train_y)
                # 結果データ作成
                result[label] = test_X[["code"]].copy()
                result[label]["datetime"] = test_X[columns[col]].index
                # 予測
                result[label]["predict"] = pred_model.predict(test_X[columns[col]].values)
                result[label]["predict_dir"] = np.sign(result[label]["predict"])
                # 実際の結果
                result[label]["actual"] = test_y.values
                result[label]["actual_dir"] = np.sign(result[label]["actual"])
                result[label].dropna(inplace=True)

        all_results['XGB'][col] = result

HBox(children=(IntProgress(value=0, max=2), HTML(value='')))

0:	learn: 0.0750718	total: 24.5ms	remaining: 5.41s
1:	learn: 0.0743800	total: 42.4ms	remaining: 4.67s
2:	learn: 0.0738418	total: 67.7ms	remaining: 4.94s
3:	learn: 0.0733880	total: 87.1ms	remaining: 4.75s
4:	learn: 0.0731010	total: 107ms	remaining: 4.63s
5:	learn: 0.0728329	total: 131ms	remaining: 4.7s
6:	learn: 0.0726182	total: 189ms	remaining: 5.79s
7:	learn: 0.0724297	total: 206ms	remaining: 5.5s
8:	learn: 0.0723181	total: 315ms	remaining: 7.45s
9:	learn: 0.0722257	total: 347ms	remaining: 7.37s
10:	learn: 0.0720304	total: 374ms	remaining: 7.18s
11:	learn: 0.0719606	total: 399ms	remaining: 6.97s
12:	learn: 0.0718747	total: 421ms	remaining: 6.77s
13:	learn: 0.0718418	total: 432ms	remaining: 6.41s
14:	learn: 0.0717493	total: 454ms	remaining: 6.26s
15:	learn: 0.0716658	total: 470ms	remaining: 6.05s
16:	learn: 0.0716020	total: 489ms	remaining: 5.89s
17:	learn: 0.0715368	total: 512ms	remaining: 5.81s
18:	learn: 0.0714849	total: 533ms	remaining: 5.7s
19:	learn: 0.0713851	total: 555ms	remain

164:	learn: 0.0621084	total: 2.51s	remaining: 868ms
165:	learn: 0.0620844	total: 2.52s	remaining: 852ms
166:	learn: 0.0620188	total: 2.54s	remaining: 836ms
167:	learn: 0.0619693	total: 2.55s	remaining: 820ms
168:	learn: 0.0619360	total: 2.56s	remaining: 804ms
169:	learn: 0.0618706	total: 2.58s	remaining: 788ms
170:	learn: 0.0618113	total: 2.59s	remaining: 773ms
171:	learn: 0.0617570	total: 2.61s	remaining: 758ms
172:	learn: 0.0616716	total: 2.62s	remaining: 741ms
173:	learn: 0.0616585	total: 2.63s	remaining: 725ms
174:	learn: 0.0616014	total: 2.64s	remaining: 709ms
175:	learn: 0.0615500	total: 2.65s	remaining: 693ms
176:	learn: 0.0614924	total: 2.66s	remaining: 677ms
177:	learn: 0.0614206	total: 2.67s	remaining: 661ms
178:	learn: 0.0613904	total: 2.69s	remaining: 646ms
179:	learn: 0.0613518	total: 2.7s	remaining: 630ms
180:	learn: 0.0612746	total: 2.71s	remaining: 615ms
181:	learn: 0.0612475	total: 2.73s	remaining: 599ms
182:	learn: 0.0611766	total: 2.74s	remaining: 583ms
183:	learn: 0

104:	learn: 0.0655896	total: 1.28s	remaining: 1.42s
105:	learn: 0.0655327	total: 1.29s	remaining: 1.41s
106:	learn: 0.0654559	total: 1.3s	remaining: 1.4s
107:	learn: 0.0654206	total: 1.31s	remaining: 1.39s
108:	learn: 0.0653617	total: 1.33s	remaining: 1.38s
109:	learn: 0.0653337	total: 1.34s	remaining: 1.36s
110:	learn: 0.0652591	total: 1.35s	remaining: 1.35s
111:	learn: 0.0651740	total: 1.36s	remaining: 1.34s
112:	learn: 0.0650750	total: 1.37s	remaining: 1.32s
113:	learn: 0.0649724	total: 1.39s	remaining: 1.31s
114:	learn: 0.0649116	total: 1.4s	remaining: 1.3s
115:	learn: 0.0648496	total: 1.41s	remaining: 1.29s
116:	learn: 0.0648366	total: 1.42s	remaining: 1.27s
117:	learn: 0.0647911	total: 1.43s	remaining: 1.26s
118:	learn: 0.0647351	total: 1.44s	remaining: 1.25s
119:	learn: 0.0646799	total: 1.45s	remaining: 1.24s
120:	learn: 0.0646102	total: 1.47s	remaining: 1.22s
121:	learn: 0.0645337	total: 1.48s	remaining: 1.21s
122:	learn: 0.0644833	total: 1.49s	remaining: 1.2s
123:	learn: 0.064

HBox(children=(IntProgress(value=0, max=2), HTML(value='')))

0:	learn: 0.0752718	total: 22.6ms	remaining: 4.99s
1:	learn: 0.0748620	total: 43.2ms	remaining: 4.75s
2:	learn: 0.0742857	total: 60.4ms	remaining: 4.41s
3:	learn: 0.0738481	total: 77.6ms	remaining: 4.23s
4:	learn: 0.0733291	total: 86.9ms	remaining: 3.77s
5:	learn: 0.0729990	total: 94.8ms	remaining: 3.41s
6:	learn: 0.0727550	total: 105ms	remaining: 3.21s
7:	learn: 0.0726009	total: 114ms	remaining: 3.05s
8:	learn: 0.0723743	total: 123ms	remaining: 2.9s
9:	learn: 0.0722635	total: 132ms	remaining: 2.8s
10:	learn: 0.0721429	total: 141ms	remaining: 2.71s
11:	learn: 0.0717411	total: 151ms	remaining: 2.64s
12:	learn: 0.0715955	total: 157ms	remaining: 2.52s
13:	learn: 0.0715268	total: 166ms	remaining: 2.46s
14:	learn: 0.0713612	total: 175ms	remaining: 2.41s
15:	learn: 0.0711097	total: 185ms	remaining: 2.38s
16:	learn: 0.0709759	total: 194ms	remaining: 2.34s
17:	learn: 0.0709294	total: 205ms	remaining: 2.32s
18:	learn: 0.0708014	total: 214ms	remaining: 2.29s
19:	learn: 0.0706866	total: 225ms	rem

174:	learn: 0.0667118	total: 1.21s	remaining: 325ms
175:	learn: 0.0666967	total: 1.22s	remaining: 318ms
176:	learn: 0.0666820	total: 1.22s	remaining: 311ms
177:	learn: 0.0666666	total: 1.23s	remaining: 303ms
178:	learn: 0.0666444	total: 1.23s	remaining: 296ms
179:	learn: 0.0666278	total: 1.24s	remaining: 289ms
180:	learn: 0.0666138	total: 1.25s	remaining: 282ms
181:	learn: 0.0665983	total: 1.25s	remaining: 275ms
182:	learn: 0.0665797	total: 1.26s	remaining: 268ms
183:	learn: 0.0665577	total: 1.26s	remaining: 261ms
184:	learn: 0.0665353	total: 1.27s	remaining: 254ms
185:	learn: 0.0665171	total: 1.28s	remaining: 247ms
186:	learn: 0.0664968	total: 1.28s	remaining: 240ms
187:	learn: 0.0664825	total: 1.29s	remaining: 233ms
188:	learn: 0.0664606	total: 1.29s	remaining: 226ms
189:	learn: 0.0664379	total: 1.3s	remaining: 219ms
190:	learn: 0.0664210	total: 1.31s	remaining: 212ms
191:	learn: 0.0664008	total: 1.31s	remaining: 205ms
192:	learn: 0.0663878	total: 1.32s	remaining: 198ms
193:	learn: 0

134:	learn: 0.0675077	total: 816ms	remaining: 526ms
135:	learn: 0.0674939	total: 822ms	remaining: 520ms
136:	learn: 0.0674712	total: 829ms	remaining: 514ms
137:	learn: 0.0674399	total: 835ms	remaining: 508ms
138:	learn: 0.0674145	total: 840ms	remaining: 502ms
139:	learn: 0.0673931	total: 845ms	remaining: 495ms
140:	learn: 0.0673661	total: 851ms	remaining: 489ms
141:	learn: 0.0673332	total: 856ms	remaining: 482ms
142:	learn: 0.0673131	total: 862ms	remaining: 476ms
143:	learn: 0.0672889	total: 867ms	remaining: 470ms
144:	learn: 0.0672733	total: 872ms	remaining: 463ms
145:	learn: 0.0672541	total: 878ms	remaining: 457ms
146:	learn: 0.0672447	total: 883ms	remaining: 451ms
147:	learn: 0.0672235	total: 888ms	remaining: 444ms
148:	learn: 0.0671970	total: 894ms	remaining: 438ms
149:	learn: 0.0671784	total: 900ms	remaining: 432ms
150:	learn: 0.0671593	total: 906ms	remaining: 426ms
151:	learn: 0.0671471	total: 911ms	remaining: 420ms
152:	learn: 0.0671383	total: 916ms	remaining: 413ms
153:	learn: 

HBox(children=(IntProgress(value=0, max=2), HTML(value='')))

0:	learn: 0.0753486	total: 19.5ms	remaining: 4.31s
1:	learn: 0.0739159	total: 42ms	remaining: 4.61s
2:	learn: 0.0728313	total: 70.6ms	remaining: 5.16s
3:	learn: 0.0721351	total: 93.7ms	remaining: 5.11s
4:	learn: 0.0717142	total: 116ms	remaining: 5.02s
5:	learn: 0.0713131	total: 139ms	remaining: 5.01s
6:	learn: 0.0708830	total: 167ms	remaining: 5.11s
7:	learn: 0.0706812	total: 194ms	remaining: 5.2s
8:	learn: 0.0701516	total: 220ms	remaining: 5.21s
9:	learn: 0.0696604	total: 240ms	remaining: 5.08s
10:	learn: 0.0693678	total: 259ms	remaining: 4.97s
11:	learn: 0.0688603	total: 281ms	remaining: 4.91s
12:	learn: 0.0686287	total: 299ms	remaining: 4.81s
13:	learn: 0.0684364	total: 318ms	remaining: 4.72s
14:	learn: 0.0682444	total: 337ms	remaining: 4.66s
15:	learn: 0.0680356	total: 356ms	remaining: 4.59s
16:	learn: 0.0678645	total: 376ms	remaining: 4.53s
17:	learn: 0.0677156	total: 393ms	remaining: 4.46s
18:	learn: 0.0675821	total: 418ms	remaining: 4.47s
19:	learn: 0.0674168	total: 445ms	remain

168:	learn: 0.0519654	total: 3.27s	remaining: 1.03s
169:	learn: 0.0518669	total: 3.29s	remaining: 1.01s
170:	learn: 0.0518332	total: 3.31s	remaining: 988ms
171:	learn: 0.0517380	total: 3.33s	remaining: 969ms
172:	learn: 0.0516748	total: 3.35s	remaining: 949ms
173:	learn: 0.0516212	total: 3.37s	remaining: 929ms
174:	learn: 0.0515419	total: 3.38s	remaining: 909ms
175:	learn: 0.0514511	total: 3.4s	remaining: 890ms
176:	learn: 0.0513808	total: 3.42s	remaining: 870ms
177:	learn: 0.0512975	total: 3.44s	remaining: 850ms
178:	learn: 0.0512455	total: 3.46s	remaining: 831ms
179:	learn: 0.0511785	total: 3.48s	remaining: 812ms
180:	learn: 0.0511028	total: 3.5s	remaining: 793ms
181:	learn: 0.0510083	total: 3.52s	remaining: 773ms
182:	learn: 0.0509265	total: 3.54s	remaining: 754ms
183:	learn: 0.0508544	total: 3.55s	remaining: 734ms
184:	learn: 0.0507749	total: 3.57s	remaining: 715ms
185:	learn: 0.0507281	total: 3.59s	remaining: 695ms
186:	learn: 0.0506497	total: 3.61s	remaining: 676ms
187:	learn: 0.

107:	learn: 0.0572937	total: 1.99s	remaining: 2.1s
108:	learn: 0.0571718	total: 2.01s	remaining: 2.09s
109:	learn: 0.0570471	total: 2.03s	remaining: 2.07s
110:	learn: 0.0569798	total: 2.05s	remaining: 2.05s
111:	learn: 0.0568839	total: 2.07s	remaining: 2.03s
112:	learn: 0.0567257	total: 2.09s	remaining: 2.02s
113:	learn: 0.0566146	total: 2.11s	remaining: 2s
114:	learn: 0.0565351	total: 2.13s	remaining: 1.98s
115:	learn: 0.0563980	total: 2.14s	remaining: 1.96s
116:	learn: 0.0562987	total: 2.16s	remaining: 1.94s
117:	learn: 0.0562029	total: 2.18s	remaining: 1.92s
118:	learn: 0.0561083	total: 2.2s	remaining: 1.91s
119:	learn: 0.0559923	total: 2.22s	remaining: 1.89s
120:	learn: 0.0558992	total: 2.24s	remaining: 1.87s
121:	learn: 0.0557730	total: 2.26s	remaining: 1.85s
122:	learn: 0.0556918	total: 2.28s	remaining: 1.83s
123:	learn: 0.0555970	total: 2.3s	remaining: 1.82s
124:	learn: 0.0555365	total: 2.32s	remaining: 1.8s
125:	learn: 0.0554860	total: 2.33s	remaining: 1.78s
126:	learn: 0.05540

HBox(children=(IntProgress(value=0, max=2), HTML(value='')))

0:	learn: 0.0743670	total: 27.6ms	remaining: 6.1s
1:	learn: 0.0734376	total: 52.8ms	remaining: 5.81s
2:	learn: 0.0724832	total: 76.2ms	remaining: 5.56s
3:	learn: 0.0722080	total: 99.3ms	remaining: 5.41s
4:	learn: 0.0719552	total: 122ms	remaining: 5.29s
5:	learn: 0.0715132	total: 145ms	remaining: 5.23s
6:	learn: 0.0708835	total: 169ms	remaining: 5.19s
7:	learn: 0.0708189	total: 191ms	remaining: 5.12s
8:	learn: 0.0701351	total: 214ms	remaining: 5.07s
9:	learn: 0.0696264	total: 240ms	remaining: 5.09s
10:	learn: 0.0692107	total: 264ms	remaining: 5.06s
11:	learn: 0.0689894	total: 287ms	remaining: 5.02s
12:	learn: 0.0687501	total: 310ms	remaining: 4.99s
13:	learn: 0.0686465	total: 333ms	remaining: 4.95s
14:	learn: 0.0684284	total: 360ms	remaining: 4.96s
15:	learn: 0.0681882	total: 384ms	remaining: 4.94s
16:	learn: 0.0681108	total: 407ms	remaining: 4.91s
17:	learn: 0.0679660	total: 432ms	remaining: 4.89s
18:	learn: 0.0676954	total: 456ms	remaining: 4.87s
19:	learn: 0.0676226	total: 479ms	rema

162:	learn: 0.0519843	total: 4.42s	remaining: 1.6s
163:	learn: 0.0518890	total: 4.45s	remaining: 1.57s
164:	learn: 0.0517689	total: 4.47s	remaining: 1.54s
165:	learn: 0.0517024	total: 4.5s	remaining: 1.52s
166:	learn: 0.0516265	total: 4.52s	remaining: 1.49s
167:	learn: 0.0515703	total: 4.55s	remaining: 1.46s
168:	learn: 0.0514597	total: 4.58s	remaining: 1.44s
169:	learn: 0.0513969	total: 4.6s	remaining: 1.41s
170:	learn: 0.0513199	total: 4.63s	remaining: 1.38s
171:	learn: 0.0512099	total: 4.66s	remaining: 1.35s
172:	learn: 0.0510999	total: 4.69s	remaining: 1.33s
173:	learn: 0.0510024	total: 4.71s	remaining: 1.3s
174:	learn: 0.0509265	total: 4.74s	remaining: 1.27s
175:	learn: 0.0508076	total: 4.76s	remaining: 1.24s
176:	learn: 0.0507815	total: 4.78s	remaining: 1.22s
177:	learn: 0.0507325	total: 4.81s	remaining: 1.19s
178:	learn: 0.0506480	total: 4.84s	remaining: 1.16s
179:	learn: 0.0505964	total: 4.86s	remaining: 1.13s
180:	learn: 0.0505686	total: 4.88s	remaining: 1.11s
181:	learn: 0.05

104:	learn: 0.0573100	total: 2.85s	remaining: 3.17s
105:	learn: 0.0572656	total: 2.87s	remaining: 3.14s
106:	learn: 0.0571621	total: 2.9s	remaining: 3.11s
107:	learn: 0.0570128	total: 2.92s	remaining: 3.08s
108:	learn: 0.0569033	total: 2.95s	remaining: 3.05s
109:	learn: 0.0567566	total: 2.97s	remaining: 3.02s
110:	learn: 0.0566232	total: 3s	remaining: 3s
111:	learn: 0.0564870	total: 3.02s	remaining: 2.96s
112:	learn: 0.0563769	total: 3.05s	remaining: 2.94s
113:	learn: 0.0562602	total: 3.07s	remaining: 2.91s
114:	learn: 0.0562170	total: 3.1s	remaining: 2.88s
115:	learn: 0.0561266	total: 3.12s	remaining: 2.85s
116:	learn: 0.0559855	total: 3.15s	remaining: 2.82s
117:	learn: 0.0558892	total: 3.17s	remaining: 2.79s
118:	learn: 0.0557700	total: 3.2s	remaining: 2.77s
119:	learn: 0.0556769	total: 3.22s	remaining: 2.74s
120:	learn: 0.0555664	total: 3.25s	remaining: 2.71s
121:	learn: 0.0554833	total: 3.28s	remaining: 2.69s
122:	learn: 0.0553813	total: 3.3s	remaining: 2.66s
123:	learn: 0.0553266	

HBox(children=(IntProgress(value=0, max=2), HTML(value='')))

0:	learn: 0.0750718	total: 46.6ms	remaining: 10.3s
1:	learn: 0.0743800	total: 61ms	remaining: 6.71s
2:	learn: 0.0738418	total: 74.3ms	remaining: 5.42s
3:	learn: 0.0733880	total: 87.5ms	remaining: 4.77s
4:	learn: 0.0731010	total: 99.4ms	remaining: 4.32s
5:	learn: 0.0728329	total: 113ms	remaining: 4.05s
6:	learn: 0.0726182	total: 125ms	remaining: 3.85s
7:	learn: 0.0724297	total: 139ms	remaining: 3.71s
8:	learn: 0.0723181	total: 151ms	remaining: 3.58s
9:	learn: 0.0722257	total: 162ms	remaining: 3.43s
10:	learn: 0.0720304	total: 172ms	remaining: 3.3s
11:	learn: 0.0719606	total: 181ms	remaining: 3.17s
12:	learn: 0.0718747	total: 192ms	remaining: 3.09s
13:	learn: 0.0718418	total: 196ms	remaining: 2.91s
14:	learn: 0.0717493	total: 206ms	remaining: 2.84s
15:	learn: 0.0716658	total: 216ms	remaining: 2.78s
16:	learn: 0.0716020	total: 228ms	remaining: 2.75s
17:	learn: 0.0715368	total: 238ms	remaining: 2.7s
18:	learn: 0.0714849	total: 249ms	remaining: 2.66s
19:	learn: 0.0713851	total: 259ms	remain

164:	learn: 0.0621084	total: 1.8s	remaining: 623ms
165:	learn: 0.0620844	total: 1.81s	remaining: 612ms
166:	learn: 0.0620188	total: 1.83s	remaining: 602ms
167:	learn: 0.0619693	total: 1.84s	remaining: 591ms
168:	learn: 0.0619360	total: 1.85s	remaining: 581ms
169:	learn: 0.0618706	total: 1.86s	remaining: 570ms
170:	learn: 0.0618113	total: 1.87s	remaining: 559ms
171:	learn: 0.0617570	total: 1.88s	remaining: 548ms
172:	learn: 0.0616716	total: 1.9s	remaining: 537ms
173:	learn: 0.0616585	total: 1.91s	remaining: 526ms
174:	learn: 0.0616014	total: 1.92s	remaining: 515ms
175:	learn: 0.0615500	total: 1.93s	remaining: 504ms
176:	learn: 0.0614924	total: 1.94s	remaining: 493ms
177:	learn: 0.0614206	total: 1.95s	remaining: 482ms
178:	learn: 0.0613904	total: 1.96s	remaining: 472ms
179:	learn: 0.0613518	total: 1.97s	remaining: 461ms
180:	learn: 0.0612746	total: 1.99s	remaining: 450ms
181:	learn: 0.0612475	total: 2s	remaining: 439ms
182:	learn: 0.0611766	total: 2.01s	remaining: 428ms
183:	learn: 0.061

117:	learn: 0.0647911	total: 1.27s	remaining: 1.11s
118:	learn: 0.0647351	total: 1.28s	remaining: 1.11s
119:	learn: 0.0646799	total: 1.29s	remaining: 1.09s
120:	learn: 0.0646102	total: 1.3s	remaining: 1.08s
121:	learn: 0.0645337	total: 1.31s	remaining: 1.08s
122:	learn: 0.0644833	total: 1.32s	remaining: 1.06s
123:	learn: 0.0644037	total: 1.33s	remaining: 1.05s
124:	learn: 0.0643418	total: 1.35s	remaining: 1.05s
125:	learn: 0.0642985	total: 1.36s	remaining: 1.03s
126:	learn: 0.0642440	total: 1.37s	remaining: 1.02s
127:	learn: 0.0641937	total: 1.38s	remaining: 1.01s
128:	learn: 0.0641673	total: 1.39s	remaining: 1s
129:	learn: 0.0641373	total: 1.4s	remaining: 993ms
130:	learn: 0.0640434	total: 1.41s	remaining: 982ms
131:	learn: 0.0639718	total: 1.42s	remaining: 971ms
132:	learn: 0.0638544	total: 1.44s	remaining: 962ms
133:	learn: 0.0638084	total: 1.45s	remaining: 953ms
134:	learn: 0.0637490	total: 1.46s	remaining: 943ms
135:	learn: 0.0637053	total: 1.47s	remaining: 932ms
136:	learn: 0.063

HBox(children=(IntProgress(value=0, max=2), HTML(value='')))

0:	learn: 0.0752718	total: 12.4ms	remaining: 2.73s
1:	learn: 0.0748620	total: 23.1ms	remaining: 2.54s
2:	learn: 0.0742857	total: 31.5ms	remaining: 2.3s
3:	learn: 0.0738481	total: 38.7ms	remaining: 2.11s
4:	learn: 0.0733291	total: 45.2ms	remaining: 1.96s
5:	learn: 0.0729990	total: 52.3ms	remaining: 1.88s
6:	learn: 0.0727550	total: 59ms	remaining: 1.81s
7:	learn: 0.0726009	total: 66.7ms	remaining: 1.78s
8:	learn: 0.0723743	total: 73.5ms	remaining: 1.74s
9:	learn: 0.0722635	total: 79.3ms	remaining: 1.68s
10:	learn: 0.0721429	total: 86.1ms	remaining: 1.65s
11:	learn: 0.0717411	total: 92.2ms	remaining: 1.61s
12:	learn: 0.0715955	total: 96.5ms	remaining: 1.55s
13:	learn: 0.0715268	total: 103ms	remaining: 1.53s
14:	learn: 0.0713612	total: 109ms	remaining: 1.5s
15:	learn: 0.0711097	total: 115ms	remaining: 1.48s
16:	learn: 0.0709759	total: 121ms	remaining: 1.45s
17:	learn: 0.0709294	total: 127ms	remaining: 1.44s
18:	learn: 0.0708014	total: 134ms	remaining: 1.43s
19:	learn: 0.0706866	total: 139m

187:	learn: 0.0664825	total: 1.17s	remaining: 212ms
188:	learn: 0.0664606	total: 1.18s	remaining: 205ms
189:	learn: 0.0664379	total: 1.18s	remaining: 199ms
190:	learn: 0.0664210	total: 1.19s	remaining: 193ms
191:	learn: 0.0664008	total: 1.2s	remaining: 187ms
192:	learn: 0.0663878	total: 1.2s	remaining: 180ms
193:	learn: 0.0663708	total: 1.21s	remaining: 174ms
194:	learn: 0.0663555	total: 1.21s	remaining: 168ms
195:	learn: 0.0663300	total: 1.22s	remaining: 162ms
196:	learn: 0.0663061	total: 1.22s	remaining: 155ms
197:	learn: 0.0663015	total: 1.23s	remaining: 149ms
198:	learn: 0.0662824	total: 1.24s	remaining: 143ms
199:	learn: 0.0662577	total: 1.24s	remaining: 136ms
200:	learn: 0.0662374	total: 1.25s	remaining: 130ms
201:	learn: 0.0662209	total: 1.25s	remaining: 124ms
202:	learn: 0.0661963	total: 1.25s	remaining: 118ms
203:	learn: 0.0661687	total: 1.26s	remaining: 111ms
204:	learn: 0.0661505	total: 1.27s	remaining: 105ms
205:	learn: 0.0661276	total: 1.27s	remaining: 98.8ms
206:	learn: 0

145:	learn: 0.0672541	total: 879ms	remaining: 458ms
146:	learn: 0.0672447	total: 885ms	remaining: 452ms
147:	learn: 0.0672235	total: 891ms	remaining: 446ms
148:	learn: 0.0671970	total: 897ms	remaining: 439ms
149:	learn: 0.0671784	total: 904ms	remaining: 434ms
150:	learn: 0.0671593	total: 910ms	remaining: 428ms
151:	learn: 0.0671471	total: 915ms	remaining: 421ms
152:	learn: 0.0671383	total: 921ms	remaining: 415ms
153:	learn: 0.0671189	total: 927ms	remaining: 409ms
154:	learn: 0.0671010	total: 933ms	remaining: 403ms
155:	learn: 0.0670860	total: 939ms	remaining: 397ms
156:	learn: 0.0670582	total: 945ms	remaining: 391ms
157:	learn: 0.0670354	total: 950ms	remaining: 385ms
158:	learn: 0.0670108	total: 956ms	remaining: 379ms
159:	learn: 0.0669877	total: 962ms	remaining: 373ms
160:	learn: 0.0669674	total: 969ms	remaining: 367ms
161:	learn: 0.0669468	total: 974ms	remaining: 361ms
162:	learn: 0.0669260	total: 981ms	remaining: 355ms
163:	learn: 0.0669099	total: 986ms	remaining: 349ms
164:	learn: 

HBox(children=(IntProgress(value=0, max=2), HTML(value='')))

0:	learn: 0.0753486	total: 19.5ms	remaining: 4.32s
1:	learn: 0.0739159	total: 38.8ms	remaining: 4.26s
2:	learn: 0.0728313	total: 57.6ms	remaining: 4.21s
3:	learn: 0.0721351	total: 76.4ms	remaining: 4.16s
4:	learn: 0.0717142	total: 94.4ms	remaining: 4.1s
5:	learn: 0.0713131	total: 113ms	remaining: 4.08s
6:	learn: 0.0708830	total: 132ms	remaining: 4.06s
7:	learn: 0.0706812	total: 150ms	remaining: 4.01s
8:	learn: 0.0701516	total: 168ms	remaining: 3.97s
9:	learn: 0.0696604	total: 188ms	remaining: 3.99s
10:	learn: 0.0693678	total: 209ms	remaining: 4s
11:	learn: 0.0688603	total: 230ms	remaining: 4.02s
12:	learn: 0.0686287	total: 249ms	remaining: 4s
13:	learn: 0.0684364	total: 268ms	remaining: 3.98s
14:	learn: 0.0682444	total: 285ms	remaining: 3.94s
15:	learn: 0.0680356	total: 304ms	remaining: 3.92s
16:	learn: 0.0678645	total: 323ms	remaining: 3.89s
17:	learn: 0.0677156	total: 340ms	remaining: 3.86s
18:	learn: 0.0675821	total: 358ms	remaining: 3.83s
19:	learn: 0.0674168	total: 377ms	remaining

167:	learn: 0.0520845	total: 3.06s	remaining: 984ms
168:	learn: 0.0519654	total: 3.08s	remaining: 966ms
169:	learn: 0.0518669	total: 3.1s	remaining: 947ms
170:	learn: 0.0518332	total: 3.11s	remaining: 928ms
171:	learn: 0.0517380	total: 3.13s	remaining: 909ms
172:	learn: 0.0516748	total: 3.14s	remaining: 891ms
173:	learn: 0.0516212	total: 3.16s	remaining: 872ms
174:	learn: 0.0515419	total: 3.18s	remaining: 853ms
175:	learn: 0.0514511	total: 3.2s	remaining: 836ms
176:	learn: 0.0513808	total: 3.21s	remaining: 817ms
177:	learn: 0.0512975	total: 3.23s	remaining: 799ms
178:	learn: 0.0512455	total: 3.25s	remaining: 782ms
179:	learn: 0.0511785	total: 3.27s	remaining: 764ms
180:	learn: 0.0511028	total: 3.29s	remaining: 746ms
181:	learn: 0.0510083	total: 3.31s	remaining: 727ms
182:	learn: 0.0509265	total: 3.33s	remaining: 709ms
183:	learn: 0.0508544	total: 3.34s	remaining: 691ms
184:	learn: 0.0507749	total: 3.36s	remaining: 672ms
185:	learn: 0.0507281	total: 3.38s	remaining: 654ms
186:	learn: 0.

109:	learn: 0.0570471	total: 2.08s	remaining: 2.12s
110:	learn: 0.0569798	total: 2.1s	remaining: 2.1s
111:	learn: 0.0568839	total: 2.12s	remaining: 2.08s
112:	learn: 0.0567257	total: 2.14s	remaining: 2.06s
113:	learn: 0.0566146	total: 2.15s	remaining: 2.04s
114:	learn: 0.0565351	total: 2.17s	remaining: 2.02s
115:	learn: 0.0563980	total: 2.19s	remaining: 2s
116:	learn: 0.0562987	total: 2.21s	remaining: 1.98s
117:	learn: 0.0562029	total: 2.23s	remaining: 1.96s
118:	learn: 0.0561083	total: 2.25s	remaining: 1.94s
119:	learn: 0.0559923	total: 2.27s	remaining: 1.93s
120:	learn: 0.0558992	total: 2.29s	remaining: 1.91s
121:	learn: 0.0557730	total: 2.3s	remaining: 1.89s
122:	learn: 0.0556918	total: 2.32s	remaining: 1.87s
123:	learn: 0.0555970	total: 2.34s	remaining: 1.85s
124:	learn: 0.0555365	total: 2.36s	remaining: 1.83s
125:	learn: 0.0554860	total: 2.38s	remaining: 1.81s
126:	learn: 0.0554004	total: 2.4s	remaining: 1.79s
127:	learn: 0.0552971	total: 2.42s	remaining: 1.77s
128:	learn: 0.05515

HBox(children=(IntProgress(value=0, max=2), HTML(value='')))

0:	learn: 0.0743670	total: 26.7ms	remaining: 5.91s
1:	learn: 0.0734376	total: 53.2ms	remaining: 5.85s
2:	learn: 0.0724832	total: 80.4ms	remaining: 5.87s
3:	learn: 0.0722080	total: 106ms	remaining: 5.76s
4:	learn: 0.0719552	total: 131ms	remaining: 5.67s
5:	learn: 0.0715132	total: 155ms	remaining: 5.59s
6:	learn: 0.0708835	total: 181ms	remaining: 5.56s
7:	learn: 0.0708189	total: 206ms	remaining: 5.51s
8:	learn: 0.0701351	total: 235ms	remaining: 5.55s
9:	learn: 0.0696264	total: 264ms	remaining: 5.59s
10:	learn: 0.0692107	total: 290ms	remaining: 5.55s
11:	learn: 0.0689894	total: 316ms	remaining: 5.53s
12:	learn: 0.0687501	total: 341ms	remaining: 5.49s
13:	learn: 0.0686465	total: 368ms	remaining: 5.47s
14:	learn: 0.0684284	total: 395ms	remaining: 5.45s
15:	learn: 0.0681882	total: 422ms	remaining: 5.43s
16:	learn: 0.0681108	total: 451ms	remaining: 5.44s
17:	learn: 0.0679660	total: 482ms	remaining: 5.46s
18:	learn: 0.0676954	total: 508ms	remaining: 5.42s
19:	learn: 0.0676226	total: 533ms	rema

165:	learn: 0.0517024	total: 4.23s	remaining: 1.43s
166:	learn: 0.0516265	total: 4.25s	remaining: 1.4s
167:	learn: 0.0515703	total: 4.28s	remaining: 1.38s
168:	learn: 0.0514597	total: 4.3s	remaining: 1.35s
169:	learn: 0.0513969	total: 4.33s	remaining: 1.32s
170:	learn: 0.0513199	total: 4.35s	remaining: 1.3s
171:	learn: 0.0512099	total: 4.38s	remaining: 1.27s
172:	learn: 0.0510999	total: 4.4s	remaining: 1.25s
173:	learn: 0.0510024	total: 4.42s	remaining: 1.22s
174:	learn: 0.0509265	total: 4.45s	remaining: 1.19s
175:	learn: 0.0508076	total: 4.47s	remaining: 1.17s
176:	learn: 0.0507815	total: 4.5s	remaining: 1.14s
177:	learn: 0.0507325	total: 4.52s	remaining: 1.12s
178:	learn: 0.0506480	total: 4.54s	remaining: 1.09s
179:	learn: 0.0505964	total: 4.57s	remaining: 1.06s
180:	learn: 0.0505686	total: 4.59s	remaining: 1.04s
181:	learn: 0.0504442	total: 4.61s	remaining: 1.01s
182:	learn: 0.0503843	total: 4.64s	remaining: 988ms
183:	learn: 0.0503250	total: 4.66s	remaining: 963ms
184:	learn: 0.050

105:	learn: 0.0572656	total: 2.85s	remaining: 3.12s
106:	learn: 0.0571621	total: 2.88s	remaining: 3.09s
107:	learn: 0.0570128	total: 2.9s	remaining: 3.06s
108:	learn: 0.0569033	total: 2.92s	remaining: 3.03s
109:	learn: 0.0567566	total: 2.95s	remaining: 3s
110:	learn: 0.0566232	total: 2.98s	remaining: 2.98s
111:	learn: 0.0564870	total: 3s	remaining: 2.95s
112:	learn: 0.0563769	total: 3.02s	remaining: 2.92s
113:	learn: 0.0562602	total: 3.05s	remaining: 2.89s
114:	learn: 0.0562170	total: 3.08s	remaining: 2.86s
115:	learn: 0.0561266	total: 3.1s	remaining: 2.83s
116:	learn: 0.0559855	total: 3.13s	remaining: 2.81s
117:	learn: 0.0558892	total: 3.15s	remaining: 2.78s
118:	learn: 0.0557700	total: 3.18s	remaining: 2.75s
119:	learn: 0.0556769	total: 3.21s	remaining: 2.73s
120:	learn: 0.0555664	total: 3.23s	remaining: 2.7s
121:	learn: 0.0554833	total: 3.26s	remaining: 2.67s
122:	learn: 0.0553813	total: 3.29s	remaining: 2.64s
123:	learn: 0.0553266	total: 3.31s	remaining: 2.62s
124:	learn: 0.0552320

In [22]:
results = []
for model in all_results.keys():
    for col in all_results[model]:
        tmp = pd.concat(all_results[model][col])
        tmp["model"] = model
        tmp["feature"] = col
        results.append(tmp)
results = pd.concat(results)
results["label"] = [x[0] for x in results.index]
results.head(5)

Unnamed: 0_level_0,Unnamed: 1_level_0,code,datetime,predict,predict_dir,actual,actual_dir,model,feature,label
Unnamed: 0_level_1,datetime,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
label_high_20,2019-02-08,1301,2019-02-08,-0.04169,-1.0,0.00604,1.0,XGB,fundamental_only,label_high_20
label_high_20,2019-05-13,1301,2019-05-13,-0.023208,-1.0,-0.06317,-1.0,XGB,fundamental_only,label_high_20
label_high_20,2019-08-02,1301,2019-08-02,-0.062027,-1.0,-0.08723,-1.0,XGB,fundamental_only,label_high_20
label_high_20,2019-11-05,1301,2019-11-05,-0.043398,-1.0,-0.038,-1.0,XGB,fundamental_only,label_high_20
label_high_20,2020-02-07,1301,2020-02-07,-0.044869,-1.0,-0.17832,-1.0,XGB,fundamental_only,label_high_20


In [23]:
# 結果保存用変数
all_metrics = []

# データセット毎に処理
for feature in columns:
    matrix = dict()
    # 目的変数毎に処理
    for label in labels:
        # 処理対象データに絞り込み
        tmp_df = results[(results["model"] == "XGB") & (results["label"] == label) & (results["feature"] == feature)]
        # RMSE
        rmse = np.sqrt(mean_squared_error(tmp_df["predict"], tmp_df["actual"]))
        # 精度
        accuracy = accuracy_score(tmp_df["predict_dir"], tmp_df["actual_dir"])
        # 相関係数
        corr = np.corrcoef(tmp_df["actual"], tmp_df["predict"])[0, 1]
        # 順位相関
        spearman_corr = spearmanr(tmp_df["actual"], tmp_df["predict"])[0]
        # 結果を保存
        matrix[label] = [rmse, accuracy, spearman_corr,corr, corr**2, feature, model, tmp_df.shape[0]]
    res = pd.DataFrame.from_dict(matrix).T
    res.columns = ["RMSE","accuracy","spearman_corr","corr","R^2 score","feature", "model", "# of samples"]
    all_metrics.append(res)
all_metrics = pd.concat(all_metrics)
all_metrics.reset_index()

Unnamed: 0,index,RMSE,accuracy,spearman_corr,corr,R^2 score,feature,model,# of samples
0,label_high_20,0.104447,0.866738,0.14067,0.143726,0.020657,fundamental_only,XGB,32515
1,label_low_20,0.104447,0.866738,0.14067,0.143726,0.020657,fundamental_only,XGB,32515
2,label_high_20,0.107356,0.867784,0.0804746,0.0585454,0.00342756,return_only,XGB,32515
3,label_low_20,0.107356,0.867784,0.0804746,0.0585454,0.00342756,return_only,XGB,32515
4,label_high_20,0.105467,0.861449,0.153756,0.146159,0.0213624,technical_only,XGB,32515
5,label_low_20,0.105467,0.861449,0.153756,0.146159,0.0213624,technical_only,XGB,32515
6,label_high_20,0.104509,0.864155,0.159079,0.153215,0.0234747,fundamental+technical,XGB,32515
7,label_low_20,0.104509,0.864155,0.159079,0.153215,0.0234747,fundamental+technical,XGB,32515


In [24]:
tmp = all_metrics.drop(columns=["# of samples"])

In [25]:
tmp.to_csv('result_XGB.csv', index=None)

In [26]:
tmp = pd.read_csv('result_XGB.csv')

In [27]:
tmp.groupby(['feature', 'model']).mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,RMSE,accuracy,spearman_corr,corr,R^2 score
feature,model,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
fundamental+technical,XGB,0.104509,0.864155,0.159079,0.153215,0.023475
fundamental_only,XGB,0.104447,0.866738,0.14067,0.143726,0.020657
return_only,XGB,0.107356,0.867784,0.080475,0.058545,0.003428
technical_only,XGB,0.105467,0.861449,0.153756,0.146159,0.021362
