# FXの将来のリターン動向の予測モデル

目的：将来1時間ごとのリターン正負の予測  
モデル：Extremely Fast Decision Tree   
開発環境: python 3.8.10/ JupyterLab 2.3.2/ System Information: System: Linux (179-Ubuntu SMP)

## 必要なライブラリをインポート

In [47]:
import pandas as pd
import numpy as np

from skmultiflow.trees import ExtremelyFastDecisionTreeClassifier
from skmultiflow.evaluation import EvaluatePrequential
from skmultiflow.data.file_stream import FileStream
from skmultiflow.utils import get_dimensions

from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, average_precision_score

Pandas version: 1.5.2   
Numpy version: 1.22.2   
Scikit-learn version: 1.2.0   
Scikit-multiflow version: 0.5.3

## 前処理

In [48]:
# USDJPY通貨ペアデータセットをDataFrameに読み込む
USDJPY_ti = pd.read_csv('USDJPY_ti.csv',sep = ',')

display(USDJPY_ti)

Unnamed: 0,Times,Final Price,Final Price Normalized,Upper Band,Lower Band,Rolling Mean,RSI,RCI,Momentum
0,2018-01-01 18:09:00,112.650,0.225644,112.657555,112.561912,112.609733,56.844548,0.000524,0.059
1,2018-01-01 18:10:00,112.635,0.225348,112.659427,112.561673,112.610550,53.990610,0.000302,0.034
2,2018-01-01 18:11:00,112.646,0.225565,112.660132,112.561601,112.610867,51.605505,0.000125,0.014
3,2018-01-01 18:12:00,112.645,0.225545,112.661314,112.561253,112.611283,52.796421,0.000222,0.025
4,2018-01-01 18:13:00,112.642,0.225486,112.662409,112.560958,112.611683,52.466368,0.000196,0.022
...,...,...,...,...,...,...,...,...,...
3090161,2023-11-17 16:50:00,149.644,0.954934,149.721929,149.635588,149.678758,48.203593,-0.000080,-0.012
3090162,2023-11-17 16:51:00,149.642,0.954895,149.722299,149.634751,149.678525,47.916667,-0.000093,-0.014
3090163,2023-11-17 16:52:00,149.641,0.954875,149.722731,149.633786,149.678258,47.477745,-0.000113,-0.017
3090164,2023-11-17 16:53:00,149.642,0.954895,149.723195,149.632721,149.677958,47.619048,-0.000107,-0.016


In [49]:
# 1時間ごとのリターンを計算する, ln[p(t)/p(t-60)]
USDJPY_ti['Return'] = np.log(USDJPY_ti['Final Price'] / USDJPY_ti['Final Price'].shift(60))
USDJPY_ti['Return'] = USDJPY_ti['Return'].fillna(0)

display(USDJPY_ti)

Unnamed: 0,Times,Final Price,Final Price Normalized,Upper Band,Lower Band,Rolling Mean,RSI,RCI,Momentum,Return
0,2018-01-01 18:09:00,112.650,0.225644,112.657555,112.561912,112.609733,56.844548,0.000524,0.059,0.000000
1,2018-01-01 18:10:00,112.635,0.225348,112.659427,112.561673,112.610550,53.990610,0.000302,0.034,0.000000
2,2018-01-01 18:11:00,112.646,0.225565,112.660132,112.561601,112.610867,51.605505,0.000125,0.014,0.000000
3,2018-01-01 18:12:00,112.645,0.225545,112.661314,112.561253,112.611283,52.796421,0.000222,0.025,0.000000
4,2018-01-01 18:13:00,112.642,0.225486,112.662409,112.560958,112.611683,52.466368,0.000196,0.022,0.000000
...,...,...,...,...,...,...,...,...,...,...
3090161,2023-11-17 16:50:00,149.644,0.954934,149.721929,149.635588,149.678758,48.203593,-0.000080,-0.012,-0.000094
3090162,2023-11-17 16:51:00,149.642,0.954895,149.722299,149.634751,149.678525,47.916667,-0.000093,-0.014,-0.000107
3090163,2023-11-17 16:52:00,149.641,0.954875,149.722731,149.633786,149.678258,47.477745,-0.000113,-0.017,-0.000120
3090164,2023-11-17 16:53:00,149.642,0.954895,149.723195,149.632721,149.677958,47.619048,-0.000107,-0.016,-0.000100


In [57]:
# データセットを切り捨てる
USDJPY_ti.set_index('Times', inplace=True)

start_date = '2018-01-01 19:09:00'
end_date = '2023-11-17 16:54:00'
USDJPY_ti = USDJPY_ti.loc[start_date:end_date]

USDJPY_ti.reset_index(inplace=True)

display(USDJPY_ti)

Unnamed: 0,Times,Final Price,Final Price Normalized,Upper Band,Lower Band,Rolling Mean,RSI,RCI,Momentum,Return
0,2018-01-01 19:09:00,112.750,0.227615,112.825347,112.629053,112.727200,64.759036,0.000870,0.098,0.000887
1,2018-01-01 19:10:00,112.751,0.227635,112.825058,112.632675,112.728867,68.025078,0.001021,0.115,0.001029
2,2018-01-01 19:11:00,112.757,0.227753,112.823931,112.637669,112.730800,66.990291,0.000932,0.105,0.000985
3,2018-01-01 19:12:00,112.750,0.227615,112.823306,112.641994,112.732650,67.834395,0.000994,0.112,0.000932
4,2018-01-01 19:13:00,112.742,0.227457,112.822182,112.646618,112.734400,66.981132,0.000958,0.108,0.000887
...,...,...,...,...,...,...,...,...,...,...
3090101,2023-11-17 16:50:00,149.644,0.954934,149.721929,149.635588,149.678758,48.203593,-0.000080,-0.012,-0.000094
3090102,2023-11-17 16:51:00,149.642,0.954895,149.722299,149.634751,149.678525,47.916667,-0.000093,-0.014,-0.000107
3090103,2023-11-17 16:52:00,149.641,0.954875,149.722731,149.633786,149.678258,47.477745,-0.000113,-0.017,-0.000120
3090104,2023-11-17 16:53:00,149.642,0.954895,149.723195,149.632721,149.677958,47.619048,-0.000107,-0.016,-0.000100


In [51]:
# 'Times'列を日時型に変換
USDJPY_ti['Times'] = pd.to_datetime(USDJPY_ti['Times'])

# 時間が整数の時刻の行を抽出
USDJPY_ti_hour = USDJPY_ti[USDJPY_ti['Times'].dt.minute == 0]

# インデックスを0から再設定
USDJPY_ti_hour = USDJPY_ti_hour.reset_index(drop=True)

display(USDJPY_ti_hour)

Unnamed: 0,Times,Final Price,Final Price Normalized,Upper Band,Lower Band,Rolling Mean,RSI,RCI,Momentum,Return
0,2018-01-01 20:00:00,112.737,0.227359,112.778852,112.723148,112.751000,45.015106,-0.000293,-0.033,-0.000275
1,2018-01-01 21:00:00,112.713,0.226886,112.732030,112.660203,112.696117,48.173516,-0.000142,-0.016,-0.000213
2,2018-01-01 22:00:00,112.713,0.226886,112.740615,112.702818,112.721717,50.175439,0.000009,0.001,0.000000
3,2018-01-01 23:00:00,112.698,0.226590,112.708154,112.668180,112.688167,47.450980,-0.000115,-0.013,-0.000133
4,2018-01-02 00:00:00,112.707,0.226767,112.731664,112.692403,112.712033,52.840909,0.000089,0.010,0.000080
...,...,...,...,...,...,...,...,...,...,...
51496,2023-11-17 12:00:00,149.790,0.957813,149.913520,149.538347,149.725933,66.705607,0.001911,0.286,0.001711
51497,2023-11-17 13:00:00,149.700,0.956038,149.800490,149.700043,149.750267,40.944882,-0.000614,-0.092,-0.000601
51498,2023-11-17 14:00:00,149.623,0.954520,149.738318,149.609349,149.673833,40.416667,-0.000615,-0.092,-0.000514
51499,2023-11-17 15:00:00,149.654,0.955131,149.695923,149.584043,149.639983,52.444444,0.000147,0.022,0.000207


In [52]:
# もしリターンが０、もしくは０以上であれば、その時点のリターンはプラスです。それ以外の場合はマイナス
# Label＿nowは今この時点のリターン、Labelは次の時点のリターン
USDJPY_ti_hour['Label_now'] = np.where(USDJPY_ti_hour['Return'] >= 0, 1, 0)
USDJPY_ti_hour['Label'] = USDJPY_ti_hour['Label_now'].shift(-1)

USDJPY_ti_hour = USDJPY_ti_hour.drop(USDJPY_ti_hour.index[-1])

USDJPY_ti_hour['Label'] = USDJPY_ti_hour['Label'].astype(int)

display(USDJPY_ti_hour)

Unnamed: 0,Times,Final Price,Final Price Normalized,Upper Band,Lower Band,Rolling Mean,RSI,RCI,Momentum,Return,Label_now,Label
0,2018-01-01 20:00:00,112.737,0.227359,112.778852,112.723148,112.751000,45.015106,-0.000293,-0.033,-0.000275,0,0
1,2018-01-01 21:00:00,112.713,0.226886,112.732030,112.660203,112.696117,48.173516,-0.000142,-0.016,-0.000213,0,1
2,2018-01-01 22:00:00,112.713,0.226886,112.740615,112.702818,112.721717,50.175439,0.000009,0.001,0.000000,1,0
3,2018-01-01 23:00:00,112.698,0.226590,112.708154,112.668180,112.688167,47.450980,-0.000115,-0.013,-0.000133,0,1
4,2018-01-02 00:00:00,112.707,0.226767,112.731664,112.692403,112.712033,52.840909,0.000089,0.010,0.000080,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...
51495,2023-11-17 11:00:00,149.534,0.952766,149.681662,149.549205,149.615433,47.276941,-0.000314,-0.047,-0.000461,0,1
51496,2023-11-17 12:00:00,149.790,0.957813,149.913520,149.538347,149.725933,66.705607,0.001911,0.286,0.001711,1,0
51497,2023-11-17 13:00:00,149.700,0.956038,149.800490,149.700043,149.750267,40.944882,-0.000614,-0.092,-0.000601,0,0
51498,2023-11-17 14:00:00,149.623,0.954520,149.738318,149.609349,149.673833,40.416667,-0.000615,-0.092,-0.000514,0,1


In [53]:
USDJPY_ti_hour = USDJPY_ti_hour[['Final Price','Return','Upper Band','Lower Band','Rolling Mean','RSI','RCI','Momentum','Label']]
display(USDJPY_ti_hour)

# 処理済みのデータを保存する
USDJPY_ti_hour.to_csv('USDJPY_hour_ti.csv', index=False)

Unnamed: 0,Final Price,Return,Upper Band,Lower Band,Rolling Mean,RSI,RCI,Momentum,Label
0,112.737,-0.000275,112.778852,112.723148,112.751000,45.015106,-0.000293,-0.033,0
1,112.713,-0.000213,112.732030,112.660203,112.696117,48.173516,-0.000142,-0.016,1
2,112.713,0.000000,112.740615,112.702818,112.721717,50.175439,0.000009,0.001,0
3,112.698,-0.000133,112.708154,112.668180,112.688167,47.450980,-0.000115,-0.013,1
4,112.707,0.000080,112.731664,112.692403,112.712033,52.840909,0.000089,0.010,0
...,...,...,...,...,...,...,...,...,...
51495,149.534,-0.000461,149.681662,149.549205,149.615433,47.276941,-0.000314,-0.047,1
51496,149.790,0.001711,149.913520,149.538347,149.725933,66.705607,0.001911,0.286,0
51497,149.700,-0.000601,149.800490,149.700043,149.750267,40.944882,-0.000614,-0.092,0
51498,149.623,-0.000514,149.738318,149.609349,149.673833,40.416667,-0.000615,-0.092,1


In [54]:
usdjpy = pd.read_csv('USDJPY_hour_ti.csv', sep=',')

display(usdjpy)

Unnamed: 0,Final Price,Return,Upper Band,Lower Band,Rolling Mean,RSI,RCI,Momentum,Label
0,112.737,-0.000275,112.778852,112.723148,112.751000,45.015106,-0.000293,-0.033,0
1,112.713,-0.000213,112.732030,112.660203,112.696117,48.173516,-0.000142,-0.016,1
2,112.713,0.000000,112.740615,112.702818,112.721717,50.175439,0.000009,0.001,0
3,112.698,-0.000133,112.708154,112.668180,112.688167,47.450980,-0.000115,-0.013,1
4,112.707,0.000080,112.731664,112.692403,112.712033,52.840909,0.000089,0.010,0
...,...,...,...,...,...,...,...,...,...
51495,149.534,-0.000461,149.681662,149.549205,149.615433,47.276941,-0.000314,-0.047,1
51496,149.790,0.001711,149.913520,149.538347,149.725933,66.705607,0.001911,0.286,0
51497,149.700,-0.000601,149.800490,149.700043,149.750267,40.944882,-0.000614,-0.092,0
51498,149.623,-0.000514,149.738318,149.609349,149.673833,40.416667,-0.000615,-0.092,1


## モデルの構築

In [9]:
# ファイルからストリームを作成して、データを1つずつモデルに入力する
stream = FileStream('USDJPY_hour_ti.csv',target_idx=-1)

# 使用する属性を指定: "Final Price", "Return", 'Upper Band', 'Lower Band', 'Rolling Mean', 'RSI', 'RCI', 'Momentum'
nominal_attributes = ['Final Price','Return','Upper Band','Lower Band','Rolling Mean','RSI','RCI','Momentum']

# 指定された名義属性を持つ ExtremelyFastDecisionTreeClassifier を作成する
efdt = ExtremelyFastDecisionTreeClassifier(nominal_attributes=nominal_attributes)

# モデルの性能を評価するための評価器を作成する
metrics = ['precision', 'recall', 'accuracy', 'kappa', 'f1']
evaluator = EvaluatePrequential(show_plot=False, pretrain_size=90000, max_samples=3090000, metrics=metrics)

# モデルを訓練、評価する
evaluator.evaluate(stream=stream, model=efdt)

Prequential Evaluation
Evaluating 1 target(s).
Pre-training on 1000 sample(s).
Evaluating...
 #################### [100%] [107.73s]
Processed samples: 10000
Mean performance:
M0 - Accuracy     : 0.6569
M0 - Kappa        : 0.3110
M0 - Precision: 0.6281
M0 - Recall: 0.7932
M0 - F1 score: 0.7011


[ExtremelyFastDecisionTreeClassifier(binary_split=False, grace_period=200,
                                     leaf_prediction='nba',
                                     max_byte_size=33554432,
                                     memory_estimate_period=1000000,
                                     min_samples_reevaluate=20, nb_threshold=0,
                                     nominal_attributes=['Final Price', 'Return',
                                                         'Upper Band',
                                                         'Lower Band',
                                                         'Rolling Mean', 'RSI',
                                                         'RCI', 'Momentum'],
                                     split_confidence=1e-07,
                                     split_criterion='info_gain',
                                     stop_mem_management=False,
                                     tie_threshold=0.05)]

## モデルの訓練とテスト

In [None]:
stream.restart()

In [45]:
# ファイルからストリームを作成して、データを1つずつモデルに入力する
stream = FileStream('USDJPY_hour_ti.csv',target_idx=-1)

# 使用する属性を指定: "Final Price", "Return", 'Upper Band', 'Lower Band', 'Rolling Mean', 'RSI', 'RCI', 'Momentum'
nominal_attributes = ['Final Price','Return','Upper Band','Lower Band','Rolling Mean','RSI','RCI','Momentum']

# 指定された名義属性を持つ ExtremelyFastDecisionTreeClassifierを作成する
efdt = ExtremelyFastDecisionTreeClassifier(nominal_attributes=nominal_attributes)
train_set = []

# モデルを訓練する
for i in range(1000):  
    X, y = stream.next_sample()
    train_set.append(X)
    efdt.fit(X, y, classes=np.unique(stream.target_values))

In [46]:
# 実際のラベルのリストと予測ラベルのリスト
test_data = []
true_labels = []
predicted_labels = []

# 次の10000サンプルに対する予測を収集
for i in range(10000):
    X, y_true = stream.next_sample()
    y_pred = efdt.predict(X)
    
    test_data.append(X)
    true_labels.append(y_true)
    predicted_labels.append(y_pred)
    
true_labels = np.concatenate(true_labels)
predicted_labels = np.concatenate(predicted_labels)

# 評価メトリクスを計算
accuracy = accuracy_score(true_labels, predicted_labels)
precision = precision_score(true_labels, predicted_labels)
recall = recall_score(true_labels, predicted_labels)
f1 = f1_score(true_labels, predicted_labels)
auc = roc_auc_score(true_labels, predicted_labels)  # AUC for binary classification
aupr = average_precision_score(true_labels, predicted_labels)  # AUC-PR for binary classification

print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")
print(f"AUC: {auc:.4f}")
print(f"AUPR: {aupr:.4f}")

Accuracy: 0.6516
Precision: 0.5941
Recall: 0.9744
F1 Score: 0.7381
AUC: 0.6491
AUPR: 0.5918


## モデルを解釈する

In [29]:
# 次のデータを用意
x, y_true = stream.next_sample()

In [30]:
# データ訓練済みのモデルに入力して予測を出力する、各ラベルに対する確率
y_prediction = efdt.predict(x)
probability = efdt.predict_proba(x)
print("The input is:", x)
print("The true label is:", y_true)
print("The prediction is:", y_prediction)
print("The probability of each label is:", probability)

The input is: [[ 1.50987000e+02 -7.34892530e-04  1.51175607e+02  1.50929493e+02
   1.51052550e+02  4.32033720e+01 -8.53539564e-04 -1.29000000e-01]]
The true label is: [0]
The prediction is: [1]
The probability of each label is: [[0.4666649 0.5333351]]


In [33]:
# 決定木を可視化する
tree_depth = efdt.measure_tree_depth()
print("The depth of the tree is:",tree_depth)

tree_size = efdt.measure_byte_size()
print("The size of the tree is:",tree_size)

The depth of the tree is: 5
The size of the tree is: 321574


In [36]:
# 決定木を可視化する
tree_structure = efdt.get_model_description()
print(tree_structure)

if Attribute 5 <= 45.45454545454546:
  if Attribute 5 <= 26.00422832980901:
    if Attribute 5 <= 8.860759493670871:
      if Attribute 1 <= -0.0004954102158651157:
        Leaf = Class 1 | {0: 65.27753503876738, 1: 141.6268448389601}
      if Attribute 1 > -0.0004954102158651157:
        if Attribute 1 <= -0.00022052145244946537:
          Leaf = Class 1 | {0: 14.870891096943524, 1: 18.161646170221502}
        if Attribute 1 > -0.00022052145244946537:
          Leaf = Class 0 | {0: 546.1291089030565, 1: 26.838353829778498}
    if Attribute 5 > 8.860759493670871:
      Leaf = Class 1 | {0: 64.24005932424657, 1: 84.5626825949001}
  if Attribute 5 > 26.00422832980901:
    if Attribute 1 <= -0.0022862271050180134:
      Leaf = Class 1 | {0: 239.68788869061973, 1: 454.6635599930305}
    if Attribute 1 > -0.0022862271050180134:
      if Attribute 0 <= 136.84224985011514:
        Leaf = Class 0 | {0: 135.0548041004673, 1: 111.9172151025948}
      if Attribute 0 > 136.84224985011514:
        

In [32]:
# モデルの意思決定プロセスを出力する
rules = efdt.get_rules_description()
print(rules)

Att (5) <= 45.450 and Att (5) <= 26.000 and Att (5) <= 8.860 and Att (1) <= -0.000 | class: 1
Att (5) <= 45.450 and Att (5) <= 26.000 and Att (5) <= 8.860 and Att (1) > -0.000 and Att (1) <= -0.000 | class: 1
Att (5) <= 45.450 and Att (5) <= 26.000 and Att (5) <= 8.860 and Att (1) > -0.000 and Att (1) > -0.000 | class: 0
Att (5) <= 45.450 and Att (5) <= 26.000 and Att (5) > 8.860 | class: 1
Att (5) <= 45.450 and Att (5) > 26.000 and Att (1) <= -0.000 | class: 1
Att (5) <= 45.450 and Att (5) > 26.000 and Att (1) > -0.000 and Att (0) <= 136.840 | class: 0
Att (5) <= 45.450 and Att (5) > 26.000 and Att (1) > -0.000 and Att (0) > 136.840 | class: 1
Att (5) > 45.450 and Att (5) <= 70.250 and Att (6) <= 0.000 and Att (1) <= 0.000 | class: 1
Att (5) > 45.450 and Att (5) <= 70.250 and Att (6) <= 0.000 and Att (1) > 0.000 | class: 1
Att (5) > 45.450 and Att (5) <= 70.250 and Att (6) > 0.000 and Att (1) <= 0.000 | class: 0
Att (5) > 45.450 and Att (5) <= 70.250 and Att (6) > 0.000 and Att (1) > 