# FXの将来のリターン動向の予測モデル

目的：将来5分ごとのリターン正負の予測  
モデル：Extremely Fast Decision Tree   
開発環境: python 3.8.10/ JupyterLab 2.3.2/ System Information: System: Linux (179-Ubuntu SMP) 

In [3]:
import pandas as pd
import numpy as np

from skmultiflow.trees import ExtremelyFastDecisionTreeClassifier
from skmultiflow.evaluation import EvaluatePrequential
from skmultiflow.data.file_stream import FileStream
from skmultiflow.utils import get_dimensions

from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, average_precision_score

Pandas version: 1.5.2   
Numpy version: 1.22.2   
Scikit-learn version: 1.2.0   
Scikit-multiflow version: 0.5.3

## 前処理

In [4]:
# USDJPY通貨ペアデータセットをDataFrameに読み込む
USDJPY_ti = pd.read_csv('USDJPY_ti.csv',sep = ',')

display(USDJPY_ti)

Unnamed: 0,Times,Final Price,Final Price Normalized,Upper Band,Lower Band,Rolling Mean,RSI,RCI,Momentum
0,2018-01-01 18:09:00,112.650,0.225644,112.657555,112.561912,112.609733,56.844548,0.000524,0.059
1,2018-01-01 18:10:00,112.635,0.225348,112.659427,112.561673,112.610550,53.990610,0.000302,0.034
2,2018-01-01 18:11:00,112.646,0.225565,112.660132,112.561601,112.610867,51.605505,0.000125,0.014
3,2018-01-01 18:12:00,112.645,0.225545,112.661314,112.561253,112.611283,52.796421,0.000222,0.025
4,2018-01-01 18:13:00,112.642,0.225486,112.662409,112.560958,112.611683,52.466368,0.000196,0.022
...,...,...,...,...,...,...,...,...,...
3090161,2023-11-17 16:50:00,149.644,0.954934,149.721929,149.635588,149.678758,48.203593,-0.000080,-0.012
3090162,2023-11-17 16:51:00,149.642,0.954895,149.722299,149.634751,149.678525,47.916667,-0.000093,-0.014
3090163,2023-11-17 16:52:00,149.641,0.954875,149.722731,149.633786,149.678258,47.477745,-0.000113,-0.017
3090164,2023-11-17 16:53:00,149.642,0.954895,149.723195,149.632721,149.677958,47.619048,-0.000107,-0.016


In [5]:
# 5分ごとのリターンを計算する, ln[p(t)/p(t-5)]
USDJPY_ti['Return'] = np.log(USDJPY_ti['Final Price'] / USDJPY_ti['Final Price'].shift(5))
USDJPY_ti['Return'] = USDJPY_ti['Return'].fillna(0)

display(USDJPY_ti)

Unnamed: 0,Times,Final Price,Final Price Normalized,Upper Band,Lower Band,Rolling Mean,RSI,RCI,Momentum,Return
0,2018-01-01 18:09:00,112.650,0.225644,112.657555,112.561912,112.609733,56.844548,0.000524,0.059,0.000000
1,2018-01-01 18:10:00,112.635,0.225348,112.659427,112.561673,112.610550,53.990610,0.000302,0.034,0.000000
2,2018-01-01 18:11:00,112.646,0.225565,112.660132,112.561601,112.610867,51.605505,0.000125,0.014,0.000000
3,2018-01-01 18:12:00,112.645,0.225545,112.661314,112.561253,112.611283,52.796421,0.000222,0.025,0.000000
4,2018-01-01 18:13:00,112.642,0.225486,112.662409,112.560958,112.611683,52.466368,0.000196,0.022,0.000000
...,...,...,...,...,...,...,...,...,...,...
3090161,2023-11-17 16:50:00,149.644,0.954934,149.721929,149.635588,149.678758,48.203593,-0.000080,-0.012,-0.000013
3090162,2023-11-17 16:51:00,149.642,0.954895,149.722299,149.634751,149.678525,47.916667,-0.000093,-0.014,-0.000007
3090163,2023-11-17 16:52:00,149.641,0.954875,149.722731,149.633786,149.678258,47.477745,-0.000113,-0.017,-0.000013
3090164,2023-11-17 16:53:00,149.642,0.954895,149.723195,149.632721,149.677958,47.619048,-0.000107,-0.016,-0.000033


In [6]:
# データセットを切り捨てる
USDJPY_ti.set_index('Times', inplace=True)

start_date = '2018-01-01 18:15:00'
end_date = '2023-11-17 16:55:00'
USDJPY_ti = USDJPY_ti.loc[start_date:end_date]

USDJPY_ti.reset_index(inplace=True)

display(USDJPY_ti)

Unnamed: 0,Times,Final Price,Final Price Normalized,Upper Band,Lower Band,Rolling Mean,RSI,RCI,Momentum,Return
0,2018-01-01 18:15:00,112.649,0.225624,112.664554,112.560679,112.612617,50.495050,0.000036,0.004,0.000124
1,2018-01-01 18:16:00,112.648,0.225604,112.665024,112.560476,112.612750,52.658228,0.000187,0.021,0.000018
2,2018-01-01 18:17:00,112.649,0.225624,112.666004,112.560163,112.613083,49.462366,-0.000035,-0.004,0.000036
3,2018-01-01 18:18:00,112.659,0.225821,112.665810,112.560257,112.613033,47.777778,-0.000142,-0.016,0.000151
4,2018-01-01 18:19:00,112.651,0.225663,112.665331,112.560536,112.612933,48.913043,-0.000071,-0.008,0.000053
...,...,...,...,...,...,...,...,...,...,...
3090155,2023-11-17 16:50:00,149.644,0.954934,149.721929,149.635588,149.678758,48.203593,-0.000080,-0.012,-0.000013
3090156,2023-11-17 16:51:00,149.642,0.954895,149.722299,149.634751,149.678525,47.916667,-0.000093,-0.014,-0.000007
3090157,2023-11-17 16:52:00,149.641,0.954875,149.722731,149.633786,149.678258,47.477745,-0.000113,-0.017,-0.000013
3090158,2023-11-17 16:53:00,149.642,0.954895,149.723195,149.632721,149.677958,47.619048,-0.000107,-0.016,-0.000033


In [7]:
# 'Times'列を日時型に変換
USDJPY_ti['Times'] = pd.to_datetime(USDJPY_ti['Times'])

# 時間が整数の時刻の行を抽出
USDJPY_ti_5m = USDJPY_ti[USDJPY_ti['Times'].dt.minute % 5 == 0]

# Reset the index to start from 0
USDJPY_ti_5m = USDJPY_ti_5m.reset_index(drop=True)
# USDJPY_hour = USDJPY_hour[['Times', 'Final Price', 'Return', 'Label']]

display(USDJPY_ti_5m[:60])

Unnamed: 0,Times,Final Price,Final Price Normalized,Upper Band,Lower Band,Rolling Mean,RSI,RCI,Momentum,Return
0,2018-01-01 18:15:00,112.649,0.225624,112.664554,112.560679,112.612617,50.49505,3.6e-05,0.004,0.000124
1,2018-01-01 18:20:00,112.654,0.225723,112.664099,112.561235,112.612667,53.731343,0.000222,0.025,4.4e-05
2,2018-01-01 18:25:00,112.689,0.226412,112.677712,112.555821,112.616767,62.616822,0.000719,0.081,0.000311
3,2018-01-01 18:30:00,112.735,0.227319,112.711163,112.541637,112.6264,72.463768,0.001376,0.155,0.000408
4,2018-01-01 18:35:00,112.723,0.227083,112.736827,112.53834,112.637583,67.605634,0.00111,0.125,-0.000106
5,2018-01-01 18:40:00,112.738,0.227378,112.758422,112.540811,112.649617,70.054945,0.001296,0.146,0.000133
6,2018-01-01 18:45:00,112.777,0.228147,112.781095,112.544205,112.66265,72.32376,0.001518,0.171,0.000346
7,2018-01-01 18:50:00,112.776,0.228128,112.805315,112.551051,112.678183,75.066079,0.001683,0.189667,-9e-06
8,2018-01-01 18:55:00,112.784,0.228285,112.818097,112.569336,112.693717,74.365482,0.001704,0.192,7.1e-05
9,2018-01-01 19:00:00,112.768,0.22797,112.824088,112.593812,112.70895,71.929825,0.001553,0.175,-0.000142


In [8]:
# もしリターンが０、もしくは０以上であれば、その時点のリターンはプラスです。それ以外の場合はマイナス
# Label＿nowは今この時点のリターン、Labelは次の時点のリターン
USDJPY_ti_5m['Label_now'] = np.where(USDJPY_ti_5m['Return'] >= 0, 1, 0)
USDJPY_ti_5m['Label'] = USDJPY_ti_5m['Label_now'].shift(-1)

USDJPY_ti_5m = USDJPY_ti_5m.drop(USDJPY_ti_5m.index[-1])

USDJPY_ti_5m['Label'] = USDJPY_ti_5m['Label'].astype(int)

display(USDJPY_ti_5m)

Unnamed: 0,Times,Final Price,Final Price Normalized,Upper Band,Lower Band,Rolling Mean,RSI,RCI,Momentum,Return,Label_now,Label
0,2018-01-01 18:15:00,112.649,0.225624,112.664554,112.560679,112.612617,50.495050,0.000036,0.004,0.000124,1,1
1,2018-01-01 18:20:00,112.654,0.225723,112.664099,112.561235,112.612667,53.731343,0.000222,0.025,0.000044,1,1
2,2018-01-01 18:25:00,112.689,0.226412,112.677712,112.555821,112.616767,62.616822,0.000719,0.081,0.000311,1,1
3,2018-01-01 18:30:00,112.735,0.227319,112.711163,112.541637,112.626400,72.463768,0.001376,0.155,0.000408,1,0
4,2018-01-01 18:35:00,112.723,0.227083,112.736827,112.538340,112.637583,67.605634,0.001110,0.125,-0.000106,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...
618026,2023-11-17 16:25:00,149.701,0.956058,149.716838,149.638779,149.677808,55.806452,0.000241,0.036,0.000000,1,0
618027,2023-11-17 16:30:00,149.668,0.955407,149.718025,149.641358,149.679692,50.299401,0.000013,0.002,-0.000220,0,1
618028,2023-11-17 16:35:00,149.689,0.955821,149.718230,149.643920,149.681075,53.409091,0.000160,0.024,0.000140,1,0
618029,2023-11-17 16:40:00,149.658,0.955210,149.718819,149.643964,149.681392,48.238482,-0.000087,-0.013,-0.000207,0,0


In [9]:
USDJPY_ti_5m = USDJPY_ti_5m[['Final Price','Return','Upper Band','Lower Band','Rolling Mean','RSI','RCI','Momentum','Label']]
display(USDJPY_ti_5m)

# 処理済みのデータを保存する
USDJPY_ti_5m.to_csv('USDJPY_ti_5m.csv', index=False)

Unnamed: 0,Final Price,Return,Upper Band,Lower Band,Rolling Mean,RSI,RCI,Momentum,Label
0,112.649,0.000124,112.664554,112.560679,112.612617,50.495050,0.000036,0.004,1
1,112.654,0.000044,112.664099,112.561235,112.612667,53.731343,0.000222,0.025,1
2,112.689,0.000311,112.677712,112.555821,112.616767,62.616822,0.000719,0.081,1
3,112.735,0.000408,112.711163,112.541637,112.626400,72.463768,0.001376,0.155,0
4,112.723,-0.000106,112.736827,112.538340,112.637583,67.605634,0.001110,0.125,1
...,...,...,...,...,...,...,...,...,...
618026,149.701,0.000000,149.716838,149.638779,149.677808,55.806452,0.000241,0.036,0
618027,149.668,-0.000220,149.718025,149.641358,149.679692,50.299401,0.000013,0.002,1
618028,149.689,0.000140,149.718230,149.643920,149.681075,53.409091,0.000160,0.024,0
618029,149.658,-0.000207,149.718819,149.643964,149.681392,48.238482,-0.000087,-0.013,0


In [10]:
usdjpy = pd.read_csv('USDJPY_ti_5m.csv', sep=',')

display(usdjpy)

Unnamed: 0,Final Price,Return,Upper Band,Lower Band,Rolling Mean,RSI,RCI,Momentum,Label
0,112.649,0.000124,112.664554,112.560679,112.612617,50.495050,0.000036,0.004,1
1,112.654,0.000044,112.664099,112.561235,112.612667,53.731343,0.000222,0.025,1
2,112.689,0.000311,112.677712,112.555821,112.616767,62.616822,0.000719,0.081,1
3,112.735,0.000408,112.711163,112.541637,112.626400,72.463768,0.001376,0.155,0
4,112.723,-0.000106,112.736827,112.538340,112.637583,67.605634,0.001110,0.125,1
...,...,...,...,...,...,...,...,...,...
618026,149.701,0.000000,149.716838,149.638779,149.677808,55.806452,0.000241,0.036,0
618027,149.668,-0.000220,149.718025,149.641358,149.679692,50.299401,0.000013,0.002,1
618028,149.689,0.000140,149.718230,149.643920,149.681075,53.409091,0.000160,0.024,0
618029,149.658,-0.000207,149.718819,149.643964,149.681392,48.238482,-0.000087,-0.013,0


## モデルの構築

In [11]:
# ファイルからストリームを作成して、データを1つずつモデルに入力する
stream = FileStream('USDJPY_ti_5m.csv',target_idx=-1)

# 使用する属性を指定: "Final Price", "Return", 'Upper Band', 'Lower Band', 'Rolling Mean', 'RSI', 'RCI', 'Momentum'
nominal_attributes = ['Final Price','Return','Upper Band','Lower Band','Rolling Mean','RSI','RCI','Momentum']

# 指定された名義属性を持つ ExtremelyFastDecisionTreeClassifier を作成する
efdt = ExtremelyFastDecisionTreeClassifier(nominal_attributes=nominal_attributes)

# モデルの性能を評価するための評価器を作成する
metrics = ['precision', 'recall', 'accuracy', 'kappa', 'f1']
evaluator = EvaluatePrequential(show_plot=False, pretrain_size=90000, max_samples=3090000, metrics=metrics)

# モデルを訓練、評価する
evaluator.evaluate(stream=stream, model=efdt)

Prequential Evaluation
Evaluating 1 target(s).
Pre-training on 600000 sample(s).
Evaluating...
 #################### [100%] [15197.40s]
Processed samples: 618000
Mean performance:
M0 - Accuracy     : 0.6456
M0 - Kappa        : 0.3007
M0 - Precision: 0.5949
M0 - Recall: 0.7942
M0 - F1 score: 0.6802


[ExtremelyFastDecisionTreeClassifier(binary_split=False, grace_period=200,
                                     leaf_prediction='nba',
                                     max_byte_size=33554432,
                                     memory_estimate_period=1000000,
                                     min_samples_reevaluate=20, nb_threshold=0,
                                     nominal_attributes=['Final Price', 'Return',
                                                         'Upper Band',
                                                         'Lower Band',
                                                         'Rolling Mean', 'RSI',
                                                         'RCI', 'Momentum'],
                                     split_confidence=1e-07,
                                     split_criterion='info_gain',
                                     stop_mem_management=False,
                                     tie_threshold=0.05)]

## モデルの訓練とテスト

In [12]:
stream.has_more_samples()

True

In [13]:
stream.restart()

In [14]:
# ファイルからストリームを作成して、データを1つずつモデルに入力する
stream = FileStream('USDJPY_ti_5m.csv',target_idx=-1)

# 使用する属性を指定: "Final Price", "Return", 'Upper Band', 'Lower Band', 'Rolling Mean', 'RSI', 'RCI', 'Momentum'
nominal_attributes = ['Final Price','Return','Upper Band','Lower Band','Rolling Mean','RSI','RCI','Momentum']

# 使用する属性を指定: "Final Price", "Return", 'Upper Band', 'Lower Band', 'Rolling Mean', 'RSI', 'RCI', 'Momentum'
efdt = ExtremelyFastDecisionTreeClassifier(nominal_attributes=nominal_attributes)
train_set = []

# モデルを訓練する
for i in range(600000):  
    X, y = stream.next_sample()
    train_set.append(X)
    efdt.fit(X, y, classes=np.unique(stream.target_values))

In [15]:
# 実際のラベルのリストと予測ラベルのリスト
test_data = []
true_labels = []
predicted_labels = []

# 次の18000サンプルに対する予測を収集
for i in range(18000):
    X, y_true = stream.next_sample()
    y_pred = efdt.predict(X)
    
    test_data.append(X)
    true_labels.append(y_true)
    predicted_labels.append(y_pred)
    
true_labels = np.concatenate(true_labels)
predicted_labels = np.concatenate(predicted_labels)

# 評価メトリクスを計算
accuracy = accuracy_score(true_labels, predicted_labels)
precision = precision_score(true_labels, predicted_labels)
recall = recall_score(true_labels, predicted_labels)
f1 = f1_score(true_labels, predicted_labels)
auc = roc_auc_score(true_labels, predicted_labels)  # AUC for binary classification
aupr = average_precision_score(true_labels, predicted_labels)  # AUC-PR for binary classification

print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")
print(f"AUC: {auc:.4f}")
print(f"AUPR: {aupr:.4f}")

Accuracy: 0.6280
Precision: 0.7045
Recall: 0.3724
F1 Score: 0.4872
AUC: 0.6156
AUPR: 0.5602


## モデルを解釈する

In [None]:
# 次のデータを用意
x, y_true = stream.next_sample()

In [17]:
# データ訓練済みのモデルに入力して予測を出力する、各ラベルに対する確率
y_prediction = efdt.predict(x)
probability = efdt.predict_proba(x)
print("The input is:", x)
print("The true label is:", y_true)
print("The prediction is:", y_prediction)
print("The probability of each label is:", probability)

The input is: [[ 1.49628000e+02 -8.68783603e-05  1.49699192e+02  1.49606708e+02
   1.49652950e+02  4.25316456e+01 -3.94135471e-04 -5.90000000e-02]]
The true label is: [0]
The prediction is: [0]
The probability of each label is: [[1.00000000e+00 3.59568661e-18]]


In [23]:
# 決定木を可視化する
tree_depth = efdt.measure_tree_depth()
print("The depth of the tree is:",tree_depth)

tree_size = efdt.measure_byte_size()
print("The size of the tree is:",tree_size)

The depth of the tree is: 6
The size of the tree is: 631262


In [19]:
# 決定木を可視化する
tree_structure = efdt.get_model_description()
print(tree_structure)

if Attribute 5 <= 45.45454545454546:
  if Attribute 5 <= 25.701459034793093:
    if Attribute 5 <= 14.603044014808056:
      if Attribute 5 <= 1.5353032177997235:
        if Attribute 1 <= -5.559007448960958e-05:
          Leaf = Class 0 | {0: 26.442548339720815, 1: 18.662739486055216}
        if Attribute 1 > -5.559007448960958e-05:
          Leaf = Class 0 | {0: 3634.557451660279, 1: 13.337260513944784}
      if Attribute 5 > 1.5353032177997235:
        Leaf = Class 0 | {0: 599.2114403959567, 1: 362.949977043843}
    if Attribute 5 > 14.603044014808056:
      if Attribute 5 <= 24.172039766510117:
        if Attribute 1 <= -0.000772497481036691:
          Leaf = Class 1 | {0: 3.9907672535191523, 1: 12.483082657072373}
        if Attribute 1 > -0.000772497481036691:
          Leaf = Class 0 | {0: 335.00923274648085, 1: 103.51691734292763}
      if Attribute 5 > 24.172039766510117:
        if Attribute 1 <= 0.0002975660396439272:
          Leaf = Class 0 | {0: 281.6874447653827, 1: 168.

In [20]:
# モデルの意思決定プロセスを出力する
rules = efdt.get_rules_description()
print(rules)

Att (5) <= 45.450 and Att (5) <= 25.700 and Att (5) <= 14.600 and Att (5) <= 1.540 and Att (1) <= -0.000 | class: 0
Att (5) <= 45.450 and Att (5) <= 25.700 and Att (5) <= 14.600 and Att (5) <= 1.540 and Att (1) > -0.000 | class: 0
Att (5) <= 45.450 and Att (5) <= 25.700 and Att (5) <= 14.600 and Att (5) > 1.540 | class: 0
Att (5) <= 45.450 and Att (5) <= 25.700 and Att (5) > 14.600 and Att (5) <= 24.170 and Att (1) <= -0.000 | class: 1
Att (5) <= 45.450 and Att (5) <= 25.700 and Att (5) > 14.600 and Att (5) <= 24.170 and Att (1) > -0.000 | class: 0
Att (5) <= 45.450 and Att (5) <= 25.700 and Att (5) > 14.600 and Att (5) > 24.170 and Att (1) <= 0.000 | class: 0
Att (5) <= 45.450 and Att (5) <= 25.700 and Att (5) > 14.600 and Att (5) > 24.170 and Att (1) > 0.000 | class: 1
Att (5) <= 45.450 and Att (5) > 25.700 and Att (5) <= 49.860 and Att (1) <= 0.000 and Att (1) <= -0.000 | class: 1
Att (5) <= 45.450 and Att (5) > 25.700 and Att (5) <= 49.860 and Att (1) <= 0.000 and Att (1) > -0.000 