# FXの将来のリターン動向の予測モデル

目的：将来1分ごとのリターン正負の予測  
モデル：Extremely Fast Decision Tree   
開発環境: python 3.8.10/ JupyterLab 2.3.2/ System Information: System: Linux (179-Ubuntu SMP)

## 必要なライブラリをインポート

In [14]:
import pandas as pd
import numpy as np

from skmultiflow.trees import ExtremelyFastDecisionTreeClassifier
from skmultiflow.evaluation import EvaluatePrequential
from skmultiflow.data.file_stream import FileStream
from skmultiflow.utils import get_dimensions

from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, average_precision_score

Pandas version: 1.5.2   
Numpy version: 1.22.2   
Scikit-learn version: 1.2.0   
Scikit-multiflow version: 0.5.3

## 前処理

In [13]:
# USDJPY通貨ペアデータセットをDataFrameに読み込む
USDJPY_ti = pd.read_csv('USDJPY_ti.csv',sep = ',')

display(USDJPY_ti)

Unnamed: 0,Times,Final Price,Final Price Normalized,Upper Band,Lower Band,Rolling Mean,RSI,RCI,Momentum
0,2018-01-01 18:09:00,112.650,0.225644,112.657555,112.561912,112.609733,56.844548,0.000524,0.059
1,2018-01-01 18:10:00,112.635,0.225348,112.659427,112.561673,112.610550,53.990610,0.000302,0.034
2,2018-01-01 18:11:00,112.646,0.225565,112.660132,112.561601,112.610867,51.605505,0.000125,0.014
3,2018-01-01 18:12:00,112.645,0.225545,112.661314,112.561253,112.611283,52.796421,0.000222,0.025
4,2018-01-01 18:13:00,112.642,0.225486,112.662409,112.560958,112.611683,52.466368,0.000196,0.022
...,...,...,...,...,...,...,...,...,...
3090161,2023-11-17 16:50:00,149.644,0.954934,149.721929,149.635588,149.678758,48.203593,-0.000080,-0.012
3090162,2023-11-17 16:51:00,149.642,0.954895,149.722299,149.634751,149.678525,47.916667,-0.000093,-0.014
3090163,2023-11-17 16:52:00,149.641,0.954875,149.722731,149.633786,149.678258,47.477745,-0.000113,-0.017
3090164,2023-11-17 16:53:00,149.642,0.954895,149.723195,149.632721,149.677958,47.619048,-0.000107,-0.016


In [14]:
# 1分ごとのリターンを計算する, ln[p(t)/p(t-1)]
USDJPY_ti['Return'] = np.log(USDJPY_ti['Final Price'] / USDJPY_ti['Final Price'].shift(1))
USDJPY_ti['Return'] = USDJPY_ti['Return'].fillna(0)

display(USDJPY_ti)

Unnamed: 0,Times,Final Price,Final Price Normalized,Upper Band,Lower Band,Rolling Mean,RSI,RCI,Momentum,Return
0,2018-01-01 18:09:00,112.650,0.225644,112.657555,112.561912,112.609733,56.844548,0.000524,0.059,0.000000
1,2018-01-01 18:10:00,112.635,0.225348,112.659427,112.561673,112.610550,53.990610,0.000302,0.034,-0.000133
2,2018-01-01 18:11:00,112.646,0.225565,112.660132,112.561601,112.610867,51.605505,0.000125,0.014,0.000098
3,2018-01-01 18:12:00,112.645,0.225545,112.661314,112.561253,112.611283,52.796421,0.000222,0.025,-0.000009
4,2018-01-01 18:13:00,112.642,0.225486,112.662409,112.560958,112.611683,52.466368,0.000196,0.022,-0.000027
...,...,...,...,...,...,...,...,...,...,...
3090161,2023-11-17 16:50:00,149.644,0.954934,149.721929,149.635588,149.678758,48.203593,-0.000080,-0.012,-0.000013
3090162,2023-11-17 16:51:00,149.642,0.954895,149.722299,149.634751,149.678525,47.916667,-0.000093,-0.014,-0.000013
3090163,2023-11-17 16:52:00,149.641,0.954875,149.722731,149.633786,149.678258,47.477745,-0.000113,-0.017,-0.000007
3090164,2023-11-17 16:53:00,149.642,0.954895,149.723195,149.632721,149.677958,47.619048,-0.000107,-0.016,0.000007


In [15]:
# もしリターンが０、もしくは０以上であれば、その時点のリターンはプラスです。それ以外の場合はマイナス
# Label＿nowは今この時点のリターン、Labelは次の時点のリターン
USDJPY_ti['Label_now'] = np.where(USDJPY_ti['Return'] >= 0, 1, 0)
USDJPY_ti['Label'] = USDJPY_ti['Label_now'].shift(-1)

USDJPY_ti = USDJPY_ti.drop(USDJPY_ti.index[-1])

USDJPY_ti['Label'] = USDJPY_ti['Label'].astype(int)

display(USDJPY_ti)

Unnamed: 0,Times,Final Price,Final Price Normalized,Upper Band,Lower Band,Rolling Mean,RSI,RCI,Momentum,Return,Label_now,Label
0,2018-01-01 18:09:00,112.650,0.225644,112.657555,112.561912,112.609733,56.844548,0.000524,0.059,0.000000,1,0
1,2018-01-01 18:10:00,112.635,0.225348,112.659427,112.561673,112.610550,53.990610,0.000302,0.034,-0.000133,0,1
2,2018-01-01 18:11:00,112.646,0.225565,112.660132,112.561601,112.610867,51.605505,0.000125,0.014,0.000098,1,0
3,2018-01-01 18:12:00,112.645,0.225545,112.661314,112.561253,112.611283,52.796421,0.000222,0.025,-0.000009,0,0
4,2018-01-01 18:13:00,112.642,0.225486,112.662409,112.560958,112.611683,52.466368,0.000196,0.022,-0.000027,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...
3090160,2023-11-17 16:49:00,149.646,0.954974,149.721699,149.636085,149.678892,48.961424,-0.000047,-0.007,-0.000007,0,0
3090161,2023-11-17 16:50:00,149.644,0.954934,149.721929,149.635588,149.678758,48.203593,-0.000080,-0.012,-0.000013,0,0
3090162,2023-11-17 16:51:00,149.642,0.954895,149.722299,149.634751,149.678525,47.916667,-0.000093,-0.014,-0.000013,0,0
3090163,2023-11-17 16:52:00,149.641,0.954875,149.722731,149.633786,149.678258,47.477745,-0.000113,-0.017,-0.000007,0,1


In [None]:
USDJPY_minute_ti = USDJPY_ti[['Final Price','Return','Upper Band','Lower Band','Rolling Mean','RSI','RCI','Momentum','Label']]
display(USDJPY_minute_ti)

# 処理済みのデータを保存する
USDJPY_minute_ti.to_csv('USDJPY_minute_ti.csv', index=False)

Unnamed: 0,Final Price,Return,Upper Band,Lower Band,Rolling Mean,RSI,RCI,Momentum,Label
0,112.650,0.000000,112.657555,112.561912,112.609733,56.844548,0.000524,0.059,0
1,112.635,-0.000133,112.659427,112.561673,112.610550,53.990610,0.000302,0.034,1
2,112.646,0.000098,112.660132,112.561601,112.610867,51.605505,0.000125,0.014,0
3,112.645,-0.000009,112.661314,112.561253,112.611283,52.796421,0.000222,0.025,0
4,112.642,-0.000027,112.662409,112.560958,112.611683,52.466368,0.000196,0.022,1
...,...,...,...,...,...,...,...,...,...
3090160,149.646,-0.000007,149.721699,149.636085,149.678892,48.961424,-0.000047,-0.007,0
3090161,149.644,-0.000013,149.721929,149.635588,149.678758,48.203593,-0.000080,-0.012,0
3090162,149.642,-0.000013,149.722299,149.634751,149.678525,47.916667,-0.000093,-0.014,0
3090163,149.641,-0.000007,149.722731,149.633786,149.678258,47.477745,-0.000113,-0.017,1


In [2]:
USDJPY_minute_ti = pd.read_csv('USDJPY_minute_ti.csv', sep=',')
display(USDJPY_minute_ti)

Unnamed: 0,Final Price,Return,Upper Band,Lower Band,Rolling Mean,RSI,RCI,Momentum,Label
0,112.650000,0.000000e+00,112.657555,112.561912,112.609733,56.844548,0.000524,0.0590,0
1,112.635000,-1.331647e-04,112.659427,112.561673,112.610550,53.990610,0.000302,0.0340,1
2,112.646000,9.765582e-05,112.660132,112.561601,112.610867,51.605505,0.000125,0.0140,0
3,112.645000,-8.877407e-06,112.661314,112.561253,112.611283,52.796421,0.000222,0.0250,0
4,112.642000,-2.663270e-05,112.662409,112.560958,112.611683,52.466368,0.000196,0.0220,1
...,...,...,...,...,...,...,...,...,...
1893004,110.266817,2.612703e-07,110.266945,110.264932,110.265939,100.000000,0.000015,0.0017,1
1893005,110.266846,2.612702e-07,110.266974,110.264961,110.265968,100.000000,0.000015,0.0017,1
1893006,110.266875,2.612702e-07,110.267003,110.264990,110.265996,100.000000,0.000015,0.0017,1
1893007,110.266904,2.612701e-07,110.267031,110.265019,110.266025,100.000000,0.000015,0.0017,1


## モデルの構築

In [10]:
# ファイルからストリームを作成して、データを1つずつモデルに入力する
stream = FileStream('USDJPY_minute_ti.csv',target_idx=-1)

# 使用する属性を指定: "Final Price", "Return", 'Upper Band', 'Lower Band', 'Rolling Mean', 'RSI', 'RCI', 'Momentum'
nominal_attributes = ['Final Price','Return','Upper Band','Lower Band','Rolling Mean','RSI','RCI','Momentum']

# 指定された名義属性を持つ ExtremelyFastDecisionTreeClassifier を作成する
efdt = ExtremelyFastDecisionTreeClassifier(nominal_attributes=nominal_attributes)

# モデルの性能を評価するための評価器を作成する
metrics = ['precision', 'recall', 'accuracy', 'kappa', 'f1']
evaluator = EvaluatePrequential(show_plot=False, pretrain_size=90000, max_samples=3090000, metrics=metrics)

# モデルを訓練、評価する
evaluator.evaluate(stream=stream, model=efdt)

Prequential Evaluation
Evaluating 1 target(s).
Pre-training on 90000 sample(s).
Evaluating...
 #################### [100%] [86392.92s]
Processed samples: 3090000
Mean performance:
M0 - Accuracy     : 0.6678
M0 - Kappa        : 0.3275
M0 - Precision: 0.6378
M0 - Recall: 0.8291
M0 - F1 score: 0.7210


[ExtremelyFastDecisionTreeClassifier(binary_split=False, grace_period=200,
                                     leaf_prediction='nba',
                                     max_byte_size=33554432,
                                     memory_estimate_period=1000000,
                                     min_samples_reevaluate=20, nb_threshold=0,
                                     nominal_attributes=['Final Price', 'Return',
                                                         'Upper Band',
                                                         'Lower Band',
                                                         'Rolling Mean', 'RSI',
                                                         'RCI', 'Momentum'],
                                     split_confidence=1e-07,
                                     split_criterion='info_gain',
                                     stop_mem_management=False,
                                     tie_threshold=0.05)]

## モデルの訓練とテスト

In [None]:
stream.restart()

In [3]:
# ファイルからストリームを作成して、データを1つずつモデルに入力する
stream = FileStream('USDJPY_minute_ti.csv',target_idx=-1)

# 使用する属性を指定: "Final Price", "Return", 'Upper Band', 'Lower Band', 'Rolling Mean', 'RSI', 'RCI', 'Momentum'
nominal_attributes = ['Final Price','Return','Upper Band','Lower Band','Rolling Mean','RSI','RCI','Momentum']

# 指定された名義属性を持つ ExtremelyFastDecisionTreeClassifierを作成する
efdt = ExtremelyFastDecisionTreeClassifier(nominal_attributes=nominal_attributes)
train_set = []

# モデルを訓練する
for i in range(90000):  
    X, y = stream.next_sample()
    train_set.append(X)
    efdt.fit(X, y, classes=np.unique(stream.target_values))

In [4]:
# 実際のラベルのリストと予測ラベルのリスト
test_data = []
true_labels = []
predicted_labels = []

# 次の30000サンプルに対する予測を収集
for i in range(30000):
    X, y_true = stream.next_sample()
    y_pred = efdt.predict(X)
    
    test_data.append(X)
    true_labels.append(y_true)
    predicted_labels.append(y_pred)
    
true_labels = np.concatenate(true_labels)
predicted_labels = np.concatenate(predicted_labels)

# 評価メトリクスを計算
accuracy = accuracy_score(true_labels, predicted_labels)
precision = precision_score(true_labels, predicted_labels)
recall = recall_score(true_labels, predicted_labels)
f1 = f1_score(true_labels, predicted_labels)
auc = roc_auc_score(true_labels, predicted_labels)  # AUC for binary classification
aupr = average_precision_score(true_labels, predicted_labels)  # AUC-PR for binary classification

print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")
print(f"AUC: {auc:.4f}")
print(f"AUPR: {aupr:.4f}")

Accuracy: 0.6573
Precision: 0.5278
Recall: 0.7927
F1 Score: 0.6337
AUC: 0.6845
AUPR: 0.4959


## モデルを解釈する

In [5]:
# 次のデータを用意
x, y_true = stream.next_sample()

In [6]:
# データ訓練済みのモデルに入力して予測を出力する、各ラベルに対する確率
y_prediction = efdt.predict(x)
probability = efdt.predict_proba(x)
print("The input is:", x)
print("The true label is:", y_true)
print("The prediction is:", y_prediction)
print("The probability of each label is:", probability)

The input is: [[ 1.05029000e+02 -6.66460382e-05  1.05066270e+02  1.04856396e+02
   1.04961333e+02  6.56660413e+01  1.59161946e-03  1.67000000e-01]]
The true label is: [0]
The prediction is: [0]
The probability of each label is: [[0.52363907 0.47636093]]


In [7]:
# 決定木を可視化する
tree_structure = efdt.get_model_description()
print(tree_structure)

if Attribute 5 <= 63.63636363636364:
  if Attribute 5 <= 35.59461686349996:
    if Attribute 5 <= 22.650231124807274:
      if Attribute 2 <= 105.91294305789833:
        Leaf = Class 0 | {0: 2717.659415816229, 1: 2.0}
      if Attribute 2 > 105.91294305789833:
        Leaf = Class 0 | {0: 109.34058418377117, 1: 25.0}
    if Attribute 5 > 22.650231124807274:
      Leaf = Class 1 | {0: 667.0838870400626, 1: 842.7159034850163}
  if Attribute 5 > 35.59461686349996:
    if Attribute 1 <= -1.6071963096627286e-05:
      if Attribute 1 <= -0.00026066825882373776:
        Leaf = Class 1 | {0: 1331.8758244840428, 1: 1601.8145176945254}
      if Attribute 1 > -0.00026066825882373776:
        if Attribute 1 <= -0.00012855773414248347:
          Leaf = Class 1 | {0: 426.47679074294865, 1: 559.0846341643482}
        if Attribute 1 > -0.00012855773414248347:
          if Attribute 1 <= -7.581651894640986e-05:
            Leaf = Class 1 | {0: 169.25667359493673, 1: 185.4970407555811}
          if Attr

In [9]:
# モデルの意思決定プロセスを出力する
rules = efdt.get_rules_description()
print(rules)

Att (5) <= 63.640 and Att (5) <= 35.590 and Att (5) <= 22.650 and Att (2) <= 105.910 | class: 0
Att (5) <= 63.640 and Att (5) <= 35.590 and Att (5) <= 22.650 and Att (2) > 105.910 | class: 0
Att (5) <= 63.640 and Att (5) <= 35.590 and Att (5) > 22.650 | class: 1
Att (5) <= 63.640 and Att (5) > 35.590 and Att (1) <= -0.000 and Att (1) <= -0.000 | class: 1
Att (5) <= 63.640 and Att (5) > 35.590 and Att (1) <= -0.000 and Att (1) > -0.000 and Att (1) <= -0.000 | class: 1
Att (5) <= 63.640 and Att (5) > 35.590 and Att (1) <= -0.000 and Att (1) > -0.000 and Att (1) > -0.000 and Att (1) <= -0.000 | class: 1
Att (5) <= 63.640 and Att (5) > 35.590 and Att (1) <= -0.000 and Att (1) > -0.000 and Att (1) > -0.000 and Att (1) > -0.000 | class: 0
Att (5) <= 63.640 and Att (5) > 35.590 and Att (1) > -0.000 and Att (1) <= 0.000 and Att (6) <= 0.000 | class: 1
Att (5) <= 63.640 and Att (5) > 35.590 and Att (1) > -0.000 and Att (1) <= 0.000 and Att (6) > 0.000 | class: 0
Att (5) <= 63.640 and Att (5) > 