In [1]:
import importlib

if importlib.util.find_spec('gpytorch') is None:
    !pip install gpytorch
else:
    print('gpytorch is already installed')

import pandas as pd
from data_preprocessing import S3DataFrame, DataConverter  # カスタムデータクラス
from typing import Callable  # lambda関数での型アノテーション

# 必要なライブラリのインポート

import torch
import time
import csv
from baseGPR import GPModel
from localUtil import mk_data_for_gpr, mk_prediction

import requests

def is_running_on_aws():
    """Return True when the EC2 instance metadata service answers successfully.

    The check first asks for an IMDSv2 session token (HTTP PUT) and sends it
    with the metadata request. Instances that require IMDSv2 reject
    unauthenticated GETs with 401, so a plain GET alone would misreport them
    as "not on AWS". If the token request fails, the function falls back to an
    unauthenticated (IMDSv1-style) GET.

    Returns:
        bool: True if the metadata endpoint replied with HTTP 200, False if it
        replied with another status or could not be reached within the
        timeout.
    """
    metadata_base = 'http://169.254.169.254/latest'
    headers = {}

    try:
        token_response = requests.put(
            f'{metadata_base}/api/token',
            headers={'X-aws-ec2-metadata-token-ttl-seconds': '60'},
            timeout=1,
        )
        if token_response.status_code == 200:
            headers['X-aws-ec2-metadata-token'] = token_response.text
    except requests.exceptions.RequestException:
        # No token available: still try the unauthenticated request below.
        pass

    try:
        response = requests.get(f'{metadata_base}/meta-data/', headers=headers, timeout=1)
        return response.status_code == 200
    except requests.exceptions.RequestException:
        return False

gpytorch is already installed


In [2]:
# Where the input file lives: a local CSV when not on AWS, otherwise the
# bucket/object pair handed to S3DataFrame.
if not is_running_on_aws():
    file_name = 'MarketDataForFxModel.csv'
    s3df = S3DataFrame.genDataFrameObjFromLocalFile(file_name)
else:
    bucket_name = 'sharebucket-aiml'
    object_name = 'dc-to-risk/market/MarketDataForFxModel.csv'
    s3df = S3DataFrame(bucket_name, object_name)

In [3]:
# Pick the compute device: CUDA when torch reports it available, CPU otherwise.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print('Using {} device'.format(device))

Using cpu device


In [5]:
# Estimation / prediction settings and initialisation

start_date = '2018-01-01'
end_date = '2018-01-10'

n_iter = 100  # forwarded to trainer.train(..., n_iter=n_iter)

# Base data: FX is the prediction target, so it is shifted; equities and the
# FX rate are explanatory variables.
ndays = 20
header_w_shift_list = ['USDJPY']
header_wo_shift_list = ['USDJPY', 'NK225', 'SP500']


def mk_feature_set(dt: int) -> list[str]:
    """Return the list of feature column names for the given ``dt``.

    ``dt`` is inserted as the suffix of the ``*_Return_`` column names; the
    remaining column names are fixed.
    """
    return ['date', f'NK225_Return_{dt}', f'SP500_Return_{dt}', 'NK225VolatilityIndex', 'SPXVolatilityIndex' , f'USDJPY_Return_{dt}', 'USDJPYIV1MATM', 'XI', 'DFd', 'DFf']


base_data = DataConverter(s3df=s3df, ndays=ndays, feature_set=mk_feature_set(ndays), rtn_set_w_shft=header_w_shift_list, rtn_set_wo_shft=header_wo_shift_list)

# Per-base-date result accumulators filled by the estimation loop.
lossess = []
dates = []
preds = []
probs = []
stds = []
acts = []

# Seed torch (CPU and, when present, every CUDA device) for reproducibility.
seed = 0
torch.manual_seed(seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(seed)
# NumPy is not seeded here: np.random.seed(seed) is intentionally left disabled.
# np.random.seed(seed)

model_types = ["RBF", "Matern"]

model_type = 0  # index into model_types
nu = 2.5  # nu (degrees-of-freedom parameter) of the Matern kernel

  rtndf[new_header] = rtndf[header].pct_change(periods=n)
  rtndf[new_header] = rtndf[header].pct_change(periods=n)


In [6]:
# gpytorch を使ったガウス過程回帰による推定・予測
for base_date in base_data.get_base_dates(start_date, end_date):
    start_time = time.time()
    train_prem, train_feature, train_data, test_feature, test_data = mk_data_for_gpr(base_data, base_date, device)
    # データの特徴とターゲットの学習
    # モデルの設定
    match model_types[model_type]:
        case "RBF":
            model, likelihood, trainer = GPModel.standard_initialize(train_feature, train_prem, device)
        case "Matern":
            model, likelihood, trainer = GPModel.keoops_Matern_initialize(train_feature, train_prem, nu, device)
        case _:
            raise Exception("unexpected mode is specified.")

    # モデルの学習
    losses = trainer.train(model, likelihood, train_feature, train_prem, n_iter=n_iter)
    lossess.append(losses[-1])

    # テストデータに対する予測
    mean, varinace = trainer.predict(test_feature[0].view(1, test_feature.shape[1]))
    fxrtn_act, fxrtn_mean, fxrtn_std, probability_less_than_x = mk_prediction(mean, varinace, test_data.iloc[0])
    dates.append(base_date)
    preds.append(fxrtn_mean)
    probs.append(probability_less_than_x)
    stds.append(fxrtn_std)
    acts.append(fxrtn_act)

    print(f'{base_date.strftime("%Y-%m-%d")} Learning time: %.3f' % (time.time() - start_time))

rst_df = pd.DataFrame({'Date': dates, 'Actual': acts, 'Predict': preds, 'ProbAboveZero': probs, 'Std': stds})

print(rst_df)

2018-01-04 Learning time: 72.728
2018-01-05 Learning time: 66.647
2018-01-09 Learning time: 64.997
        Date    Actual   Predict  ProbAboveZero       Std
0 2018-01-04 -0.024312  0.006084       0.622502  0.019498
1 2018-01-05 -0.015565  0.001875       0.536723  0.020341
2 2018-01-09 -0.013330  0.004138       0.581829  0.020030


In [None]:
# Save the prediction results
rst_df.to_csv(f'rst_{start_date}_{end_date}.csv')

# Save the losses: the whole list is written as a single CSV row
with open(f'損失_{start_date}_{end_date}.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(lossess)

# Move each tensor to the CPU, convert it to a NumPy array, wrap it in a
# DataFrame and write it to its CSV file without the index column.
for tensor, csv_name in ((train_prem, 'premiums.csv'), (train_feature, 'features.csv')):
    df = pd.DataFrame(tensor.cpu().numpy())
    df.to_csv(csv_name, index=False)