In [6]:
import os
import yfinance as yf
import pandas as pd
import streamlit as st
from datetime import datetime, timedelta

import yfinance as yf
import pandas as pd
import numpy as np
import streamlit as st
import os
from datetime import datetime, timedelta
from typing import List

import warnings
from pandas.errors import PerformanceWarning

# Silence pandas' PerformanceWarning for the rest of the kernel session.
# Only this one warning category is ignored; every other warning is still shown.
warnings.filterwarnings('ignore', category=PerformanceWarning)


In [7]:
# Make sure the CSV cache directory exists before any download runs.
# exist_ok=True makes this idempotent and avoids the check-then-create race
# of a separate os.path.exists() test.
os.makedirs('./data', exist_ok=True)

def download_stock_data(symbols: List[str], target_symbols: List[str], start_date: str, end_date: str):
    """Load daily price history for every symbol, using ./data/<symbol>.csv as a cache.

    For each distinct symbol in ``symbols + target_symbols``:

    * if no usable cache file exists, the full ``start_date``..``end_date``
      range is downloaded with ``yf.download`` and written to the cache;
    * otherwise only the days after the last cached row are downloaded and
      appended to the cached frame. When there is nothing new (for example the
      markets were closed), the cached data is used unchanged.

    Frames of symbols that neither end in ``.T`` nor equal ``JPY=X`` are
    multiplied by the same-day ``JPY=X`` "Adj Close" before being cached, so the
    cache always holds converted values and only freshly downloaded rows are
    converted.

    Symbols for which no "Adj Close" data could be obtained are removed from
    ``symbols`` and ``target_symbols`` **in place**, and the pruned lists are
    written to ``./feat_symbols.txt`` and ``./target_symbols.txt``.

    Args:
        symbols: Feature symbols. Mutated in place when symbols are dropped.
        target_symbols: Target symbols. Mutated in place when symbols are dropped.
        start_date: First day to request; forwarded to ``yf.download``.
        end_date: End of the requested range; forwarded to ``yf.download`` and
            also compared against the last cached date via ``pd.Timestamp``.

    Returns:
        ``(data, symbols, target_symbols)`` where ``data`` has one "Adj Close"
        column per surviving symbol. Rows follow the index of the first
        surviving symbol in input order; the other columns are aligned to it.
    """
    # The function writes into ./data, so it must not depend on another cell
    # having created the directory.
    os.makedirs('./data', exist_ok=True)

    # dict.fromkeys de-duplicates while keeping the caller's order. Iterating a
    # set here would make the row index of the result depend on hash ordering.
    all_symbols = list(dict.fromkeys(list(symbols) + list(target_symbols)))
    remove_symbols = set()
    adj_close = {}

    # Download JPY=X data first: it is the conversion rate for non-Tokyo symbols.
    jpy_data = yf.download("JPY=X", start=start_date, end=end_date, interval='1d')
    jpy_data = jpy_data["Adj Close"]

    def _to_jpy(symbol, frame):
        """Return ``frame`` multiplied by the same-day JPY=X rate when the symbol needs it."""
        if symbol[-2:] != ".T" and symbol != "JPY=X":
            # Reindex the rate onto the frame's own dates: a plain .mul() would
            # outer-join the two indexes and add all-NaN rows for FX-only dates.
            return frame.mul(jpy_data.reindex(frame.index), axis=0)
        return frame

    for symbol in all_symbols:
        file_name = f"./data/{symbol}.csv"

        stock_data = None
        if os.path.isfile(file_name):
            cached = pd.read_csv(file_name, index_col=0, parse_dates=True)
            cached.index = pd.to_datetime(cached.index)
            if not cached.empty:
                stock_data = cached

        if stock_data is None:
            # No usable cache: fetch the whole range.
            stock_data = yf.download(symbol, start=start_date, end=end_date, interval='1d')
            if not stock_data.empty:
                stock_data = _to_jpy(symbol, stock_data)
                stock_data.to_csv(file_name)
        else:
            # Refresh only when the cache ends before the requested end date.
            next_day = stock_data.index[-1].normalize() + pd.Timedelta(days=1)
            if next_day < pd.Timestamp(end_date):
                new_data = yf.download(symbol, start=next_day.date(), end=end_date, interval='1d')
                if not new_data.empty and not pd.isna(new_data['Adj Close'].iloc[-1]):
                    # Cached rows are already converted; convert the new rows only.
                    stock_data = pd.concat([stock_data, _to_jpy(symbol, new_data)])
                    stock_data = stock_data[~stock_data.index.duplicated(keep='last')]
                    stock_data.to_csv(file_name)
                # Otherwise there is nothing new yet; keep using the cached rows.

        if stock_data.empty or 'Adj Close' not in stock_data.columns:
            remove_symbols.add(symbol)
            continue

        series = stock_data['Adj Close']
        # Duplicate dates would make the alignment below fail.
        adj_close[symbol] = series[~series.index.duplicated(keep='last')]

    if adj_close:
        # Align every column to the first surviving symbol's dates in a single
        # construction instead of inserting columns one at a time.
        base_index = next(iter(adj_close.values())).index
        data = pd.DataFrame(adj_close, index=base_index)
    else:
        data = pd.DataFrame()

    # Prune in place so that callers holding references to these lists see the
    # same result as the returned values.
    symbols[:] = [s for s in symbols if s not in remove_symbols]
    target_symbols[:] = [s for s in target_symbols if s not in remove_symbols]

    with open("./feat_symbols.txt", "w") as f:
        for symbol in symbols:
            f.write(f"{symbol}\n")
    with open("./target_symbols.txt", "w") as f:
        for symbol in target_symbols:
            f.write(f"{symbol}\n")

    return data, symbols, target_symbols


def prepare_data(data, symbols, target_symbols):
    """Build one cleaned feature frame per target symbol.

    Each frame has the target's column first, followed by every other symbol
    from ``symbols`` in order. Cleaning steps, applied to a copy of ``data``:

    1. the placeholder strings 'None', 'null', 'nan' and 'NA' become NaN;
    2. gaps are forward-filled with the most recent earlier value;
    3. any column that still contains NaN (i.e. has no value at the start of
       the frame) is dropped.

    Args:
        data: DataFrame with one column per symbol.
        symbols: Feature symbols; each must be a column of ``data``.
        target_symbols: Target symbols; each must be a column of ``data``.

    Returns:
        dict mapping each target symbol to its cleaned DataFrame.

    Raises:
        KeyError: if a requested symbol is not a column of ``data``.
    """
    prepared_data = {}
    for target_symbol in target_symbols:
        # Select all columns in one go (target first) instead of inserting them
        # one by one, which fragments the frame.
        feature_columns = [s for s in dict.fromkeys(symbols) if s != target_symbol]
        frame = data[[target_symbol] + feature_columns].copy()

        # Replace textual placeholders with NaN
        frame = frame.replace(to_replace=['None', 'null', 'nan', 'NA'], value=np.nan)

        # Fill missing values with the most recent earlier value, then drop the
        # columns that still have gaps.
        frame = frame.ffill()
        frame = frame.dropna(axis=1)

        # After dropna(axis=1) no NaN can remain, so the useful check is whether
        # the target itself was dropped because of unfilled NaN values.
        if target_symbol not in frame.columns:
            st.write(f"{target_symbol} : NaN values found.")

        prepared_data[target_symbol] = frame

    return prepared_data

In [8]:
# Read a list of ticker symbols from a text file
def get_symbol_list(file):
    """Return the symbols stored in ``file``, one per line.

    Surrounding whitespace is stripped from every line. Blank and
    whitespace-only lines are skipped so they never turn into empty-string
    symbols.

    Args:
        file: Path of the text file to read.

    Returns:
        list[str]: the symbols in file order.
    """
    with open(file, "r") as f:
        stripped = (line.strip() for line in f)
        symbols = [symbol for symbol in stripped if symbol]
    return symbols

In [9]:
def get_prepared_data(symbols, target_symbols, years, shift=None):
    """Download (or load from cache) the price history and build the per-target frames.

    The requested range ends at the current time and starts ``years * 365``
    days earlier.

    Args:
        symbols: Feature symbols, passed to ``download_stock_data``.
        target_symbols: Target symbols, passed to ``download_stock_data``.
        years: Length of the history window in years.
        shift: Accepted for backward compatibility and currently unused.
            ``prepare_data`` takes no ``shift`` argument, so forwarding it
            raised ``TypeError``.

    Returns:
        dict mapping each target symbol to its prepared DataFrame, as returned
        by ``prepare_data``.
    """
    end_date = datetime.now()
    start_date = end_date - timedelta(days=years*365)
    # Use the lists returned by the download step: they no longer contain the
    # symbols for which no data could be obtained.
    data, symbols, target_symbols = download_stock_data(symbols, target_symbols, start_date, end_date)
    prepared_data = prepare_data(data, symbols, target_symbols)

    return prepared_data

In [10]:
# Read the feature and target symbol lists from their text files and build
# the prepared frames used by the following cells.
years = 5 # Length of the training-data window, in years

symbols = get_symbol_list("feat_symbols.txt")
target_symbols = get_symbol_list("target_symbols.txt")

dict_prepared_data = get_prepared_data(symbols, target_symbols, years)

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%********

In [11]:
import pickle

# Snapshot the prepared frames to disk and read them back, so later runs can
# start from the pickle instead of downloading again.
# The target directory may not exist yet; open(..., "wb") would then fail.
os.makedirs("tests", exist_ok=True)

with open("tests/prepared_data.pkl", "wb") as tf:
    pickle.dump(dict_prepared_data, tf)

# NOTE: pickle.load executes arbitrary code from the file. This is fine for the
# file written just above, but do not point it at a pickle from an untrusted source.
with open("tests/prepared_data.pkl", "rb") as tf:
    dict_prepared_data = pickle.load(tf)

# Show every per-target frame as a rich table.
for prepared_frame in dict_prepared_data.values():
    display(prepared_frame)

Unnamed: 0_level_0,4584.T,JPY=X,1332.T,1605.T,1721.T,1801.T,1802.T,1803.T,1808.T,1812.T,...,WHR,WMB,WYNN,XEL,XRX,XYL,YUM,ZBH,ZION,ZTS
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2018-05-23,1084.0,110.725998,508.962372,1074.314209,2499.224609,5158.541016,965.129883,1015.991211,1290.213257,1543.206055,...,15445.092340,2249.993004,20435.929505,4267.705721,2473.280057,7481.764094,8456.382147,11660.672839,5603.723287,9100.646914
2018-05-24,1084.5,109.869003,499.000366,1059.467529,2519.971680,5167.065918,945.691895,1004.897339,1276.855469,1523.098877,...,14055.950683,2192.264324,20177.730137,4280.495849,2461.141702,7471.585106,8429.307655,11470.843817,5545.974325,9016.381583
2018-05-25,1070.5,109.311996,495.377899,1029.774414,2532.075195,5150.013672,960.059021,1003.048584,1295.713867,1531.476929,...,13990.246729,2165.106974,20133.491360,4292.036647,2431.242430,7370.732818,8292.125397,11345.281215,5440.609899,8856.354172
2018-05-28,1080.5,109.623001,488.132965,1002.555603,2532.939697,5226.751953,964.284851,1008.595520,1300.428223,1531.476929,...,13990.246729,2165.106974,20133.491360,4292.036647,2431.242430,7370.732818,8292.125397,11345.281215,5440.609899,8856.354172
2018-05-29,1037.5,109.389000,485.416046,998.844177,2547.635254,5201.171875,954.988220,1003.048584,1296.499512,1523.098877,...,13900.015957,2140.140969,20062.602902,4300.762748,2419.007724,7288.119736,8235.630008,11168.275387,5241.171231,8807.512849
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-05-09,243.0,135.072998,601.000000,1469.000000,2628.000000,4775.000000,1163.000000,838.000000,1679.000000,1861.000000,...,18361.823684,3977.899896,15087.653470,9334.894977,1996.378875,14425.796604,18587.395344,18376.681796,3298.482623,24903.407990
2023-05-10,241.0,135.287994,596.000000,1458.000000,2646.000000,4780.000000,1172.000000,845.000000,1675.000000,1855.000000,...,18063.653588,3944.997896,15084.611374,9375.458424,1934.618346,14450.110350,18634.569090,18591.275941,3213.089867,25047.219198
2023-05-11,244.0,134.087006,587.000000,1476.000000,2661.000000,4730.000000,1089.000000,827.000000,1659.000000,1813.000000,...,17755.801038,3888.523163,14333.901105,9201.050694,1882.581554,14095.226399,18415.508860,18328.353125,3041.093328,24822.185825
2023-05-12,241.0,134.498001,622.000000,1460.000000,2720.000000,4735.000000,1107.000000,844.000000,1671.000000,1836.000000,...,17564.093471,3938.101565,14174.744254,9265.567214,1903.146664,14088.665615,18629.317393,18392.601650,3016.790206,24696.522305


Unnamed: 0_level_0,1557.T,JPY=X,1332.T,1605.T,1721.T,1801.T,1802.T,1803.T,1808.T,1812.T,...,WHR,WMB,WYNN,XEL,XRX,XYL,YUM,ZBH,ZION,ZTS
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2018-05-23,30100.0,110.725998,508.962372,1074.314209,2499.224609,5158.541016,965.129883,1015.991211,1290.213257,1543.206055,...,15445.092340,2249.993004,20435.929505,4267.705721,2473.280057,7481.764094,8456.382147,11660.672839,5603.723287,9100.646914
2018-05-24,30000.0,109.869003,499.000366,1059.467529,2519.971680,5167.065918,945.691895,1004.897339,1276.855469,1523.098877,...,14055.950683,2192.264324,20177.730137,4280.495849,2461.141702,7471.585106,8429.307655,11470.843817,5545.974325,9016.381583
2018-05-25,29980.0,109.311996,495.377899,1029.774414,2532.075195,5150.013672,960.059021,1003.048584,1295.713867,1531.476929,...,13990.246729,2165.106974,20133.491360,4292.036647,2431.242430,7370.732818,8292.125397,11345.281215,5440.609899,8856.354172
2018-05-28,29940.0,109.623001,488.132965,1002.555603,2532.939697,5226.751953,964.284851,1008.595520,1300.428223,1531.476929,...,13990.246729,2165.106974,20133.491360,4292.036647,2431.242430,7370.732818,8292.125397,11345.281215,5440.609899,8856.354172
2018-05-29,29740.0,109.389000,485.416046,998.844177,2547.635254,5201.171875,954.988220,1003.048584,1296.499512,1523.098877,...,13900.015957,2140.140969,20062.602902,4300.762748,2419.007724,7288.119736,8235.630008,11168.275387,5241.171231,8807.512849
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-05-09,55640.0,135.072998,601.000000,1469.000000,2628.000000,4775.000000,1163.000000,838.000000,1679.000000,1861.000000,...,18361.823684,3977.899896,15087.653470,9334.894977,1996.378875,14425.796604,18587.395344,18376.681796,3298.482623,24903.407990
2023-05-10,55680.0,135.287994,596.000000,1458.000000,2646.000000,4780.000000,1172.000000,845.000000,1675.000000,1855.000000,...,18063.653588,3944.997896,15084.611374,9375.458424,1934.618346,14450.110350,18634.569090,18591.275941,3213.089867,25047.219198
2023-05-11,55600.0,134.087006,587.000000,1476.000000,2661.000000,4730.000000,1089.000000,827.000000,1659.000000,1813.000000,...,17755.801038,3888.523163,14333.901105,9201.050694,1882.581554,14095.226399,18415.508860,18328.353125,3041.093328,24822.185825
2023-05-12,55650.0,134.498001,622.000000,1460.000000,2720.000000,4735.000000,1107.000000,844.000000,1671.000000,1836.000000,...,17564.093471,3938.101565,14174.744254,9265.567214,1903.146664,14088.665615,18629.317393,18392.601650,3016.790206,24696.522305


Unnamed: 0_level_0,8789.T,JPY=X,1332.T,1605.T,1721.T,1801.T,1802.T,1803.T,1808.T,1812.T,...,WHR,WMB,WYNN,XEL,XRX,XYL,YUM,ZBH,ZION,ZTS
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2018-05-23,92.0,110.725998,508.962372,1074.314209,2499.224609,5158.541016,965.129883,1015.991211,1290.213257,1543.206055,...,15445.092340,2249.993004,20435.929505,4267.705721,2473.280057,7481.764094,8456.382147,11660.672839,5603.723287,9100.646914
2018-05-24,90.0,109.869003,499.000366,1059.467529,2519.971680,5167.065918,945.691895,1004.897339,1276.855469,1523.098877,...,14055.950683,2192.264324,20177.730137,4280.495849,2461.141702,7471.585106,8429.307655,11470.843817,5545.974325,9016.381583
2018-05-25,89.0,109.311996,495.377899,1029.774414,2532.075195,5150.013672,960.059021,1003.048584,1295.713867,1531.476929,...,13990.246729,2165.106974,20133.491360,4292.036647,2431.242430,7370.732818,8292.125397,11345.281215,5440.609899,8856.354172
2018-05-28,89.0,109.623001,488.132965,1002.555603,2532.939697,5226.751953,964.284851,1008.595520,1300.428223,1531.476929,...,13990.246729,2165.106974,20133.491360,4292.036647,2431.242430,7370.732818,8292.125397,11345.281215,5440.609899,8856.354172
2018-05-29,88.0,109.389000,485.416046,998.844177,2547.635254,5201.171875,954.988220,1003.048584,1296.499512,1523.098877,...,13900.015957,2140.140969,20062.602902,4300.762748,2419.007724,7288.119736,8235.630008,11168.275387,5241.171231,8807.512849
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-05-09,80.0,135.072998,601.000000,1469.000000,2628.000000,4775.000000,1163.000000,838.000000,1679.000000,1861.000000,...,18361.823684,3977.899896,15087.653470,9334.894977,1996.378875,14425.796604,18587.395344,18376.681796,3298.482623,24903.407990
2023-05-10,79.0,135.287994,596.000000,1458.000000,2646.000000,4780.000000,1172.000000,845.000000,1675.000000,1855.000000,...,18063.653588,3944.997896,15084.611374,9375.458424,1934.618346,14450.110350,18634.569090,18591.275941,3213.089867,25047.219198
2023-05-11,69.0,134.087006,587.000000,1476.000000,2661.000000,4730.000000,1089.000000,827.000000,1659.000000,1813.000000,...,17755.801038,3888.523163,14333.901105,9201.050694,1882.581554,14095.226399,18415.508860,18328.353125,3041.093328,24822.185825
2023-05-12,68.0,134.498001,622.000000,1460.000000,2720.000000,4735.000000,1107.000000,844.000000,1671.000000,1836.000000,...,17564.093471,3938.101565,14174.744254,9265.567214,1903.146664,14088.665615,18629.317393,18392.601650,3016.790206,24696.522305


Unnamed: 0_level_0,1893.T,JPY=X,1332.T,1605.T,1721.T,1801.T,1802.T,1803.T,1808.T,1812.T,...,WHR,WMB,WYNN,XEL,XRX,XYL,YUM,ZBH,ZION,ZTS
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2018-05-23,629.252991,110.725998,508.962372,1074.314209,2499.224609,5158.541016,965.129883,1015.991211,1290.213257,1543.206055,...,15445.092340,2249.993004,20435.929505,4267.705721,2473.280057,7481.764094,8456.382147,11660.672839,5603.723287,9100.646914
2018-05-24,611.323120,109.869003,499.000366,1059.467529,2519.971680,5167.065918,945.691895,1004.897339,1276.855469,1523.098877,...,14055.950683,2192.264324,20177.730137,4280.495849,2461.141702,7471.585106,8429.307655,11470.843817,5545.974325,9016.381583
2018-05-25,602.785095,109.311996,495.377899,1029.774414,2532.075195,5150.013672,960.059021,1003.048584,1295.713867,1531.476929,...,13990.246729,2165.106974,20133.491360,4292.036647,2431.242430,7370.732818,8292.125397,11345.281215,5440.609899,8856.354172
2018-05-28,604.492798,109.623001,488.132965,1002.555603,2532.939697,5226.751953,964.284851,1008.595520,1300.428223,1531.476929,...,13990.246729,2165.106974,20133.491360,4292.036647,2431.242430,7370.732818,8292.125397,11345.281215,5440.609899,8856.354172
2018-05-29,601.077515,109.389000,485.416046,998.844177,2547.635254,5201.171875,954.988220,1003.048584,1296.499512,1523.098877,...,13900.015957,2140.140969,20062.602902,4300.762748,2419.007724,7288.119736,8235.630008,11168.275387,5241.171231,8807.512849
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-05-09,680.000000,135.072998,601.000000,1469.000000,2628.000000,4775.000000,1163.000000,838.000000,1679.000000,1861.000000,...,18361.823684,3977.899896,15087.653470,9334.894977,1996.378875,14425.796604,18587.395344,18376.681796,3298.482623,24903.407990
2023-05-10,681.000000,135.287994,596.000000,1458.000000,2646.000000,4780.000000,1172.000000,845.000000,1675.000000,1855.000000,...,18063.653588,3944.997896,15084.611374,9375.458424,1934.618346,14450.110350,18634.569090,18591.275941,3213.089867,25047.219198
2023-05-11,675.000000,134.087006,587.000000,1476.000000,2661.000000,4730.000000,1089.000000,827.000000,1659.000000,1813.000000,...,17755.801038,3888.523163,14333.901105,9201.050694,1882.581554,14095.226399,18415.508860,18328.353125,3041.093328,24822.185825
2023-05-12,678.000000,134.498001,622.000000,1460.000000,2720.000000,4735.000000,1107.000000,844.000000,1671.000000,1836.000000,...,17564.093471,3938.101565,14174.744254,9265.567214,1903.146664,14088.665615,18629.317393,18392.601650,3016.790206,24696.522305


Unnamed: 0_level_0,MSFT,JPY=X,1332.T,1605.T,1721.T,1801.T,1802.T,1803.T,1808.T,1812.T,...,WHR,WMB,WYNN,XEL,XRX,XYL,YUM,ZBH,ZION,ZTS
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2018-05-23,10354.696222,110.725998,508.962372,1074.314209,2499.224609,5158.541016,965.129883,1015.991211,1290.213257,1543.206055,...,15445.092340,2249.993004,20435.929505,4267.705721,2473.280057,7481.764094,8456.382147,11660.672839,5603.723287,9100.646914
2018-05-24,10238.102569,109.869003,499.000366,1059.467529,2519.971680,5167.065918,945.691895,1004.897339,1276.855469,1523.098877,...,14055.950683,2192.264324,20177.730137,4280.495849,2461.141702,7471.585106,8429.307655,11470.843817,5545.974325,9016.381583
2018-05-25,10191.380471,109.311996,495.377899,1029.774414,2532.075195,5150.013672,960.059021,1003.048584,1295.713867,1531.476929,...,13990.246729,2165.106974,20133.491360,4292.036647,2431.242430,7370.732818,8292.125397,11345.281215,5440.609899,8856.354172
2018-05-28,10191.380471,109.623001,488.132965,1002.555603,2532.939697,5226.751953,964.284851,1008.595520,1300.428223,1531.476929,...,13990.246729,2165.106974,20133.491360,4292.036647,2431.242430,7370.732818,8292.125397,11345.281215,5440.609899,8856.354172
2018-05-29,10162.268306,109.389000,485.416046,998.844177,2547.635254,5201.171875,954.988220,1003.048584,1296.499512,1523.098877,...,13900.015957,2140.140969,20062.602902,4300.762748,2419.007724,7288.119736,8235.630008,11168.275387,5241.171231,8807.512849
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-05-09,41467.410400,135.072998,601.000000,1469.000000,2628.000000,4775.000000,1163.000000,838.000000,1679.000000,1861.000000,...,18361.823684,3977.899896,15087.653470,9334.894977,1996.378875,14425.796604,18587.395344,18376.681796,3298.482623,24903.407990
2023-05-10,42251.793196,135.287994,596.000000,1458.000000,2646.000000,4780.000000,1172.000000,845.000000,1675.000000,1855.000000,...,18063.653588,3944.997896,15084.611374,9375.458424,1934.618346,14450.110350,18634.569090,18591.275941,3213.089867,25047.219198
2023-05-11,41581.719347,134.087006,587.000000,1476.000000,2661.000000,4730.000000,1089.000000,827.000000,1659.000000,1813.000000,...,17755.801038,3888.523163,14333.901105,9201.050694,1882.581554,14095.226399,18415.508860,18328.353125,3041.093328,24822.185825
2023-05-12,41555.847564,134.498001,622.000000,1460.000000,2720.000000,4735.000000,1107.000000,844.000000,1671.000000,1836.000000,...,17564.093471,3938.101565,14174.744254,9265.567214,1903.146664,14088.665615,18629.317393,18392.601650,3016.790206,24696.522305
