In [6]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
import torch
import os, random

cwd = os.getcwdb()
os.chdir('../archive/src/')
from TCN import TCN
os.chdir(cwd)

pd.set_option('display.max_columns', 100)
pd.set_option('display.max_rows', 100)

# fix seed
seed = 0
np.random.seed(seed)
random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)

In [7]:
def get_inputs(dataset_dir='../data'):
        """
        Args:
            dataset_dir (str)  : path to dataset directory
        Returns:
            dict[str]: path to dataset files
        """
        inputs = {
            "stock_list": f"{dataset_dir}/stock_list.csv",
            "stock_price": f"{dataset_dir}/stock_price.csv",
            "stock_fin": f"{dataset_dir}/stock_fin.csv",
            "stock_fin_price": f"{dataset_dir}/stock_fin_price.csv",
            #"all_code_tech": f"{dataset_dir}/all_code_tech.csv",
            #"all_code_fund": f"{dataset_dir}/all_code_fund.csv",
        }
        return inputs

In [10]:
def get_dataset(inputs):
        """
        Args:
            inputs (list[str]): path to dataset files
        Returns:
            dict[pd.DataFrame]: loaded data
        """
        dfs = {}
        for k, v in inputs.items():
            dfs[k] = pd.read_csv(v)
            
            # DataFrameのindexを設定します。
            if k == "stock_price":
                dfs[k].loc[:, "datetime"] = pd.to_datetime(
                    dfs[k].loc[:, "EndOfDayQuote Date"]
                )
                dfs[k].set_index("datetime", inplace=True)
            elif k in ["stock_fin", "stock_fin_price", "stock_labels"]:
                dfs[k].loc[:, "datetime"] = pd.to_datetime(
                    dfs[k].loc[:, "base_date"]
                )
                dfs[k].set_index("datetime", inplace=True)
            
        return dfs

In [11]:
dfs = get_dataset(get_inputs())

  if (await self.run_code(code, result,  async_=asy)):


In [12]:
def get_codes(dfs):
    """
    Args:
        dfs (dict[pd.DataFrame]): loaded data
    Returns:
        array: list of stock codes
    """
    stock_list = dfs["stock_list"].copy()
    # 予測対象の銘柄コードを取得
    codes = stock_list["Local Code"].values
    return codes
codes = get_codes(dfs)

In [13]:
def get_model(model_path='../archive/model/', labels=None):
    """Get model method

    Args:
        model_path (str): Path to the trained model directory.

    Returns:
        bool: The return value. True for success, False otherwise.

    """
    models = TCN(input_size=5, output_size=4, num_channels=[16, 8, 4, 2], kernel_size=2, dropout=0.5)
    weight_path = os.path.join(model_path, 'model.mdl')
    models.load_state_dict(torch.load(weight_path, map_location=torch.device('cpu') ))
    models.eval()
    
    print(True)

    return models
models = get_model()

True


In [144]:
#######################################################################
    # Technical Analytics                                                 #

#######################################################################

def calc_MADR( close:pd.core.series.Series, days:int) -> np.ndarray:
    '''移動平均乖離率を計算する'''
    MA = close.rolling(days).mean()
    MADR = ((close - MA) / MA).replace([np.inf, -np.inf], 0)
    return MADR.values


def calc_MXDR( high:pd.core.series.Series, days:int) -> np.ndarray:
    '''最高値乖離率を計算する'''
    MX = high.rolling(days).max()
    MXDR = ((high - MX) / MX).replace([np.inf, -np.inf], 0)
    return MXDR.values


def calc_MNDR( min_:pd.core.series.Series, days:int) -> np.ndarray:
    '''最安値乖離率を計算する'''
    MN = min_.rolling(days).min()
    MNDR = ((min_ - MN) / MN).replace([np.inf, -np.inf], 0)
    return MNDR.values


def calc_RNDR( close:int) -> int:
    '''キリ番(Round Number Divergence Rate...造語)との乖離率を計算する'''
    # 10円台, 1000円台, 10000円台ではスケールが異なる。
    # 99円までは10円を基準, 9999円までは100円を基準, 10000以上は1000円基準としてみる。
    #株価は0～93600の範囲をとりうる
    if close < 100:
        RN =int(Decimal(close).quantize(Decimal('1E1'), rounding=ROUND_HALF_UP))
    elif close < 10000:
        RN =int(Decimal(close).quantize(Decimal('1E2'), rounding=ROUND_HALF_UP))
    else:
        RN =int(Decimal(close).quantize(Decimal('1E3'), rounding=ROUND_HALF_UP))
    # 終値がキリ番の場合はゼロなり割れない為、場合分け
    if close - RN != 0:
        RNDR = (close - RN) / RN
    else:
        RNDR = 0
    return RNDR


def calc_RSI( close, day):
    '''RSIを計算する'''
    RSI = (close.diff().apply(lambda x: x if x >=0 else 0).rolling(day).sum() / close.diff().abs().rolling(day).sum()).replace([np.inf, -np.inf], 0)
    return RSI.values


def add_techniacl_data( df_target: pd.core.frame.DataFrame) -> pd.core.frame.DataFrame:
    '''
    dfにテクニカル指標を追加
    '''
    df = df_target.copy()

    # 対数リターン(前日比)
    df["log_R"] = np.log1p(df["EndOfDayQuote ExchangeOfficialClose"]).diff()

    # リターン(変化率)
    df["return_5"] = df["EndOfDayQuote ExchangeOfficialClose"].pct_change(5)
    df["return_25"] = df["EndOfDayQuote ExchangeOfficialClose"].pct_change(25)
    df["return_75"] = df["EndOfDayQuote ExchangeOfficialClose"].pct_change(75)

    # ヒストリカルボラティリティ
    df["HV_5"] = df['log_R'].diff().rolling(5).std()
    df["HV_10"] = df['log_R'].diff().rolling(10).std()
    df["HV_25"] = df['log_R'].diff().rolling(25).std()
    df["HV_50"] = df['log_R'].diff().rolling(50).std()
    df["HV_75"] = df['log_R'].diff().rolling(75).std()
    df["HV_100"] = df['log_R'].diff().rolling(100).std()

    # ヒストリカルボラティリティの移動平均
    df["MA20_HV5"] = df['HV_5'].rolling(20).mean()
    df["MA20_HV10"] = df['HV_10'].rolling(20).mean()
    df["MA20_HV25"] = df['HV_25'].rolling(20).mean()
    df["MA20_HV50"] = df['HV_50'].rolling(20).mean()
    df["MA20_HV75"] = df['HV_75'].rolling(20).mean()
    df["MA20_HV100"] = df['HV_100'].rolling(20).mean()

    # 移動平均乖離(Moving Average Divergence Rate)を求める
    df['MADR5'] =  calc_MADR(df['EndOfDayQuote ExchangeOfficialClose'], 5)
    df['MADR25'] =  calc_MADR(df['EndOfDayQuote ExchangeOfficialClose'], 25)
    df['MADR75'] =  calc_MADR(df['EndOfDayQuote ExchangeOfficialClose'], 75)

    # 最高値との乖離
    df['MXDR5'] =  calc_MXDR(df['EndOfDayQuote High'], 5)
    df['MXDR10'] =  calc_MXDR(df['EndOfDayQuote High'], 10)
    df['MXDR20'] =  calc_MXDR(df['EndOfDayQuote High'], 20)

    # 最高値との乖離
    df['MNDR5'] =  calc_MNDR(df['EndOfDayQuote Low'], 5)
    df['MNDR10'] =  calc_MNDR(df['EndOfDayQuote Low'], 10)
    df['MNDR20'] =  calc_MNDR(df['EndOfDayQuote Low'], 20)

    # キリ番との乖離
    df['RNDR'] =  df['EndOfDayQuote ExchangeOfficialClose'].apply(calc_RNDR)

    # RSI
    df['RSI'] = calc_RSI(df["EndOfDayQuote ExchangeOfficialClose"], 14)

    # 値幅(高値-安値) / 終値: O-H_C
    df['H-L_C'] =  (df['EndOfDayQuote High'] - df['EndOfDayQuote Low']) / df['EndOfDayQuote ExchangeOfficialClose']
    df['MA5_H-L_C'] = df['H-L_C'].rolling(5).mean()
    df['MA10_H-L_C'] = df['H-L_C'].rolling(10).mean()
    df['MA25_H-L_C'] = df['H-L_C'].rolling(25).mean()
    df['MA50_H-L_C'] = df['H-L_C'].rolling(50).mean()
    df['MA75_H-L_C'] = df['H-L_C'].rolling(75).mean()
    df['MA100_H-L_C'] = df['H-L_C'].rolling(100).mean()

    # 欠損値は削除
    #df.dropna(inplace=True)

    # 欠損値は0とする
    # テストデータの予測で、欠損値を除外とするとエラーとなる。0とるのは古いデータのみであるため、基本的には影響なし。
    df.fillna(0, inplace=True)

    return df

#######################################################################
# Fundamenta; Analytics                                               #
#######################################################################

def clean_base_date_index( df_target: pd.core.frame.DataFrame) -> pd.core.frame.DataFrame:
    '''
    更新対応処理:修正開示が20営業日以内の場合は修正前のデータはdfから削除する。
    20日以上の場合は、修正後のデータを削除する。今回は簡単のため、営業日基準とはしない。
    '''
    # 前処理
    df = df_target.copy()
    df['Result_FinancialStatement ModifyDate'] = pd.to_datetime(df['Result_FinancialStatement ModifyDate'])

    # 修正となったインデックス取得
    modify_index = np.where(df['Result_FinancialStatement ModifyDate'] != df.index)

    # 更新日
    modify_dates = df.index[modify_index]

    # 修正元の情報開示日
    base_dates = df.loc[modify_dates]['Result_FinancialStatement ModifyDate'].values

    # 差分を取り、判定
    diff_days = modify_dates - base_dates
    mask1 = [d.days <= 20 for d in diff_days]
    mask2 = [d.days > 20 for d in diff_days]

    # 修正前の情報開示日が20日以内のインデックス削除
    df = df.drop(base_dates[mask1]).copy()

    # 更新日が20日より後のインデックス削除
    df = df.drop(modify_dates[mask2]).copy()

    return df


def add_growth( df_target: pd.core.frame.DataFrame) -> pd.core.frame.DataFrame:
    '''前期同期比の成長率を計算する'''
    df = df_target.sort_values(['Result_FinancialStatement ReportType', 'base_date']).copy()

    # 売上高成長率, 営業利益成長率, 経常利益成長率, 営業利益成長率
    df['NetSales_Growth'] = df['Result_FinancialStatement NetSales'].pct_change().replace([np.inf, -np.inf], 0).fillna(0)
    df['OperatingIncome_Growth'] = df['Result_FinancialStatement OperatingIncome'].pct_change().replace([np.inf, -np.inf], 0).fillna(0)
    df['OrdinaryIncome_Growth'] = df['Result_FinancialStatement OrdinaryIncome'].pct_change().replace([np.inf, -np.inf], 0).fillna(0)
    df['NetIncome_Growth'] = df['Result_FinancialStatement NetIncome'].pct_change().replace([np.inf, -np.inf], 0).fillna(0)

    # ReportTypeの変わり目(各レポートの最初のデータ)は0とする
    report_change_mask = df['Result_FinancialStatement ReportType'].ne(df['Result_FinancialStatement ReportType'].shift()).values
    report_change_ind = df.index[np.where(report_change_mask)]
    df.loc[report_change_ind, ['NetSales_Growth', 'OperatingIncome_Growth', 'OrdinaryIncome_Growth', 'NetIncome_Growth']] = 0

    # 順序を戻す(しなくてもいが)
    df = df.sort_values('base_date').copy()

    return df

def add_fundamental_data( df_target: pd.core.frame.DataFrame) -> pd.core.frame.DataFrame:
        df = df_target.copy()
        
        # 更新対応処理
        #df = clean_base_date_index(df)
        
        # 売上高営業利益率, 売上高経常利益率, 売上高当期純利益
        df['OperatingIncome_NetSales'] = (df['Result_FinancialStatement OperatingIncome'] / df['Result_FinancialStatement NetSales']).replace([np.inf, -np.inf], 0)
        df['OrdinaryIncome_NetSales'] = (df['Result_FinancialStatement OrdinaryIncome'] / df['Result_FinancialStatement NetSales']).replace([np.inf, -np.inf], 0)
        df['NetIncome_NetSales'] = (df['Result_FinancialStatement NetIncome'] / df['Result_FinancialStatement NetSales']).replace([np.inf, -np.inf], 0)
        
        # 前年度期比の売上高成長率, 営業利益成長率, 経常利益成長率, 営業利益成長率
        df = add_growth(df)
        
        # 来期予想成長率
        df['Forecast_NetSales_Growth'] = (df['Forecast_FinancialStatement NetSales'] / df['Result_FinancialStatement NetSales']-1).replace([np.inf, -np.inf], 0)
        df['Forecast_OperatingIncome_Growth'] = (df['Forecast_FinancialStatement OperatingIncome'] / df['Result_FinancialStatement OperatingIncome']-1).replace([np.inf, -np.inf], 0)
        df['Forecast_OrdinaryIncome_Growth'] = (df['Forecast_FinancialStatement OrdinaryIncome'] / df['Result_FinancialStatement OrdinaryIncome']-1).replace([np.inf, -np.inf], 0)
        df['Forecast_NetIncome_Growth'] = (df['Forecast_FinancialStatement NetIncome'] / df['Result_FinancialStatement NetIncome']-1).replace([np.inf, -np.inf], 0)
        
        # 自己資本比率, ROE, ROA
        df['Capital_Ratio'] = (df['Result_FinancialStatement NetAssets'] / df['Result_FinancialStatement TotalAssets']).replace([np.inf, -np.inf], 0)
        df['ROE'] = (df['Result_FinancialStatement NetIncome'] / df['Result_FinancialStatement NetAssets']).replace([np.inf, -np.inf], 0)
        df['ROA'] = (df['Result_FinancialStatement NetIncome'] / df['Result_FinancialStatement TotalAssets']).replace([np.inf, -np.inf], 0)
        
        # キャッシュフローの正負(1, 0, -1):pn(positive, negative)
        df['CF_Operating_pn'] = np.sign(df['Result_FinancialStatement CashFlowsFromOperatingActivities']).fillna(0)
        df['CF_Financing_pn'] = np.sign(df['Result_FinancialStatement CashFlowsFromFinancingActivities']).fillna(0)
        df['CF_Investing_pn'] = np.sign(df['Result_FinancialStatement CashFlowsFromInvestingActivities']).fillna(0)

        return df

In [145]:
TEST_START = "2020-01-01"
start_dt = TEST_START
def get_features_for_predict(dfs, code, start_dt=TEST_START):
        """
        Args:
            dfs (dict)  : dict of pd.DataFrame include stock_fin, stock_price
            code (int)  : A local code for a listed company
            start_dt (str): specify date range
        Returns:
            feature DataFrame (pd.DataFrame)
        """
        # stock_finデータを読み込み
        stock_fin = dfs["stock_fin"]

        # 1銘柄に関する財務諸表データ
        df_one_code_fund = stock_fin.loc[stock_fin['Local Code'] == code].copy()

        # ファンダメンタル指標を追加
        df_one_code_fund = add_fundamental_data(df_one_code_fund).copy()

        # 決算日の株価を取得する(株価のない財務データは除外)
        # stock_priceを読み込み
        stock_price = dfs["stock_price"]

        # 1銘柄に関する価格情報を取り出す
        df_one_code = stock_price.loc[stock_price['Local Code'] == code].copy()
        
        df_one_code_price = df_one_code[["EndOfDayQuote Date", "EndOfDayQuote ExchangeOfficialClose"]].copy()
        df_one_code_price.rename(columns={'EndOfDayQuote Date':'base_date'}, inplace=True)
        df_one_code_fund = pd.merge(df_one_code_fund, df_one_code_price, on='base_date').copy()
        
        # 配当利回りを計算
        df_one_code_fund['Dividend_Yeild'] = (df_one_code_fund['Result_Dividend QuarterlyDividendPerShare'] / df_one_code_fund["EndOfDayQuote ExchangeOfficialClose"]).replace([np.inf, -np.inf], 0)
        
        # 業種区分
        stock_list = dfs["stock_list"]
        df_one_code_fund['17_Sector'] = stock_list[stock_list['Local Code'] == code]['17 Sector(Code)'].values[0]

        
        df_one_code_fund.set_index('base_date', inplace=True)
        
        #print(df_one_code_fund)

        return df_one_code_fund[start_dt:]

In [146]:
get_features_for_predict(dfs, 1332)

Unnamed: 0_level_0,Local Code,Result_FinancialStatement AccountingStandard,Result_FinancialStatement FiscalPeriodEnd,Result_FinancialStatement ReportType,Result_FinancialStatement FiscalYear,Result_FinancialStatement ModifyDate,Result_FinancialStatement CompanyType,Result_FinancialStatement ChangeOfFiscalYearEnd,Result_FinancialStatement NetSales,Result_FinancialStatement OperatingIncome,Result_FinancialStatement OrdinaryIncome,Result_FinancialStatement NetIncome,Result_FinancialStatement TotalAssets,Result_FinancialStatement NetAssets,Result_FinancialStatement CashFlowsFromOperatingActivities,Result_FinancialStatement CashFlowsFromFinancingActivities,Result_FinancialStatement CashFlowsFromInvestingActivities,Forecast_FinancialStatement AccountingStandard,Forecast_FinancialStatement FiscalPeriodEnd,Forecast_FinancialStatement ReportType,Forecast_FinancialStatement FiscalYear,Forecast_FinancialStatement ModifyDate,Forecast_FinancialStatement CompanyType,Forecast_FinancialStatement ChangeOfFiscalYearEnd,Forecast_FinancialStatement NetSales,Forecast_FinancialStatement OperatingIncome,Forecast_FinancialStatement OrdinaryIncome,Forecast_FinancialStatement NetIncome,Result_Dividend FiscalPeriodEnd,Result_Dividend ReportType,Result_Dividend FiscalYear,Result_Dividend ModifyDate,Result_Dividend RecordDate,Result_Dividend DividendPayableDate,Result_Dividend QuarterlyDividendPerShare,Result_Dividend AnnualDividendPerShare,Forecast_Dividend FiscalPeriodEnd,Forecast_Dividend ReportType,Forecast_Dividend FiscalYear,Forecast_Dividend ModifyDate,Forecast_Dividend RecordDate,Forecast_Dividend QuarterlyDividendPerShare,Forecast_Dividend AnnualDividendPerShare,OperatingIncome_NetSales,OrdinaryIncome_NetSales,NetIncome_NetSales,NetSales_Growth,OperatingIncome_Growth,OrdinaryIncome_Growth,NetIncome_Growth,Forecast_NetSales_Growth,Forecast_OperatingIncome_Growth,Forecast_OrdinaryIncome_Growth,Forecast_NetIncome_Growth,Capital_Ratio,ROE,ROA,CF_Operating_pn,CF_Financing_pn,CF_Investing_pn,EndOfDayQuote ExchangeOfficialClose,Dividend_Yeild,17_Sector
base_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1
2020/02/05,1332,ConsolidatedJP,2019/12,Q3,2020.0,2020/02/05,GB,False,526828.0,19068.0,21613.0,14791.0,500636.0,173924.0,,,,ConsolidatedJP,2020/03,Annual,2020.0,2020/02/05,GB,False,700000.0,24000.0,26500.0,17500.0,2019/09,Q2,2020.0,2020/02/05,2019/09/30,2019/12/02,4.0,,2020/03,Annual,2020.0,2020/02/05,2020/03/31,4.5,8.5,0.036194,0.041025,0.028076,-0.030249,-0.041761,-0.05256,-0.031559,0.328707,0.258653,0.226114,0.183152,0.347406,0.085043,0.029544,0.0,0.0,0.0,618.0,0.006472,1
2020/05/20,1332,ConsolidatedJP,2020/03,Annual,2020.0,2020/05/20,GB,False,690016.0,22834.0,25807.0,14768.0,491533.0,172300.0,18786.0,25942.0,-29446.0,ConsolidatedJP,2021/03,Annual,2021.0,2020/05/20,GB,False,670000.0,19000.0,21500.0,15000.0,2020/03,Annual,2020.0,2020/05/20,2020/03/31,2020/06/08,4.5,8.5,2020/09,Q2,2021.0,2020/05/20,,4.0,,0.033092,0.037401,0.021402,-0.031027,0.052986,0.017706,-0.03973,-0.029008,-0.167908,-0.166893,0.01571,0.350536,0.085711,0.030045,1.0,1.0,-1.0,485.0,0.009278,1
2020/08/03,1332,ConsolidatedJP,2020/06,Q1,2021.0,2020/08/03,GB,False,162254.0,4251.0,4952.0,3189.0,496893.0,172094.0,,,,ConsolidatedJP,2021/03,Annual,2021.0,2020/08/03,GB,False,670000.0,19000.0,21500.0,15000.0,2020/03,Annual,2020.0,2020/08/03,2020/03/31,2020/06/08,4.5,8.5,2020/09,Q2,2021.0,2020/05/20,,4.0,,0.0262,0.03052,0.019654,-0.068271,-0.254734,-0.183781,-0.118817,3.129328,3.469537,3.34168,3.703669,0.34634,0.018531,0.006418,0.0,0.0,0.0,465.0,0.009677,1
2020/11/05,1332,ConsolidatedJP,2020/09,Q2,2021.0,2020/11/05,GB,False,320819.0,6968.0,8959.0,5034.0,481703.0,177555.0,,,,ConsolidatedJP,2021/03,Annual,2021.0,2020/11/05,GB,False,650000.0,15000.0,18500.0,11500.0,2020/09,Q2,2021.0,2020/11/05,2020/09/30,2020/12/07,4.0,,2021/03,Annual,2021.0,2020/11/05,2021/03/31,4.5,8.5,0.021719,0.027925,0.015691,-0.071172,-0.370949,-0.261905,-0.357006,1.026065,1.152698,1.064963,1.284466,0.368598,0.028352,0.01045,0.0,0.0,0.0,417.0,0.009592,1


In [147]:
# 特徴量(ファンダメンタル)を作成
buff = []
for code in codes:
    buff.append(get_features_for_predict(dfs, code))
feats_cs = pd.concat(buff)

# ダミー変数化 & 不要な列を削除
category_cols = ['CF_Operating_pn', 'CF_Financing_pn', 'CF_Investing_pn', '17_Sector', 'Result_FinancialStatement ReportType']
feats_cs = pd.get_dummies(feats_cs, columns=category_cols).copy()

delete_cols = feats_cs.iloc[:, 1:42].columns
print('delete cols:', delete_cols)
feats_cs.fillna(0, inplace=True)
dfs['all_code_fund'] = feats_cs.drop(delete_cols, axis=1).copy()

delete cols: Index(['Result_FinancialStatement AccountingStandard',
       'Result_FinancialStatement FiscalPeriodEnd',
       'Result_FinancialStatement FiscalYear',
       'Result_FinancialStatement ModifyDate',
       'Result_FinancialStatement CompanyType',
       'Result_FinancialStatement ChangeOfFiscalYearEnd',
       'Result_FinancialStatement NetSales',
       'Result_FinancialStatement OperatingIncome',
       'Result_FinancialStatement OrdinaryIncome',
       'Result_FinancialStatement NetIncome',
       'Result_FinancialStatement TotalAssets',
       'Result_FinancialStatement NetAssets',
       'Result_FinancialStatement CashFlowsFromOperatingActivities',
       'Result_FinancialStatement CashFlowsFromFinancingActivities',
       'Result_FinancialStatement CashFlowsFromInvestingActivities',
       'Forecast_FinancialStatement AccountingStandard',
       'Forecast_FinancialStatement FiscalPeriodEnd',
       'Forecast_FinancialStatement ReportType',
       'Forecast_Financia

In [148]:
delete_cols = feats_cs.iloc[:, 1:42].columns
print('delete cols:', delete_cols)
feats_cs.fillna(0, inplace=True)
dfs['all_code_fund'] = feats_cs.drop(delete_cols, axis=1).copy()

delete cols: Index(['Result_FinancialStatement AccountingStandard',
       'Result_FinancialStatement FiscalPeriodEnd',
       'Result_FinancialStatement FiscalYear',
       'Result_FinancialStatement ModifyDate',
       'Result_FinancialStatement CompanyType',
       'Result_FinancialStatement ChangeOfFiscalYearEnd',
       'Result_FinancialStatement NetSales',
       'Result_FinancialStatement OperatingIncome',
       'Result_FinancialStatement OrdinaryIncome',
       'Result_FinancialStatement NetIncome',
       'Result_FinancialStatement TotalAssets',
       'Result_FinancialStatement NetAssets',
       'Result_FinancialStatement CashFlowsFromOperatingActivities',
       'Result_FinancialStatement CashFlowsFromFinancingActivities',
       'Result_FinancialStatement CashFlowsFromInvestingActivities',
       'Forecast_FinancialStatement AccountingStandard',
       'Forecast_FinancialStatement FiscalPeriodEnd',
       'Forecast_FinancialStatement ReportType',
       'Forecast_Financia

In [149]:
dfs['all_code_fund']

Unnamed: 0_level_0,Local Code,OperatingIncome_NetSales,OrdinaryIncome_NetSales,NetIncome_NetSales,NetSales_Growth,OperatingIncome_Growth,OrdinaryIncome_Growth,NetIncome_Growth,Forecast_NetSales_Growth,Forecast_OperatingIncome_Growth,Forecast_OrdinaryIncome_Growth,Forecast_NetIncome_Growth,Capital_Ratio,ROE,ROA,EndOfDayQuote ExchangeOfficialClose,Dividend_Yeild,CF_Operating_pn_-1.0,CF_Operating_pn_0.0,CF_Operating_pn_1.0,CF_Financing_pn_-1.0,CF_Financing_pn_0.0,CF_Financing_pn_1.0,CF_Investing_pn_-1.0,CF_Investing_pn_0.0,CF_Investing_pn_1.0,17_Sector_1,17_Sector_2,17_Sector_3,17_Sector_4,17_Sector_5,17_Sector_6,17_Sector_7,17_Sector_8,17_Sector_9,17_Sector_10,17_Sector_11,17_Sector_12,17_Sector_13,17_Sector_14,17_Sector_15,17_Sector_16,17_Sector_17,Result_FinancialStatement ReportType_Annual,Result_FinancialStatement ReportType_Q1,Result_FinancialStatement ReportType_Q2,Result_FinancialStatement ReportType_Q3
base_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1
2020/02/07,1301,0.012734,0.014501,0.008465,0.038694,-0.202439,-0.240632,-0.304678,0.362955,0.529052,0.510574,0.725129,0.253724,0.052058,0.013208,2888.0,0.024238,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
2020/05/12,1301,0.011115,0.013744,0.007759,0.024860,-0.238319,-0.186288,-0.300961,0.028497,0.439342,0.247228,0.472754,0.293145,0.062498,0.018321,2612.0,0.026799,0,0,1,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0
2020/08/07,1301,0.011653,0.012831,0.010401,-0.132878,-0.177922,-0.313300,-0.199717,3.970545,5.635071,5.456241,4.309735,0.287110,0.017163,0.004928,2675.0,0.026168,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
2020/11/06,1301,0.011800,0.012879,0.013495,-0.078993,0.876190,0.334220,1.221127,1.310437,2.045685,1.990033,0.902346,0.298244,0.045737,0.013641,2782.0,0.025162,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0
2020/02/05,1332,0.036194,0.041025,0.028076,-0.030249,-0.041761,-0.052560,-0.031559,0.328707,0.258653,0.226114,0.183152,0.347406,0.085043,0.029544,618.0,0.006472,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020/01/31,9997,0.052872,0.053708,0.033392,0.030122,-0.140189,-0.346569,-0.359900,0.308349,0.512235,0.488699,0.523727,0.452265,0.044667,0.020201,627.0,0.012759,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1
2020/05/13,9997,0.057300,0.057600,0.032576,0.012947,-0.141108,-0.322947,-0.433240,-0.567097,-1.058190,-1.009648,-1.010235,0.459490,0.057176,0.026272,511.0,0.015656,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0
2020/07/31,9997,0.050851,0.054992,0.030741,0.051544,0.264992,0.503662,0.340521,0.605060,-1.243112,-1.037467,-1.040214,0.450821,0.014374,0.006480,749.0,0.010681,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0
2020/10/29,9997,0.050851,0.054992,0.030741,0.000000,0.000000,0.000000,0.000000,0.877838,0.849676,0.919820,0.959115,0.450821,0.014374,0.006480,989.0,0.008089,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0


In [150]:
def get_past_data(data_cs, one_code_tech, n=5):
    '''銘柄(code)の5日前～基準日のデータを取り出す'''
    # 取り出し対象列
    extract_cols = ['EndOfDayQuote Volume', 'log_R', 'return_5', 'return_25', 'return_75', 
                    'HV_5', 'HV_10', 'HV_25', "HV_50", 'HV_75', 'HV_100',
                    'MA20_HV5', 'MA20_HV10', 'MA20_HV25', 'MA20_HV50', 'MA20_HV75', 'MA20_HV100', 
                    'MADR5', 'MADR25', 'MADR75', 'MXDR5', 'MXDR10', 'MXDR20', 'MNDR5', 'MNDR10', 'MNDR20', 'RNDR', 
                    'RSI', 'H-L_C', 'MA25_H-L_C']

    ts_datas = []
    for base_date in data_cs.index:
        ts_num = len(one_code_tech.loc[:base_date])
        if ts_num >= 5:
            ts_data = one_code_tech.loc[:base_date][extract_cols].tail(5).values
            #print('over 5')
        else:
            head = np.zeros((5-ts_num, len(extract_cols)))
            if ts_num == 0:
                ts_data = head
            else:
                #print(code, ' : under 5')
                tail = one_code_tech.loc[:base_date][extract_cols].values
                ts_data = np.concatenate([head, tail])

        ts_datas.append(ts_data)

    return np.concatenate([ts_datas])

In [167]:
def get_model_inputs(dfs, code, start_dt=TEST_START):
    # all_code_fundを読み込み
    all_code_fund = dfs["all_code_fund"]

    # 1銘柄に関するファンダメンタル情報を取り出す
    data_cs = all_code_fund.loc[all_code_fund['Local Code'] == code].copy()
    print(data_cs)

    # stock_priceを読み込み
    stock_price = dfs["stock_price"]

    # 1銘柄に関する価格情報を取り出す
    df_one_code = stock_price.loc[stock_price['Local Code'] == code].copy()
    #print(df_one_code)

    # テクニカル指標を追加
    one_code_tech = add_techniacl_data(df_one_code).copy()
    data_ts = get_past_data(data_cs[start_dt:], one_code_tech)

    # Torch.Tensor
    inputs_ts = torch.Tensor(data_ts) # (N, 5, 30)
    inputs_cs = torch.Tensor(data_cs[start_dt:].iloc[:, 1:].values) # base_date, Local Codeを除く # (N, 46)

    return inputs_ts, inputs_cs, data_cs[start_dt:] # stard_dtは無くてもよい

In [168]:
from decimal import Decimal, ROUND_HALF_UP
inputs_ts, inputs_cs, data_cs = get_model_inputs(dfs, 7337)
print(inputs_ts.size(), inputs_cs.size())

            Local Code  OperatingIncome_NetSales  OrdinaryIncome_NetSales  \
base_date                                                                   
2020/11/09        7337                       0.0                      0.0   

            NetIncome_NetSales  NetSales_Growth  OperatingIncome_Growth  \
base_date                                                                 
2020/11/09                 0.0              0.0                     0.0   

            OrdinaryIncome_Growth  NetIncome_Growth  Forecast_NetSales_Growth  \
base_date                                                                       
2020/11/09                    0.0               0.0                       0.0   

            Forecast_OperatingIncome_Growth  Forecast_OrdinaryIncome_Growth  \
base_date                                                                     
2020/11/09                              0.0                             0.0   

            Forecast_NetIncome_Growth  Capital_Ratio  ROE  R

In [170]:
#models(inputs_ts, inputs_cs)
# stock_finデータを読み込み
stock_fin = dfs["stock_fin"]

# 1銘柄に関する財務諸表データ
df_one_code_fund = stock_fin.loc[stock_fin['Local Code'] == 7337].copy()

df_one_code_fund

Unnamed: 0_level_0,base_date,Local Code,Result_FinancialStatement AccountingStandard,Result_FinancialStatement FiscalPeriodEnd,Result_FinancialStatement ReportType,Result_FinancialStatement FiscalYear,Result_FinancialStatement ModifyDate,Result_FinancialStatement CompanyType,Result_FinancialStatement ChangeOfFiscalYearEnd,Result_FinancialStatement NetSales,Result_FinancialStatement OperatingIncome,Result_FinancialStatement OrdinaryIncome,Result_FinancialStatement NetIncome,Result_FinancialStatement TotalAssets,Result_FinancialStatement NetAssets,Result_FinancialStatement CashFlowsFromOperatingActivities,Result_FinancialStatement CashFlowsFromFinancingActivities,Result_FinancialStatement CashFlowsFromInvestingActivities,Forecast_FinancialStatement AccountingStandard,Forecast_FinancialStatement FiscalPeriodEnd,Forecast_FinancialStatement ReportType,Forecast_FinancialStatement FiscalYear,Forecast_FinancialStatement ModifyDate,Forecast_FinancialStatement CompanyType,Forecast_FinancialStatement ChangeOfFiscalYearEnd,Forecast_FinancialStatement NetSales,Forecast_FinancialStatement OperatingIncome,Forecast_FinancialStatement OrdinaryIncome,Forecast_FinancialStatement NetIncome,Result_Dividend FiscalPeriodEnd,Result_Dividend ReportType,Result_Dividend FiscalYear,Result_Dividend ModifyDate,Result_Dividend RecordDate,Result_Dividend DividendPayableDate,Result_Dividend QuarterlyDividendPerShare,Result_Dividend AnnualDividendPerShare,Forecast_Dividend FiscalPeriodEnd,Forecast_Dividend ReportType,Forecast_Dividend FiscalYear,Forecast_Dividend ModifyDate,Forecast_Dividend RecordDate,Forecast_Dividend QuarterlyDividendPerShare,Forecast_Dividend AnnualDividendPerShare
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1
2020-11-09,2020/11/09,7337,,,,,,,,,,,,,,,,,ConsolidatedJP,2021/03,Annual,2021.0,2020/11/09,BK,False,,,31000.0,21500.0,,,,,,,,,2021/03,Annual,2021.0,2020/11/09,2021/03/31,12.0,24.0


In [154]:
TEST_START = "2020-01-01"
start_dt = TEST_START

results = []

# 特徴量(ファンダメンタル)を作成
buff = []
for code in codes:
    buff.append(get_features_for_predict(dfs, code, start_dt))
feats_cs = pd.concat(buff)

# ダミー変数化 & 不要な列を削除
category_cols = ['CF_Operating_pn', 'CF_Financing_pn', 'CF_Investing_pn', '17_Sector', 'Result_FinancialStatement ReportType']
feats_cs = pd.get_dummies(feats_cs, columns=category_cols).copy()

delete_cols = feats_cs.iloc[:, 1:42].columns
feats_cs.fillna(0, inplace=True)
dfs['all_code_fund'] = feats_cs.drop(delete_cols, axis=1).copy()

AttributeError: 'Index' object has no attribute 'strftime'

In [155]:
for code in codes[:10]:
    inputs_ts, inputs_cs, data_cs = get_model_inputs(dfs, code)
    predicts = models(inputs_ts, inputs_cs) # 'label_high_20', 'label_low_20', 'high_low_20', 'center_20'
    
    # 結果格納用dfを作成
    df_result = data_cs[['Local Code']].copy()
    df_result.index = pd.to_datetime(df_result.index)
    df_result['Local Code'] = df_result.index.strftime("%Y-%m-%d-") + df_result.loc[:, "Local Code"].astype(str)
    
    # 予測した値を格納("label_high_20", "label_low_20")
    width = predicts[:, 2].detach().numpy().copy() / 2
    df_result['label_high_20'] = (predicts[:, 0].detach().numpy().copy() \
                                + (predicts[:, 3].detach().numpy().copy() + width)) / 2
    df_result['label_low_20'] = (predicts[:, 1].detach().numpy().copy() \
                                + (predicts[:, 3].detach().numpy().copy() - width)) / 2
    
    results.append(df_result)
pd.concat(results)

Unnamed: 0_level_0,Local Code,label_high_20,label_low_20
base_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2020-02-07,2020-02-07-1301,0.081914,-0.059311
2020-05-12,2020-05-12-1301,0.083395,-0.059175
2020-08-07,2020-08-07-1301,0.083056,-0.059224
2020-11-06,2020-11-06-1301,0.08249,-0.059266
2020-02-05,2020-02-05-1332,0.103396,-0.066912
2020-05-20,2020-05-20-1332,0.093937,-0.06416
2020-08-03,2020-08-03-1332,0.108972,-0.062671
2020-11-05,2020-11-05-1332,0.108484,-0.066375
2020-02-03,2020-02-03-1333,0.085392,-0.059808
2020-05-14,2020-05-14-1333,0.086786,-0.059659


In [156]:
pd.concat([pd.DataFrame(predicts.detach().numpy()), pd.DataFrame(predicts.detach().numpy())])

Unnamed: 0,0,1,2,3
0,0.084477,-0.059188,0.143989,0.01305
1,0.083771,-0.059268,0.143358,0.012716
2,0.082212,-0.059437,0.141942,0.011978
3,0.080986,-0.059568,0.140836,0.011404
4,0.077215,-0.060263,0.137721,0.009288
0,0.084477,-0.059188,0.143989,0.01305
1,0.083771,-0.059268,0.143358,0.012716
2,0.082212,-0.059437,0.141942,0.011978
3,0.080986,-0.059568,0.140836,0.011404
4,0.077215,-0.060263,0.137721,0.009288


In [157]:
inputs_ts, inputs_cs, data_cs = get_model_inputs(dfs, 1375)
predicts = models(inputs_ts, inputs_cs) # 'label_high_20', 'label_low_20', 'high_low_20', 'center_20'
predicts

tensor([[ 0.0943, -0.0608,  0.1594,  0.0179],
        [ 0.0907, -0.0604,  0.1538,  0.0158]], grad_fn=<AddmmBackward>)

In [158]:
inputs_ts

tensor([[[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
           0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
           0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
           0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
           0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
           0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
         [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
           0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
           0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
           0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
           0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
           0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
         [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
           0.0000e+00, 