In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
import torch
import os, random

# Show up to 100 columns / rows when a DataFrame is displayed
for display_option in ('display.max_columns', 'display.max_rows'):
    pd.set_option(display_option, 100)

# Fix the seed of every random number generator used in this notebook
seed = 0
for seed_fn in (np.random.seed, random.seed, torch.manual_seed, torch.cuda.manual_seed):
    seed_fn(seed)

# 前処理

In [3]:
from decimal import Decimal, ROUND_HALF_UP

def calc_MADR(close:pd.core.series.Series, days:int) -> np.ndarray:
    '''Moving Average Divergence Rate: (close - MA) / MA over a `days`-row window.

    The first `days - 1` entries are NaN (incomplete window); infinite ratios
    (moving average equal to zero) are replaced by 0.
    '''
    moving_avg = close.rolling(days).mean()
    divergence = close.sub(moving_avg).div(moving_avg)
    return divergence.replace([np.inf, -np.inf], 0).values

def calc_MXDR(high:pd.core.series.Series, days:int) -> np.ndarray:
    '''Divergence of the high from its rolling maximum: (high - max) / max.

    The rolling maximum is taken over `days` rows; incomplete windows yield NaN
    and infinite ratios are replaced by 0.
    '''
    rolling_peak = high.rolling(days).max()
    divergence = high.sub(rolling_peak).div(rolling_peak)
    return divergence.replace([np.inf, -np.inf], 0).values

def calc_MNDR(min_:pd.core.series.Series, days:int) -> np.ndarray:
    '''Divergence of the low from its rolling minimum: (low - min) / min.

    The rolling minimum is taken over `days` rows; incomplete windows yield NaN
    and infinite ratios are replaced by 0.
    '''
    rolling_trough = min_.rolling(days).min()
    divergence = min_.sub(rolling_trough).div(rolling_trough)
    return divergence.replace([np.inf, -np.inf], 0).values

def calc_RNDR(close:float) -> float:
    '''Round Number Divergence Rate (a coined term): (close - RN) / RN.

    RN is the closing price rounded half-up to a "round number" whose scale
    depends on the price level:
      * close < 100     -> nearest 10
      * close < 10000   -> nearest 100
      * otherwise       -> nearest 1000

    Parameters
    ----------
    close : closing price of a single day (int or float).

    Returns
    -------
    The divergence rate. 0 is returned when the price sits exactly on the
    round number and also when the round number itself is 0 (prices below 5),
    where the rate is undefined; this matches the inf -> 0 convention used by
    the other indicators. NaN input is propagated as NaN.
    '''
    # Missing price: propagate NaN instead of failing inside int(Decimal('NaN')).
    if np.isnan(close):
        return float('nan')

    if close < 100:
        unit = '1E1'
    elif close < 10000:
        unit = '1E2'
    else:
        unit = '1E3'
    RN = int(Decimal(close).quantize(Decimal(unit), rounding=ROUND_HALF_UP))

    # The division is only undefined when the round number is 0, not when
    # close == RN (that case simply evaluates to 0 / RN == 0).
    if RN == 0:
        return 0.0
    return (close - RN) / RN

def calc_RSI(close, day):
    '''RSI as a ratio in [0, 1]: sum of gains / sum of absolute moves over `day` rows.

    Day-over-day changes that are negative or NaN count as 0 in the gain sum.
    Infinite ratios are replaced by 0; windows that are incomplete stay NaN.
    '''
    delta = close.diff()
    gains = delta.where(delta >= 0, 0)
    gain_total = gains.rolling(day).sum()
    move_total = delta.abs().rolling(day).sum()
    return (gain_total / move_total).replace([np.inf, -np.inf], 0).values

def add_techniacl_data(df_target: pd.core.frame.DataFrame) -> pd.core.frame.DataFrame:
    '''
    Return a copy of a single-stock price table with technical indicators appended.

    Added columns, in order: log_R, return_*, HV_*, MA20_HV*, MADR*, MXDR*,
    MNDR*, RNDR, RSI, H-L_C and MA*_H-L_C. Every NaN in the resulting frame
    (including warm-up rows of the rolling windows) is replaced by 0.
    '''
    close_col = 'EndOfDayQuote ExchangeOfficialClose'
    high_col = 'EndOfDayQuote High'
    low_col = 'EndOfDayQuote Low'
    long_windows = (5, 10, 25, 50, 75, 100)

    out = df_target.copy()
    close = out[close_col]

    # Day-over-day change of log(1 + close)
    out['log_R'] = np.log1p(close).diff()

    # Simple returns over 5 / 25 / 75 rows
    for span in (5, 25, 75):
        out['return_{}'.format(span)] = close.pct_change(span)

    # "Historical volatility": rolling std of the *change* in log_R
    # NOTE(review): this is the std of differenced log returns, not of the
    # log returns themselves - confirm this is intended.
    log_r_change = out['log_R'].diff()
    for span in long_windows:
        out['HV_{}'.format(span)] = log_r_change.rolling(span).std()

    # 20-row moving average of each volatility series
    for span in long_windows:
        out['MA20_HV{}'.format(span)] = out['HV_{}'.format(span)].rolling(20).mean()

    # Divergence of the close from its moving average
    for span in (5, 25, 75):
        out['MADR{}'.format(span)] = calc_MADR(close, span)

    # Divergence of the high from its rolling maximum
    for span in (5, 10, 20):
        out['MXDR{}'.format(span)] = calc_MXDR(out[high_col], span)

    # Divergence of the low from its rolling minimum
    for span in (5, 10, 20):
        out['MNDR{}'.format(span)] = calc_MNDR(out[low_col], span)

    # Divergence from the nearest round-number price
    out['RNDR'] = close.apply(calc_RNDR)

    # 14-row RSI
    out['RSI'] = calc_RSI(close, 14)

    # Daily range relative to the close, (high - low) / close, and its moving averages
    out['H-L_C'] = (out[high_col] - out[low_col]) / close
    for span in long_windows:
        out['MA{}_H-L_C'.format(span)] = out['H-L_C'].rolling(span).mean()

    # Missing values are filled with 0 (not dropped)
    return out.fillna(0)

In [4]:
def clean_base_date_index(df_target: pd.core.frame.DataFrame) -> pd.core.frame.DataFrame:
    '''
    Resolve revised disclosures in a single-stock financial table.

    A row whose 'Result_FinancialStatement ModifyDate' differs from its index
    date is treated as a revision of the disclosure made on ModifyDate:
      * revision within 20 days  -> the original row (index == ModifyDate) is dropped
      * revision after > 20 days -> the revision row itself is dropped
    Calendar days are used, not business days, for simplicity.

    Parameters
    ----------
    df_target : frame indexed by disclosure date (datetime) that contains the
        column 'Result_FinancialStatement ModifyDate'.

    Returns
    -------
    A new frame with ModifyDate converted to datetime and the superseded rows
    removed. Rows scheduled for removal that are not present (for example the
    original disclosure is missing from the table) are skipped instead of
    raising KeyError.
    '''
    df = df_target.copy()
    df['Result_FinancialStatement ModifyDate'] = pd.to_datetime(df['Result_FinancialStatement ModifyDate'])
    modify_col = df['Result_FinancialStatement ModifyDate']

    # Rows whose index date differs from ModifyDate are revisions
    is_revision = (modify_col != df.index).to_numpy()

    # Date of the revision (index) and date of the disclosure it revises.
    # Both are selected with the same positional mask so they stay aligned
    # even when the index contains duplicate dates.
    modify_dates = df.index[is_revision]
    base_dates = modify_col.to_numpy()[is_revision]

    # Elapsed calendar days; NaT differences become NaN and match neither mask
    elapsed_days = np.asarray((modify_dates - base_dates).days)
    within_20 = elapsed_days <= 20
    beyond_20 = elapsed_days > 20

    # Revised within 20 days: drop the original disclosure
    df = df.drop(base_dates[within_20], errors='ignore')

    # Revised after more than 20 days: drop the revision
    df = df.drop(modify_dates[beyond_20], errors='ignore')

    return df

def add_growth(df_target: pd.core.frame.DataFrame) -> pd.core.frame.DataFrame:
    '''Add growth rates versus the previous report of the same type.

    Rows are ordered by (ReportType, base_date) so that pct_change compares
    each report with the preceding one of the same ReportType. Infinite and
    missing rates become 0, and the first row of every ReportType is forced
    to 0 because it has no predecessor. The result is returned sorted by
    base_date.
    '''
    report_col = 'Result_FinancialStatement ReportType'
    growth_sources = (
        ('NetSales_Growth', 'Result_FinancialStatement NetSales'),
        ('OperatingIncome_Growth', 'Result_FinancialStatement OperatingIncome'),
        ('OrdinaryIncome_Growth', 'Result_FinancialStatement OrdinaryIncome'),
        ('NetIncome_Growth', 'Result_FinancialStatement NetIncome'),
    )

    df = df_target.sort_values([report_col, 'base_date']).copy()

    # Net sales / operating income / ordinary income / net income growth
    for growth_col, source_col in growth_sources:
        change = df[source_col].pct_change()
        df[growth_col] = change.replace([np.inf, -np.inf], 0).fillna(0)

    # The first row of each ReportType has no same-type predecessor: set to 0
    starts_new_type = df[report_col].ne(df[report_col].shift()).values
    first_rows = df.index[starts_new_type]
    df.loc[first_rows, [growth_col for growth_col, _ in growth_sources]] = 0

    # Restore chronological order
    return df.sort_values('base_date').copy()
    
def add_fundamental_data(df_target: pd.core.frame.DataFrame) -> pd.core.frame.DataFrame:
    '''Return a cleaned copy of a single-stock financial table with fundamental ratios.

    Steps: resolve revised disclosures, add profit margins, add growth versus
    the previous report of the same type, add forecast growth, add balance
    sheet ratios, and add the sign (-1 / 0 / 1) of each cash-flow item.
    Infinite ratios are replaced by 0.
    '''
    infinities = [np.inf, -np.inf]

    # Resolve revised disclosures
    df = clean_base_date_index(df_target.copy())

    # Operating / ordinary / net income margin on net sales
    margin_specs = (
        ('OperatingIncome_NetSales', 'Result_FinancialStatement OperatingIncome'),
        ('OrdinaryIncome_NetSales', 'Result_FinancialStatement OrdinaryIncome'),
        ('NetIncome_NetSales', 'Result_FinancialStatement NetIncome'),
    )
    for margin_col, income_col in margin_specs:
        margin = df[income_col] / df['Result_FinancialStatement NetSales']
        df[margin_col] = margin.replace(infinities, 0)

    # Growth versus the previous report of the same type
    df = add_growth(df)

    # Forecast growth: forecast / result - 1
    forecast_specs = (
        ('Forecast_NetSales_Growth', 'Forecast_FinancialStatement NetSales', 'Result_FinancialStatement NetSales'),
        ('Forecast_OperatingIncome_Growth', 'Forecast_FinancialStatement OperatingIncome', 'Result_FinancialStatement OperatingIncome'),
        ('Forecast_OrdinaryIncome_Growth', 'Forecast_FinancialStatement OrdinaryIncome', 'Result_FinancialStatement OrdinaryIncome'),
        ('Forecast_NetIncome_Growth', 'Forecast_FinancialStatement NetIncome', 'Result_FinancialStatement NetIncome'),
    )
    for growth_col, forecast_col, result_col in forecast_specs:
        expected_growth = df[forecast_col] / df[result_col] - 1
        df[growth_col] = expected_growth.replace(infinities, 0)

    # Capital ratio, ROE, ROA
    net_income = df['Result_FinancialStatement NetIncome']
    net_assets = df['Result_FinancialStatement NetAssets']
    total_assets = df['Result_FinancialStatement TotalAssets']
    df['Capital_Ratio'] = (net_assets / total_assets).replace(infinities, 0)
    df['ROE'] = (net_income / net_assets).replace(infinities, 0)
    df['ROA'] = (net_income / total_assets).replace(infinities, 0)

    # Sign of each cash flow (1, 0, -1); "pn" = positive / negative. Missing -> 0
    cash_flow_specs = (
        ('CF_Operating_pn', 'Result_FinancialStatement CashFlowsFromOperatingActivities'),
        ('CF_Financing_pn', 'Result_FinancialStatement CashFlowsFromFinancingActivities'),
        ('CF_Investing_pn', 'Result_FinancialStatement CashFlowsFromInvestingActivities'),
    )
    for sign_col, cash_flow_col in cash_flow_specs:
        df[sign_col] = np.sign(df[cash_flow_col]).fillna(0)

    return df

In [5]:
# Load the raw tables (paths are relative to the notebook's working directory)
stock_list = pd.read_csv('../data/stock_list.csv')
stock_price = pd.read_csv('../data/stock_price.csv')
# The financial table is indexed by its 'base_date' column
stock_fin = pd.read_csv('../data/stock_fin.csv', index_col='base_date')
stock_labels = pd.read_csv('../data/stock_labels.csv')

# Convert the price date column and the financial table index to datetime
stock_price['EndOfDayQuote Date'] = pd.to_datetime(stock_price['EndOfDayQuote Date'])
stock_fin.index = pd.to_datetime(stock_fin.index)

In [6]:
# Every stock code that appears in the price table, in ascending order
codes = sorted(set(stock_price['Local Code'].values))

# Per-code results are collected in lists and concatenated once after the loop;
# calling pd.concat inside the loop re-copies the accumulated frame on every
# iteration (quadratic in the number of codes).
tech_frames = []
fund_frames = []

for code in codes:
    # Price rows of this stock
    df_one_code = stock_price.loc[stock_price['Local Code'] == code].copy()

    # Add technical indicators
    df_one_code_tech = add_techniacl_data(df_one_code)

    # Financial statement rows of this stock
    df_one_code_fund = stock_fin.loc[stock_fin['Local Code'] == code].copy()

    # Add fundamental indicators
    df_one_code_fund = add_fundamental_data(df_one_code_fund)

    # Attach the closing price of the disclosure date
    # (inner join: financial rows without a price on that date are dropped)
    df_one_code_price = df_one_code[["EndOfDayQuote Date", "EndOfDayQuote ExchangeOfficialClose"]].copy()
    df_one_code_price.rename(columns={'EndOfDayQuote Date':'base_date'}, inplace=True)
    df_one_code_fund = pd.merge(df_one_code_fund, df_one_code_price, on='base_date')

    # Dividend yield (infinite values, i.e. a zero price, become 0)
    df_one_code_fund['Dividend_Yeild'] = (df_one_code_fund['Result_Dividend QuarterlyDividendPerShare'] / df_one_code_fund["EndOfDayQuote ExchangeOfficialClose"]).replace([np.inf, -np.inf], 0)

    # 17-sector classification code of this stock
    df_one_code_fund['17_Sector'] = stock_list[stock_list['Local Code'] == code]['17 Sector(Code)'].values[0]

    tech_frames.append(df_one_code_tech)
    fund_frames.append(df_one_code_fund)

df_all_code_tech = pd.concat(tech_frames, axis=0)
df_all_code_fund = pd.concat(fund_frames, axis=0)

In [7]:
# One-hot encode the categorical columns: cash-flow signs, sector code and report type
category_cols = ['CF_Operating_pn', 'CF_Financing_pn', 'CF_Investing_pn', '17_Sector', 'Result_FinancialStatement ReportType']
df_all_code_fund = pd.get_dummies(df_all_code_fund, columns=category_cols).copy()
df_all_code_fund.head()

Unnamed: 0,base_date,Local Code,Result_FinancialStatement AccountingStandard,Result_FinancialStatement FiscalPeriodEnd,Result_FinancialStatement FiscalYear,Result_FinancialStatement ModifyDate,Result_FinancialStatement CompanyType,Result_FinancialStatement ChangeOfFiscalYearEnd,Result_FinancialStatement NetSales,Result_FinancialStatement OperatingIncome,Result_FinancialStatement OrdinaryIncome,Result_FinancialStatement NetIncome,Result_FinancialStatement TotalAssets,Result_FinancialStatement NetAssets,Result_FinancialStatement CashFlowsFromOperatingActivities,Result_FinancialStatement CashFlowsFromFinancingActivities,Result_FinancialStatement CashFlowsFromInvestingActivities,Forecast_FinancialStatement AccountingStandard,Forecast_FinancialStatement FiscalPeriodEnd,Forecast_FinancialStatement ReportType,Forecast_FinancialStatement FiscalYear,Forecast_FinancialStatement ModifyDate,Forecast_FinancialStatement CompanyType,Forecast_FinancialStatement ChangeOfFiscalYearEnd,Forecast_FinancialStatement NetSales,Forecast_FinancialStatement OperatingIncome,Forecast_FinancialStatement OrdinaryIncome,Forecast_FinancialStatement NetIncome,Result_Dividend FiscalPeriodEnd,Result_Dividend ReportType,Result_Dividend FiscalYear,Result_Dividend ModifyDate,Result_Dividend RecordDate,Result_Dividend DividendPayableDate,Result_Dividend QuarterlyDividendPerShare,Result_Dividend AnnualDividendPerShare,Forecast_Dividend FiscalPeriodEnd,Forecast_Dividend ReportType,Forecast_Dividend FiscalYear,Forecast_Dividend ModifyDate,Forecast_Dividend RecordDate,Forecast_Dividend QuarterlyDividendPerShare,Forecast_Dividend AnnualDividendPerShare,OperatingIncome_NetSales,OrdinaryIncome_NetSales,NetIncome_NetSales,NetSales_Growth,OperatingIncome_Growth,OrdinaryIncome_Growth,NetIncome_Growth,Forecast_NetSales_Growth,Forecast_OperatingIncome_Growth,Forecast_OrdinaryIncome_Growth,Forecast_NetIncome_Growth,Capital_Ratio,ROE,ROA,EndOfDayQuote 
ExchangeOfficialClose,Dividend_Yeild,CF_Operating_pn_-1.0,CF_Operating_pn_0.0,CF_Operating_pn_1.0,CF_Financing_pn_-1.0,CF_Financing_pn_0.0,CF_Financing_pn_1.0,CF_Investing_pn_-1.0,CF_Investing_pn_0.0,CF_Investing_pn_1.0,17_Sector_1,17_Sector_2,17_Sector_3,17_Sector_4,17_Sector_5,17_Sector_6,17_Sector_7,17_Sector_8,17_Sector_9,17_Sector_10,17_Sector_11,17_Sector_12,17_Sector_13,17_Sector_14,17_Sector_15,17_Sector_16,17_Sector_17,Result_FinancialStatement ReportType_Annual,Result_FinancialStatement ReportType_Q1,Result_FinancialStatement ReportType_Q2,Result_FinancialStatement ReportType_Q3
0,2016-02-05,1301,ConsolidatedJP,2015/12,2016.0,2016-02-05,GB,False,178890.0,2467.0,2688.0,1133.0,114363.0,23417.0,,,,ConsolidatedJP,2016/03,Annual,2016.0,2016/02/05,GB,False,229000.0,2600.0,3000.0,2200.0,,,,,,,,,2016/03,Annual,2016.0,2016/02/05,2016/03/31,5.0,5.0,0.013791,0.015026,0.006334,0.0,0.0,0.0,0.0,0.280116,0.053912,0.116071,0.941748,0.20476,0.048384,0.009907,2650.0,,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
1,2016-05-09,1301,ConsolidatedJP,2016/03,2016.0,2016-05-09,GB,False,226626.0,2433.0,2814.0,1799.0,94608.0,23065.0,2689.0,2482.0,-5114.0,ConsolidatedJP,2016/09,Q2,2017.0,2016/05/09,GB,False,117000.0,1400.0,1300.0,800.0,2016/03,Annual,2016.0,2016/05/09,2016/03/31,2016/06/27,5.0,5.0,2017/03,Annual,2017.0,2016/05/09,2017/03/31,50.0,50.0,0.010736,0.012417,0.007938,0.0,0.0,0.0,0.0,-0.483731,-0.424579,-0.538024,-0.555309,0.243795,0.077997,0.019015,2610.0,0.001916,0,0,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0
2,2016-08-05,1301,ConsolidatedJP,2016/06,2017.0,2016-08-05,GB,False,52206.0,467.0,380.0,551.0,101632.0,22995.0,,,,ConsolidatedJP,2016/09,Q2,2017.0,2016/08/05,GB,False,117000.0,1400.0,1300.0,800.0,2016/03,Annual,2016.0,2016/08/05,2016/03/31,2016/06/27,5.0,5.0,2017/03,Annual,2017.0,2016/08/05,2017/03/31,50.0,50.0,0.008945,0.007279,0.010554,0.0,0.0,0.0,0.0,1.241122,1.997859,2.421053,0.451906,0.226257,0.023962,0.005422,2600.0,0.001923,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
3,2016-11-04,1301,ConsolidatedJP,2016/09,2017.0,2016-11-04,GB,False,109570.0,1171.0,1004.0,1186.0,106554.0,23600.0,,,,ConsolidatedJP,2017/03,Annual,2017.0,2016/11/04,GB,False,244000.0,3500.0,3300.0,2100.0,2016/03,Annual,2016.0,2016/11/04,2016/03/31,2016/06/27,5.0,5.0,2017/03,Annual,2017.0,2016/11/04,2017/03/31,50.0,50.0,0.010687,0.009163,0.010824,0.0,0.0,0.0,0.0,1.226887,1.988898,2.286853,0.770658,0.221484,0.050254,0.011131,2697.0,0.001854,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0
4,2017-02-17,1301,ConsolidatedJP,2016/12,2017.0,2017-02-10,GB,False,179975.0,2872.0,2827.0,2449.0,117168.0,25779.0,,,,ConsolidatedJP,2017/03,Annual,2017.0,2017/02/17,GB,False,244000.0,3500.0,3300.0,2100.0,2016/03,Annual,2016.0,2017/02/17,2016/03/31,2016/06/27,5.0,5.0,2017/03,Annual,2017.0,2017/02/17,2017/03/31,60.0,60.0,0.015958,0.015708,0.013607,0.006065,0.164167,0.051711,1.161518,0.355744,0.218663,0.167315,-0.142507,0.220017,0.095,0.020902,2826.0,0.001769,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1


In [27]:
# Save the technical and fundamental tables as CSV (the index is written as the first column)
df_all_code_tech.to_csv('../data/all_code_tech.csv')
df_all_code_fund.to_csv('../data/all_code_fund.csv')

In [9]:
# Target variables derived from the high / low labels of each horizon
label_horizons = (5, 10, 20)

# Spread between the high and low labels
for horizon in label_horizons:
    stock_labels['high_low_{}'.format(horizon)] = (
        stock_labels['label_high_{}'.format(horizon)] - stock_labels['label_low_{}'.format(horizon)]
    )

# Midpoint of the high and low labels
for horizon in label_horizons:
    stock_labels['center_{}'.format(horizon)] = (
        stock_labels['label_high_{}'.format(horizon)] + stock_labels['label_low_{}'.format(horizon)]
    ) / 2

stock_labels.head()

Unnamed: 0,base_date,Local Code,label_date_5,label_high_5,label_low_5,label_date_10,label_high_10,label_low_10,label_date_20,label_high_20,label_low_20,high_low_5,high_low_10,high_low_20,center_5,center_10,center_20
0,2016-01-04,1301,2016-01-12,0.01091,-0.04,2016-01-19,0.01091,-0.05455,2016-02-02,0.01091,-0.08727,0.05091,0.06546,0.09818,-0.014545,-0.02182,-0.03818
1,2016-01-05,1301,2016-01-13,0.00362,-0.04348,2016-01-20,0.00362,-0.07609,2016-02-03,0.00362,-0.09058,0.0471,0.07971,0.0942,-0.01993,-0.036235,-0.04348
2,2016-01-06,1301,2016-01-14,0.0,-0.05072,2016-01-21,0.0,-0.08696,2016-02-04,0.00362,-0.09058,0.05072,0.08696,0.0942,-0.02536,-0.04348,-0.04348
3,2016-01-07,1301,2016-01-15,0.01107,-0.03321,2016-01-22,0.01107,-0.0738,2016-02-05,0.02214,-0.0738,0.04428,0.08487,0.09594,-0.01107,-0.031365,-0.02583
4,2016-01-08,1301,2016-01-18,0.01111,-0.03333,2016-01-25,0.01111,-0.07037,2016-02-08,0.02593,-0.07037,0.04444,0.08148,0.0963,-0.01111,-0.02963,-0.02222


# Dataset Class

In [421]:
from torch.utils.data import Dataset
from torch import nn

class JPX_Dataset(Dataset):
    '''Dataset pairing a window of technical indicators with fundamental features.

    Parameters
    ----------
    dfs : dict with the keys
        'technical_index'   - per-day indicators; needs 'Local Code',
                              'EndOfDayQuote Date' and the feature columns
                              listed in get_past_data, rows in date order
                              within each code.
        'fundamental_index' - one row per disclosure; the first two columns
                              are expected to be 'base_date' and 'Local Code',
                              all remaining columns are used as features.
        'stock_label'       - labels keyed by 'base_date' and 'Local Code'.
    window_size : number of daily rows (ending at the disclosure date) fed to
        the time-series branch.

    Each item is (data_ts, data_cs, label):
        data_ts - float tensor (window_size, n_technical_features)
        data_cs - float tensor (n_fundamental_features,)
        label   - float tensor (n_matching_label_rows, 4)

    Windows are located by row position inside each stock's frame, so the
    technical table does not need a contiguous integer index.
    '''

    def __init__(self, dfs, window_size):
        self.technical_index = dfs['technical_index']
        self.fundamental_index = dfs['fundamental_index']
        self.stock_label = dfs['stock_label']
        self.window_size = window_size

        # Remove disclosures that do not have enough price history behind them
        self.arrange_fund_table()

    def arrange_fund_table(self):
        '''Drop leading disclosures of each stock that lack a full window of history.

        For every stock the disclosures are scanned in table order and removed
        while they are older than the first price row, have no price row on
        their date, or have fewer than window_size price rows before them.
        Scanning stops at the first usable disclosure (later ones are assumed
        to be usable as well). The index is reset afterwards.
        '''
        del_index = []

        for code in sorted(set(self.fundamental_index['Local Code'])):
            # Rows of this stock (boolean selection already yields new frames)
            df_one_code_tech = self.technical_index.loc[self.technical_index['Local Code'] == code]
            df_one_code_fund = self.fundamental_index.loc[self.fundamental_index['Local Code'] == code]

            # No price history at all: no window can be built for this stock
            if len(df_one_code_tech) == 0:
                del_index.extend(df_one_code_fund.index)
                continue

            tech_dates = df_one_code_tech['EndOfDayQuote Date']
            first_date = tech_dates.values[0]

            for j in df_one_code_fund.index:
                base_date = df_one_code_fund['base_date'][j]

                # Disclosure precedes the first available price row
                if base_date < first_date:
                    del_index.append(j)
                    continue

                # Row position of the disclosure date inside this stock's frame
                hits = np.flatnonzero((tech_dates == base_date).values)
                if len(hits) == 0:
                    # No price row on the disclosure date: unusable
                    del_index.append(j)
                    continue

                # Same criterion as comparing index labels on a contiguous
                # index: at least window_size rows must precede the date.
                if hits[0] < self.window_size:
                    del_index.append(j)
                else:
                    # First usable disclosure found; stop scanning this stock
                    break

        self.fundamental_index = self.fundamental_index.drop(del_index, axis=0).copy()
        self.fundamental_index = self.fundamental_index.reset_index(drop=True)

    def get_past_data(self, code, base_date, n):
        '''Return the technical features of `code` for the n rows ending at base_date.

        The result is a 2-D array (rows, features) in chronological order.
        Fewer than n rows are returned when the history is shorter than n.

        Raises
        ------
        IndexError : if the stock has no price row on base_date.
        '''
        # Price rows of this stock
        df_one_stock = self.technical_index[self.technical_index['Local Code'] == code]

        # Feature columns, in the order they are fed to the model
        extract_cols = ['EndOfDayQuote Volume', 'log_R', 'return_5', 'return_25', 'return_75',
                        'HV_5', 'HV_10', 'HV_25', "HV_50", 'HV_75', 'HV_100',
                        'MA20_HV5', 'MA20_HV10', 'MA20_HV25', 'MA20_HV50', 'MA20_HV75', 'MA20_HV100',
                        'MADR5', 'MADR25', 'MADR75', 'MXDR5', 'MXDR10', 'MXDR20', 'MNDR5', 'MNDR10', 'MNDR20', 'RNDR',
                        'RSI', 'H-L_C', 'MA25_H-L_C']

        hits = np.flatnonzero((df_one_stock['EndOfDayQuote Date'] == base_date).values)
        if len(hits) == 0:
            raise IndexError('no technical data for code {} on {}'.format(code, base_date))

        # Positional slice; clamp at 0 so a short history never wraps around
        end = int(hits[0]) + 1
        start = max(end - n, 0)

        return df_one_stock.iloc[start:end][extract_cols].values

    def __len__(self):
        return len(self.fundamental_index)

    def __getitem__(self, i):
        # Stock code and disclosure date of sample i
        code_i = self.fundamental_index['Local Code'][i]
        date_i = self.fundamental_index['base_date'][i]

        # Time-series window and cross-sectional (fundamental) features;
        # the first two columns (base_date, Local Code) are keys, not features
        data_ts = self.get_past_data(code_i, date_i, self.window_size)
        data_cs = self.fundamental_index.iloc[i, 2:].values.astype(np.float64)

        # Labels of the same stock and date
        stock_label_target = self.stock_label[(self.stock_label['base_date']== date_i) & (self.stock_label['Local Code']==code_i)]
        label = stock_label_target[['label_high_20', 'label_low_20', 'high_low_20', 'center_20']].values

        # numpy -> torch.Tensor
        data_ts = torch.from_numpy(data_ts).float()
        data_cs = torch.from_numpy(data_cs).float()
        label = torch.from_numpy(label).float()

        return data_ts, data_cs, label

In [350]:
# Preprocessing: make the date keys datetime so they can be compared across tables
df_all_code_tech['EndOfDayQuote Date'] = pd.to_datetime(df_all_code_tech['EndOfDayQuote Date'])
stock_labels['base_date'] = pd.to_datetime(stock_labels['base_date'])

# Columns at positions 2..42 are excluded from the model input below.
# In the table displayed above these are the raw Result_/Forecast_ statement and
# dividend columns; the range is positional, so re-check it if the column layout changes.
delete_cols = df_all_code_fund.iloc[:, 2:43].columns
# Fill missing values with 0 and renumber the rows 0..N-1 (both in place)
df_all_code_fund.fillna(0, inplace=True)
df_all_code_fund.reset_index(drop=True, inplace=True)


# Bundle the three tables into the dict expected by JPX_Dataset
dfs = {
    'technical_index':df_all_code_tech,
    'fundamental_index':df_all_code_fund.drop(delete_cols, axis=1),
    'stock_label':stock_labels,
}

# Number of daily rows fed to the time-series branch
window_size = 5
ds = JPX_Dataset(dfs, window_size)

In [351]:
# Inspect the time-series tensor (first element of the item tuple) of sample 20
ds[20][0]

1332 2016-02-05 00:00:00


tensor([[ 4.7543e+06,  4.0694e-02,  1.0526e-01,  0.0000e+00,  0.0000e+00,
          1.1180e-02,  4.4489e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
          0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
          0.0000e+00,  0.0000e+00,  7.0724e-02,  0.0000e+00,  0.0000e+00,
          0.0000e+00,  0.0000e+00, -7.4753e-02,  8.3189e-02,  8.5069e-02,
          8.5069e-02, -7.0000e-02,  5.6311e-01,  4.7619e-02,  0.0000e+00],
        [ 4.9409e+06, -4.6118e-03,  1.1149e-01,  0.0000e+00,  0.0000e+00,
          2.8243e-02,  4.7180e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
          0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
          0.0000e+00,  0.0000e+00,  4.3478e-02,  0.0000e+00,  0.0000e+00,
          0.0000e+00,  0.0000e+00,  0.0000e+00,  1.1438e-01,  1.1632e-01,
          1.1632e-01,  8.0000e-02,  5.0794e-01,  4.1667e-02,  0.0000e+00],
        [ 4.1135e+06, -3.1301e-02,  7.5342e-02,  0.0000e+00,  0.0000e+00,
          2.9787e-02,  4.7774e-02,  

In [352]:
# Show the technical rows of code 1332 from index label 1240 onward, to compare with the tensor above
ds.technical_index[ds.technical_index['Local Code'] == 1332].loc[1240:].head()

Unnamed: 0,Local Code,EndOfDayQuote Date,EndOfDayQuote Open,EndOfDayQuote High,EndOfDayQuote Low,EndOfDayQuote Close,EndOfDayQuote ExchangeOfficialClose,EndOfDayQuote Volume,EndOfDayQuote CumulativeAdjustmentFactor,EndOfDayQuote PreviousClose,EndOfDayQuote PreviousCloseDate,EndOfDayQuote PreviousExchangeOfficialClose,EndOfDayQuote PreviousExchangeOfficialCloseDate,EndOfDayQuote ChangeFromPreviousClose,EndOfDayQuote PercentChangeFromPreviousClose,EndOfDayQuote VWAP,log_R,return_5,return_25,return_75,HV_5,HV_10,HV_25,HV_50,HV_75,HV_100,MA20_HV5,MA20_HV10,MA20_HV25,MA20_HV50,MA20_HV75,MA20_HV100,MADR5,MADR25,MADR75,MXDR5,MXDR10,MXDR20,MNDR5,MNDR10,MNDR20,RNDR,RSI,H-L_C,MA5_H-L_C,MA10_H-L_C,MA25_H-L_C,MA50_H-L_C,MA75_H-L_C,MA100_H-L_C
1240,1332,2016-02-01,630.0,656.0,625.0,651.0,651.0,4754300.0,1.0,625.0,2016/01/29,625.0,2016/01/29,26.0,4.16,644.696,0.040694,0.105263,0.0,0.0,0.01118,0.044489,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.070724,0.0,0.0,0.0,0.0,-0.074753,0.083189,0.085069,0.085069,-0.07,0.563107,0.047619,0.040782,0.04431,0.0,0.0,0.0,0.0
1241,1332,2016-02-02,646.0,670.0,643.0,648.0,648.0,4940900.0,1.0,651.0,2016/02/01,651.0,2016/02/01,-3.0,-0.461,657.737,-0.004612,0.111492,0.0,0.0,0.028243,0.04718,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.043478,0.0,0.0,0.0,0.0,0.0,0.114385,0.116319,0.116319,0.08,0.507937,0.041667,0.043627,0.045997,0.0,0.0,0.0,0.0
1242,1332,2016-02-03,636.0,637.0,614.0,628.0,628.0,4113500.0,1.0,648.0,2016/02/02,648.0,2016/02/02,-20.0,-3.086,625.736,-0.031301,0.075342,0.0,0.0,0.029787,0.047774,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.002858,0.0,0.0,-0.049254,-0.049254,-0.049254,0.062284,0.065972,0.065972,0.046667,0.533333,0.036624,0.044102,0.045039,0.0,0.0,0.0,0.0
1243,1332,2016-02-04,621.0,625.0,592.0,596.0,596.0,3378300.0,1.0,628.0,2016/02/03,628.0,2016/02/03,-32.0,-5.096,603.018,-0.052214,-0.001675,0.0,0.0,0.025945,0.045369,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.053367,0.0,0.0,-0.067164,-0.067164,-0.067164,0.0,0.025997,0.027778,-0.006667,0.461538,0.055369,0.047136,0.044857,0.0,0.0,0.0,0.0
1244,1332,2016-02-05,590.0,624.0,580.0,591.0,591.0,6473000.0,1.0,596.0,2016/02/04,596.0,2016/02/04,-5.0,-0.839,594.383,-0.00841,-0.0544,0.0,0.0,0.033763,0.035279,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.05106,0.0,0.0,-0.068657,-0.068657,-0.068657,0.0,0.005199,0.006944,-0.015,0.454976,0.07445,0.051146,0.048163,0.0,0.0,0.0,0.0


In [353]:
# Show the first fundamental rows of code 1332 that remain in the dataset
ds.fundamental_index[ds.fundamental_index['Local Code'] == 1332].head()

Unnamed: 0,base_date,Local Code,OperatingIncome_NetSales,OrdinaryIncome_NetSales,NetIncome_NetSales,NetSales_Growth,OperatingIncome_Growth,OrdinaryIncome_Growth,NetIncome_Growth,Forecast_NetSales_Growth,Forecast_OperatingIncome_Growth,Forecast_OrdinaryIncome_Growth,Forecast_NetIncome_Growth,Capital_Ratio,ROE,ROA,EndOfDayQuote ExchangeOfficialClose,Dividend_Yeild,CF_Operating_pn_-1.0,CF_Operating_pn_0.0,CF_Operating_pn_1.0,CF_Financing_pn_-1.0,CF_Financing_pn_0.0,CF_Financing_pn_1.0,CF_Investing_pn_-1.0,CF_Investing_pn_0.0,CF_Investing_pn_1.0,17_Sector_1,17_Sector_2,17_Sector_3,17_Sector_4,17_Sector_5,17_Sector_6,17_Sector_7,17_Sector_8,17_Sector_9,17_Sector_10,17_Sector_11,17_Sector_12,17_Sector_13,17_Sector_14,17_Sector_15,17_Sector_16,17_Sector_17,Result_FinancialStatement ReportType_Annual,Result_FinancialStatement ReportType_Q1,Result_FinancialStatement ReportType_Q2,Result_FinancialStatement ReportType_Q3
20,2016-02-05,1332,0.035628,0.038829,0.023085,0.0,0.0,0.0,0.0,0.314927,0.038002,0.058257,0.023496,0.240797,0.098131,0.02363,591.0,0.0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
21,2016-05-13,1332,0.030513,0.032481,0.019315,0.0,0.0,0.0,0.0,-0.505622,-0.58852,-0.589293,-0.634354,0.255841,0.107928,0.027612,552.0,0.005435,0,0,1,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0
22,2016-08-05,1332,0.026698,0.025061,0.010579,0.0,0.0,0.0,0.0,1.014827,0.916627,1.169474,1.720677,0.246581,0.015293,0.003771,453.0,0.006623,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
23,2016-11-04,1332,0.032302,0.030406,0.01545,0.0,0.0,0.0,0.0,0.991704,0.987362,1.16544,1.557,0.276056,0.038613,0.010659,522.0,0.004789,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0
24,2017-02-21,1332,0.039917,0.044196,0.0264,-0.036773,0.079177,0.096354,0.101549,0.33953,0.148873,0.13417,0.13113,0.291469,0.093327,0.027202,562.0,0.004448,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1


# Train/Validation Dataset

In [354]:
# Date boundaries of the train / validation / test periods
# NOTE(review): the ranges leave gaps (2017-12 between train and validation,
# 2018-12-02..31 between validation and test) - presumably intentional; confirm.
TRAIN_END = "2017-11-30"
VAL_START = "2018-01-01"
VAL_END = "2018-12-01"
TEST_START = "2019-01-01"

In [355]:
# Row indices of each split, selected by disclosure date (all bounds inclusive)
fund_dates = ds.fundamental_index['base_date']
fund_rows = ds.fundamental_index.index

train_index = fund_rows[fund_dates <= TRAIN_END].values
val_index = fund_rows[fund_dates.between(VAL_START, VAL_END)].values
test_index = fund_rows[fund_dates >= TEST_START].values

In [356]:
from torch.utils.data.dataset import Subset

# Wrap the full dataset in one Subset per split
train_ds, valid_ds, test_ds = (
    Subset(ds, split_index) for split_index in (train_index, val_index, test_index)
)

# Report the number of samples in each split
for split_name, split_ds in (('train', train_ds), ('valid', valid_ds), ('test', test_ds)):
    print(split_name + ' size:', len(split_ds))

train size: 26509
valid size: 13662
test size: 29027


# DataLoader

In [357]:
batch_size = 2

# Build the DataLoaders.
# Only the training loader is shuffled: validation and test batches are served
# in dataset order so that evaluation is deterministic and predictions can be
# matched back to val_index / test_index.
train_dataloader = torch.utils.data.DataLoader(train_ds, batch_size=batch_size, shuffle=True)
valid_dataloader = torch.utils.data.DataLoader(valid_ds, batch_size=batch_size, shuffle=False)
test_dataloader = torch.utils.data.DataLoader(test_ds, batch_size=batch_size, shuffle=False)

# Loaders keyed by phase name
dataloaders_dict = {'train': train_dataloader,
                    'val'  : valid_dataloader,
                    'test' : test_dataloader}

In [358]:
# Sanity check: draw one training batch and print the input shapes and the labels
batch_iterator = iter(dataloaders_dict['train'])
inputs_ts, inputs_cs, labels = next(batch_iterator)
print(inputs_ts.size(), inputs_cs.size(), labels)

1972 2017-10-27 00:00:00
5909 2017-10-27 00:00:00
torch.Size([2, 5, 30]) torch.Size([2, 46]) tensor([[[ 0.0951,  0.0039,  0.0913,  0.0495]],

        [[ 0.0443, -0.0584,  0.1027, -0.0070]]])


## モデル構築
Network(TCN)  
https://github.com/locuslab/TCN/blob/master/TCN/tcn.py

In [5]:
import torch
import torch.nn as nn
from torch.nn.utils import weight_norm


class Chomp1d(nn.Module):
    '''Remove the last `chomp_size` steps of the final dimension of a 3-D tensor.

    Used after a convolution that was padded on both sides so that only the
    left padding remains in effect. A chomp_size of 0 returns the input
    unchanged (a plain `x[:, :, :-0]` slice would return an empty tensor).
    '''

    def __init__(self, chomp_size):
        super(Chomp1d, self).__init__()
        self.chomp_size = chomp_size

    def forward(self, x):
        # Nothing to trim when no padding was added (e.g. kernel_size == 1)
        if self.chomp_size == 0:
            return x.contiguous()
        return x[:, :, :-self.chomp_size].contiguous()


class TemporalBlock(nn.Module):
    '''Residual block of two weight-normalised dilated 1-D convolutions.

    Each convolution is followed by Chomp1d(padding), ReLU and Dropout.
    The block output is ReLU(net(x) + res), where res is x itself or, when
    n_inputs != n_outputs, x passed through a 1x1 convolution.

    Parameters
    ----------
    n_inputs, n_outputs : input / output channel counts.
    kernel_size, stride, dilation, padding : passed to both Conv1d layers;
        `padding` is also the number of trailing steps removed by Chomp1d.
    dropout : dropout probability used after each convolution.
    '''
    def __init__(self, n_inputs, n_outputs, kernel_size, stride, dilation, padding, dropout=0.2):
        super(TemporalBlock, self).__init__()
        # First conv -> chomp -> ReLU -> dropout stage
        self.conv1 = weight_norm(nn.Conv1d(n_inputs, n_outputs, kernel_size,
                                           stride=stride, padding=padding, dilation=dilation))
        self.chomp1 = Chomp1d(padding)
        self.relu1 = nn.ReLU()  # alternative noted by the author: nn.SiLU()
        self.dropout1 = nn.Dropout(dropout)

        # Second stage, same layout, operating on n_outputs channels
        self.conv2 = weight_norm(nn.Conv1d(n_outputs, n_outputs, kernel_size,
                                           stride=stride, padding=padding, dilation=dilation))
        self.chomp2 = Chomp1d(padding)
        self.relu2 = nn.ReLU()
        self.dropout2 = nn.Dropout(dropout)

        self.net = nn.Sequential(self.conv1, self.chomp1, self.relu1, self.dropout1,
                                 self.conv2, self.chomp2, self.relu2, self.dropout2)
        # 1x1 convolution to match channel counts on the residual path, if needed
        self.downsample = nn.Conv1d(n_inputs, n_outputs, 1) if n_inputs != n_outputs else None
        self.relu = nn.ReLU()
        self.init_weights()

    def init_weights(self):
        '''Draw the convolution weights from N(0, 0.01).'''
        # NOTE(review): conv1/conv2 are wrapped in weight_norm; presumably their
        # effective weight is recomputed from the weight-norm parameters on
        # forward, so this in-place init may not persist - confirm.
        self.conv1.weight.data.normal_(0, 0.01)
        self.conv2.weight.data.normal_(0, 0.01)
        if self.downsample is not None:
            self.downsample.weight.data.normal_(0, 0.01)

    def forward(self, x):
        out = self.net(x)
        # Residual path: identity, or 1x1 conv when channel counts differ
        res = x if self.downsample is None else self.downsample(x)
        return self.relu(out + res)


class TemporalConvNet(nn.Module):
    '''Stack of TemporalBlocks whose dilation doubles at every level.

    Level i maps num_channels[i-1] (num_inputs for the first level) to
    num_channels[i] channels with dilation 2**i, stride 1 and padding
    (kernel_size - 1) * dilation.
    '''

    def __init__(self, num_inputs, num_channels, kernel_size=2, dropout=0.2):
        super().__init__()
        blocks = []
        for level, out_channels in enumerate(num_channels):
            dilation = 2 ** level
            # The first level reads the raw input; later levels read the previous level
            in_channels = num_inputs if level == 0 else num_channels[level - 1]
            blocks.append(
                TemporalBlock(in_channels, out_channels, kernel_size, stride=1,
                              dilation=dilation, padding=(kernel_size - 1) * dilation,
                              dropout=dropout)
            )

        self.network = nn.Sequential(*blocks)

    def forward(self, x):
        return self.network(x)

In [6]:
class _GaussianNoise(nn.Module):
    """Add zero-mean Gaussian noise with standard deviation ``std`` in training mode.

    In eval mode, or when ``std`` is 0, the input is returned unchanged.
    """

    def __init__(self, std=0.0):
        super().__init__()
        self.std = std

    def forward(self, x):
        if self.training and self.std > 0:
            return x + torch.randn_like(x) * self.std
        return x


class TCN(nn.Module):
    """TCN over the time-series input followed by a three-layer dense head.

    The time-series tensor goes through ``TemporalConvNet``, is flattened and
    concatenated with the second input, then passes through
    Linear -> BatchNorm -> LeakyReLU -> Dropout twice and a final Linear
    with a sigmoid on the output.

    Args:
        input_size: number of input channels of the time-series tensor.
        output_size: number of values predicted per sample.
        num_channels: output channels of each TCN level.
        kernel_size: convolution kernel size of every TCN level.
        dropout: dropout probability used in the TCN and in the dense head.
        fc_input_size: width of the concatenated feature vector fed to ``fc1``
            (flattened TCN output + ``inputs_cs`` features). Defaults to 106,
            the value that was previously hard-coded.
        noise_std: standard deviation of the Gaussian noise added to
            ``inputs_ts`` while training. The default 0.0 disables the noise.
    """

    def __init__(self, input_size, output_size, num_channels, kernel_size, dropout,
                 fc_input_size=106, noise_std=0.0):
        super(TCN, self).__init__()
        # forward() calls self.gnoise, which was never created here, so the
        # class raised AttributeError on a fresh kernel. It has no parameters
        # and draws no random numbers at construction time.
        self.gnoise = _GaussianNoise(noise_std)

        self.tcn = TemporalConvNet(input_size, num_channels, kernel_size=kernel_size, dropout=dropout)

        self.fc1 = nn.Linear(fc_input_size, 128)
        self.dropout1 = nn.Dropout(dropout)
        self.batch_norm1 = nn.BatchNorm1d(128)
        self.LeakyReLU1 = nn.LeakyReLU(negative_slope=0.01, inplace=True)

        self.fc2 = nn.Linear(128, 128)
        self.dropout2 = nn.Dropout(dropout)
        self.batch_norm2 = nn.BatchNorm1d(128)
        self.LeakyReLU2 = nn.LeakyReLU(negative_slope=0.01, inplace=True)

        self.fc3 = nn.Linear(128, output_size)

    def forward(self, inputs_ts, inputs_cs):
        """Return sigmoid-activated predictions of shape (N, output_size).

        Args:
            inputs_ts: time-series input with dimensions (N, C, L).
            inputs_cs: per-sample feature matrix concatenated to the flattened
                TCN output along dim 1.
        """
        inputs_ts = self.gnoise(inputs_ts)
        y1 = self.tcn(inputs_ts)  # input should have dimension (N, C, L)
        y1 = torch.flatten(y1, start_dim=1)

        # With the notebook's settings: y1 (N, 60) + inputs_cs (N, 46) -> (N, 106).
        # The resulting width must equal fc_input_size.
        y1 = torch.cat([y1, inputs_cs], dim=1)

        y1 = self.fc1(y1)
        y1 = self.batch_norm1(y1)
        y1 = self.LeakyReLU1(y1)
        y1 = self.dropout1(y1)

        y1 = self.fc2(y1)
        y1 = self.batch_norm2(y1)
        y1 = self.LeakyReLU2(y1)
        y1 = self.dropout2(y1)

        o = self.fc3(y1)
        return torch.sigmoid(o)

In [7]:
# Use the first CUDA device when one is available; otherwise stay on the CPU.
device = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')
print('use devise:', device)

# Four TCN levels (16 -> 8 -> 4 -> 2 channels) predicting 4 values per sample.
net = TCN(
    input_size=window_size,
    output_size=4,
    num_channels=[16, 8, 4, 2],
    kernel_size=2,
    dropout=0.5,
)
print(net)

use devise: cpu
TCN(
  (tcn): TemporalConvNet(
    (network): Sequential(
      (0): TemporalBlock(
        (conv1): Conv1d(5, 16, kernel_size=(2,), stride=(1,), padding=(1,))
        (chomp1): Chomp1d()
        (relu1): ReLU()
        (dropout1): Dropout(p=0.5, inplace=False)
        (conv2): Conv1d(16, 16, kernel_size=(2,), stride=(1,), padding=(1,))
        (chomp2): Chomp1d()
        (relu2): ReLU()
        (dropout2): Dropout(p=0.5, inplace=False)
        (net): Sequential(
          (0): Conv1d(5, 16, kernel_size=(2,), stride=(1,), padding=(1,))
          (1): Chomp1d()
          (2): ReLU()
          (3): Dropout(p=0.5, inplace=False)
          (4): Conv1d(16, 16, kernel_size=(2,), stride=(1,), padding=(1,))
          (5): Chomp1d()
          (6): ReLU()
          (7): Dropout(p=0.5, inplace=False)
        )
        (downsample): Conv1d(5, 16, kernel_size=(1,), stride=(1,))
        (relu): ReLU()
      )
      (1): TemporalBlock(
        (conv1): Conv1d(16, 8, kernel_size=(2,), 

In [392]:
# Shape of one time-series batch.
inputs_ts.shape

torch.Size([2, 5, 30])

In [416]:
# Sanity check: run one batch through the network and compare the output
# shape with the shape of a label taken from the training dataset.
o = net(inputs_ts, inputs_cs)
print(o.shape)
print(train_ds[0][2].shape)
o


torch.Size([2, 4])
1301 2016-02-05 00:00:00
torch.Size([1, 4])


tensor([[0.1973, 0.0177, 0.1685, 0.0498],
        [0.0415, 0.0139, 0.1199, 0.0461]], grad_fn=<SigmoidBackward>)

In [418]:
# Inserting a singleton dim at position 1 gives the same rank as the labels.
o.unsqueeze(1).shape

torch.Size([2, 1, 4])

# Loss Function/Optim

In [401]:
import torch.optim as optim

# Mean-squared error between the sigmoid outputs and the labels.
criterion = nn.MSELoss()
# Adam over every parameter of the network.
optimizer = optim.Adam(params=net.parameters(), lr=1e-3)

# Train

In [419]:
from tqdm import tqdm

def train_model(net, dataloader_dict, criterion, optimizer, num_epochs):
    """Train ``net`` and evaluate it on the validation data once per epoch.

    Each epoch runs a 'train' phase followed by a 'val' phase. The network is
    put in training mode for the former and evaluation mode for the latter, so
    Dropout and BatchNorm do not perturb the validation loss, and validation
    batches do not update BatchNorm running statistics.

    Args:
        net: module called as ``net(inputs_ts, inputs_cs)``.
        dataloader_dict: mapping with 'train' and 'val' iterables yielding
            ``(inputs_ts, inputs_cs, labels)`` batches.
        criterion: loss called as ``criterion(outputs, labels)``, where
            ``outputs`` is the network output with a dim inserted at index 1.
        optimizer: optimizer stepping the parameters of ``net``.
        num_epochs: number of epochs to run.

    Prints the per-sample mean loss of every phase and a message whenever the
    validation loss improves on the best value seen so far.
    """
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    print('use devise:', device)

    net.to(device)
    #torch.backends.cudnn.deterministic = True

    # None until the first validation phase has completed.
    best_val_loss = None

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch+1, num_epochs))
        print('--------------------------')

        for phase in ['train', 'val']:
            is_train = phase == 'train'
            # train(True) == train(), train(False) == eval()
            net.train(is_train)

            epoch_loss = 0.0
            n_samples = 0

            for inputs_ts, inputs_cs, labels in tqdm(dataloader_dict[phase]):

                inputs_ts = inputs_ts.to(device)
                inputs_cs = inputs_cs.to(device)
                labels = labels.to(device)

                # Reset the accumulated gradients before each batch.
                optimizer.zero_grad()

                with torch.set_grad_enabled(is_train):

                    outputs = net(inputs_ts, inputs_cs)
                    outputs = outputs.unsqueeze(1) #(N, 4) -> (N, 1, 4)
                    loss = criterion(outputs, labels)

                    if is_train:
                        loss.backward()
                        optimizer.step()

                # Weight the batch loss by the batch size so that the epoch
                # figure is a per-sample mean even when the last batch is short.
                batch_size = inputs_ts.size(0)
                epoch_loss += loss.item() * batch_size
                n_samples += batch_size

            if n_samples:
                epoch_loss /= n_samples

            # print Score
            print('{} Loss: {:.4f}'.format(phase, epoch_loss))

            # save model: the first validation result always counts as best
            if phase == 'val' and (best_val_loss is None or best_val_loss > epoch_loss):
                best_val_loss = epoch_loss
                print('Best score updated. New model was saved.')
                #torch.save(net.state_dict(), './model.mdl')

In [422]:
# Short smoke-test run of the training loop.
num_epochs = 2
train_model(net, dataloaders_dict, criterion, optimizer, num_epochs=num_epochs)

  0%|          | 2/13255 [00:00<18:21, 12.03it/s]

use devise: cpu
Epoch 1/2
--------------------------
9070 2016-05-10 00:00:00
6839 2017-02-13 00:00:00
label torch.Size([2, 1, 4])
6806 2017-07-28 00:00:00
7879 2017-07-13 00:00:00
label torch.Size([2, 1, 4])
5268 2016-02-10 00:00:00


  0%|          | 4/13255 [00:00<18:24, 12.00it/s]

2489 2016-08-03 00:00:00
label torch.Size([2, 1, 4])
6347 2016-11-10 00:00:00
2477 2016-08-10 00:00:00
label torch.Size([2, 1, 4])
2185 2016-02-10 00:00:00
3133 2016-11-10 00:00:00
label torch.Size([2, 1, 4])


  0%|          | 6/13255 [00:00<18:53, 11.69it/s]

1890 2016-05-12 00:00:00
8345 2016-08-05 00:00:00
label torch.Size([2, 1, 4])
4685 2017-01-27 00:00:00
6092 2017-08-10 00:00:00
label torch.Size([2, 1, 4])
6078 2016-05-13 00:00:00


  0%|          | 8/13255 [00:00<19:11, 11.51it/s]

8137 2016-10-28 00:00:00
label torch.Size([2, 1, 4])
5951 2017-05-12 00:00:00
7305 2016-11-11 00:00:00
label torch.Size([2, 1, 4])
7315 2016-02-05 00:00:00
4021 2016-05-12 00:00:00


  0%|          | 11/13255 [00:01<22:03, 10.01it/s]

label torch.Size([2, 1, 4])
9930 2017-05-12 00:00:00
4925 2016-02-10 00:00:00
label torch.Size([2, 1, 4])
6590 2017-11-02 00:00:00


  0%|          | 12/13255 [00:01<27:21,  8.07it/s]

3161 2017-03-10 00:00:00
label torch.Size([2, 1, 4])
1884 2016-05-13 00:00:00
2926 2016-07-25 00:00:00
label torch.Size([2, 1, 4])
7815 2017-05-12 00:00:00
9619 2016-04-28 00:00:00


  0%|          | 16/13255 [00:01<23:01,  9.59it/s]

label torch.Size([2, 1, 4])
4287 2016-06-13 00:00:00
6293 2016-08-05 00:00:00
label torch.Size([2, 1, 4])
2408 2017-01-13 00:00:00
2193 2017-11-09 00:00:00
label torch.Size([2, 1, 4])
9647 2016-10-14 00:00:00


  0%|          | 18/13255 [00:01<22:20,  9.87it/s]

3461 2016-05-09 00:00:00
label torch.Size([2, 1, 4])
3030 2016-07-14 00:00:00
4955 2016-08-10 00:00:00
label torch.Size([2, 1, 4])
6365 2017-11-14 00:00:00
3690 2017-08-04 00:00:00


  0%|          | 20/13255 [00:01<21:43, 10.15it/s]

label torch.Size([2, 1, 4])
8746 2016-11-11 00:00:00
4282 2016-08-01 00:00:00
label torch.Size([2, 1, 4])
2471 2016-01-13 00:00:00
6145 2017-02-03 00:00:00
label torch.Size([2, 1, 4])
6286 2016-11-10 00:00:00


  0%|          | 22/13255 [00:02<21:14, 10.38it/s]

9873 2016-11-10 00:00:00
label torch.Size([2, 1, 4])
4004 2016-02-09 00:00:00
7927 2017-05-12 00:00:00
label torch.Size([2, 1, 4])
6258 2016-02-10 00:00:00
3779 2017-07-21 00:00:00


  0%|          | 26/13255 [00:02<20:17, 10.86it/s]

label torch.Size([2, 1, 4])
3306 2016-08-10 00:00:00
6807 2016-01-27 00:00:00
label torch.Size([2, 1, 4])
7531 2016-05-11 00:00:00
3914 2016-05-13 00:00:00
label torch.Size([2, 1, 4])
8515 2016-11-14 00:00:00


  0%|          | 28/13255 [00:02<21:11, 10.40it/s]

9507 2017-10-26 00:00:00
label torch.Size([2, 1, 4])
9639 2016-08-10 00:00:00
4347 2017-01-27 00:00:00
label torch.Size([2, 1, 4])
3176 2017-05-10 00:00:00


  0%|          | 30/13255 [00:02<22:06,  9.97it/s]

3694 2016-05-13 00:00:00
label torch.Size([2, 1, 4])
3565 2017-06-08 00:00:00
7609 2017-11-02 00:00:00
label torch.Size([2, 1, 4])
4350 2016-01-29 00:00:00


  0%|          | 32/13255 [00:03<22:08,  9.95it/s]

1728 2017-11-02 00:00:00
label torch.Size([2, 1, 4])
2692 2016-02-01 00:00:00
6190 2017-02-14 00:00:00
label torch.Size([2, 1, 4])
5162 2016-11-08 00:00:00
9438 2016-10-31 00:00:00


  0%|          | 34/13255 [00:03<22:23,  9.84it/s]

label torch.Size([2, 1, 4])
4543 2016-08-04 00:00:00
6473 2017-04-28 00:00:00
label torch.Size([2, 1, 4])
7244 2016-02-10 00:00:00
1798 2016-08-05 00:00:00


  0%|          | 36/13255 [00:03<22:29,  9.79it/s]

label torch.Size([2, 1, 4])
3467 2016-08-05 00:00:00
6188 2017-11-08 00:00:00
label torch.Size([2, 1, 4])
9686 2016-02-02 00:00:00
8397 2017-02-09 00:00:00


  0%|          | 38/13255 [00:03<22:26,  9.81it/s]

label torch.Size([2, 1, 4])
5950 2016-05-10 00:00:00
8804 2017-08-07 00:00:00
label torch.Size([2, 1, 4])
7433 2017-04-28 00:00:00
2178 2016-09-30 00:00:00
label torch.Size([2, 1, 4])


  0%|          | 40/13255 [00:03<21:52, 10.07it/s]

4312 2016-05-11 00:00:00
3254 2017-08-07 00:00:00
label torch.Size([2, 1, 4])
6932 2016-10-31 00:00:00
1961 2016-05-13 00:00:00
label torch.Size([2, 1, 4])
2341 2016-04-12 00:00:00


  0%|          | 42/13255 [00:04<21:09, 10.41it/s]

7912 2017-02-09 00:00:00
label torch.Size([2, 1, 4])
1827 2017-05-12 00:00:00
9051 2016-11-09 00:00:00
label torch.Size([2, 1, 4])
6941 2016-08-05 00:00:00
6954 2017-10-25 00:00:00
label torch.Size([2, 1, 4])


  0%|          | 46/13255 [00:04<19:36, 11.23it/s]

4681 2017-02-09 00:00:00
7180 2016-02-08 00:00:00
label torch.Size([2, 1, 4])
4099 2017-07-26 00:00:00
8382 2016-11-11 00:00:00
label torch.Size([2, 1, 4])
5711 2017-11-08 00:00:00
9672 2017-07-28 00:00:00


  0%|          | 48/13255 [00:04<19:00, 11.58it/s]

label torch.Size([2, 1, 4])
3646 2016-01-28 00:00:00
3276 2017-11-13 00:00:00
label torch.Size([2, 1, 4])
1433 2016-03-16 00:00:00
9366 2016-10-31 00:00:00
label torch.Size([2, 1, 4])
4521 2016-11-07 00:00:00


  0%|          | 50/13255 [00:04<19:30, 11.28it/s]

6874 2016-08-10 00:00:00
label torch.Size([2, 1, 4])
7851 2017-02-13 00:00:00
5727 2016-04-27 00:00:00
label torch.Size([2, 1, 4])
1518 2017-02-03 00:00:00
7958 2016-05-13 00:00:00
label torch.Size([2, 1, 4])


  0%|          | 54/13255 [00:05<18:50, 11.68it/s]

3277 2016-08-10 00:00:00
6504 2016-01-28 00:00:00
label torch.Size([2, 1, 4])
3395 2017-05-15 00:00:00
8157 2017-10-27 00:00:00
label torch.Size([2, 1, 4])
3246 2017-03-13 00:00:00


  0%|          | 56/13255 [00:05<19:00, 11.57it/s]

4102 2016-08-05 00:00:00
label torch.Size([2, 1, 4])
8705 2016-11-14 00:00:00
9401 2017-08-03 00:00:00
label torch.Size([2, 1, 4])
7203 2016-02-05 00:00:00
8163 2016-05-11 00:00:00
label torch.Size([2, 1, 4])


  0%|          | 56/13255 [00:05<20:52, 10.54it/s]


KeyboardInterrupt: 