In [3]:
# build dataset for stacked LSTM with feature Embeddings
import warnings
# Suppress every warning category for the rest of the session; the filter is
# installed before numpy/pandas are imported below.
warnings.filterwarnings('ignore')
from pathlib import Path

import numpy as np
import pandas as pd
# Seed NumPy's global random number generator so random draws are repeatable.
np.random.seed(42)
# Shorthand used to build MultiIndex slices inside .loc[...].
idx = pd.IndexSlice

In [4]:
# Directory holding the HDF5 asset store.
DATA_DIR = Path('data')
# Rows: 2010-2017 on the first index level, everything on the second level.
sample_period = idx['2010':'2017', :]
# Columns: adjusted close and adjusted volume only.
price_cols = ['adj_close', 'adj_volume']
prices = pd.read_hdf(DATA_DIR / 'assets.h5', 'quandl/wiki/prices').loc[sample_period, price_cols]
prices.info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 5698754 entries, (Timestamp('2010-01-04 00:00:00'), 'A') to (Timestamp('2017-12-29 00:00:00'), 'ZUMZ')
Data columns (total 2 columns):
 #   Column      Dtype  
---  ------      -----  
 0   adj_close   float64
 1   adj_volume  float64
dtypes: float64(2)
memory usage: 109.5+ MB


In [5]:
# Select most traded stocks
n_dates = len(prices.index.unique('date'))

# Daily dollar volume (close x volume) in wide date-by-ticker layout; tickers with
# fewer than 95% of the dates are dropped, then every day is ranked across tickers
# (rank 1 = highest dollar volume) and reshaped back to long format.
dollar_vol = (prices.adj_close
              .mul(prices.adj_volume)
              .unstack('ticker')
              .dropna(thresh=int(.95 * n_dates), axis=1)
              .rank(ascending=False, axis=1)
              .stack('ticker'))

# The 500 tickers with the lowest (i.e. best) average daily rank.
most_traded = (dollar_vol
               .groupby(level='ticker')
               .mean()
               .nsmallest(500)
               .index)

# Daily returns for those tickers, one column per ticker, newest date first.
returns = (prices.loc[idx[:, most_traded], 'adj_close']
           .unstack('ticker')
           .pct_change()
           .sort_index(ascending=False))
returns.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 2013 entries, 2017-12-29 to 2010-01-04
Columns: 500 entries, AAPL to CNC
dtypes: float64(500)
memory usage: 7.7 MB


In [6]:
# Last ten rows; with the newest-first sort these are the earliest dates.
returns.iloc[-10:]

ticker,AAPL,BAC,GOOGL,MSFT,AMZN,C,XOM,JPM,GE,INTC,...,FRT,CDNS,GRMN,FNF,FTNT,EAT,ATHN,RGLD,MTD,CNC
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2010-01-15,-0.016712,-0.033294,-0.016699,-0.00323,-0.001649,-0.025641,-0.00818,-0.0226,-0.015569,-0.031657,...,0.008054,-0.015823,-0.001705,-0.007407,-0.013568,-0.017476,-0.009643,-0.017277,-0.026296,0.001818
2010-01-14,-0.005792,0.012034,0.004701,0.020099,-0.013632,0.002857,0.000144,0.009944,-0.007724,0.024809,...,-0.004011,-0.010955,0.014999,-0.004425,0.023663,0.043214,-0.002187,-0.015372,0.008255,0.013825
2010-01-13,0.014106,0.015892,-0.005741,0.009312,0.01382,-0.005682,-0.004003,0.017475,0.003578,0.017081,...,0.019845,0.030645,0.025436,0.023396,-0.017686,0.024205,0.041211,0.007225,0.013584,0.017824
2010-01-12,-0.011375,-0.033668,-0.017684,-0.006607,-0.0227,-0.030303,-0.004979,-0.023355,0.000597,-0.016325,...,-0.017123,-0.022082,-0.013998,0.000755,-0.012968,0.000692,-0.042929,-0.031587,-0.014168,-0.003738
2010-01-11,-0.008822,0.008939,-0.001512,-0.01272,-0.024056,0.011142,0.01122,-0.003357,0.009639,0.005761,...,0.012513,0.0,0.014797,0.008378,-0.002984,0.001386,-0.027548,0.007452,0.008909,-0.007881
2010-01-08,0.006648,-0.00886,0.013331,0.00683,0.027077,-0.016438,-0.004011,-0.002456,0.021538,0.011165,...,-0.006292,0.0144,0.050357,-0.000761,0.033933,-0.017699,-0.00359,0.026463,0.016926,-0.010096
2010-01-07,-0.001849,0.032947,-0.02328,-0.010335,-0.017013,0.002747,-0.003142,0.019809,0.05178,-0.009615,...,-0.0003,0.019576,0.004685,-0.016467,0.002061,0.000681,-0.001055,-0.007795,-0.017317,0.024929
2010-01-06,-0.015906,0.011728,-0.025209,-0.006137,-0.018116,0.031161,0.008643,0.005495,-0.005151,-0.003354,...,-0.002689,0.011551,-0.00744,0.021407,0.055465,0.022284,0.010874,0.018383,-0.00767,0.011899
2010-01-05,0.001729,0.032505,-0.004404,0.000323,0.0059,0.038235,0.003905,0.01937,0.005178,-0.000479,...,-0.001193,-0.009804,0.002486,-0.038942,0.021667,-0.019126,0.00579,-0.002708,-0.024991,-0.031797
2010-01-04,,,,,,,,,,,...,,,,,,,,,,


In [7]:
# Stack 21-day time series
#
# `returns` is sorted newest-first, so every (T+1)-row window holds the return of its
# own date in row 0 (the label) followed by the T preceding returns (lags 1..T).
# Each window is transposed to one row per ticker and indexed by (ticker, date).
n = len(returns)
T = 21 # days
tcols = list(range(T))
tickers = returns.columns
# Feature columns after the transpose are labelled 1..T. `tcols[1:]` stops at T-1 and
# would leave the oldest lag unwinsorized, so the clip below uses the full range.
lag_cols = list(range(1, T + 1))

if n <= T + 1:
    raise ValueError(f'need more than {T + 1} rows of returns to build sequences, got {n}')

# Collect the windows and concatenate once; growing a DataFrame with pd.concat inside
# the loop re-copies all previously stacked rows on every iteration.
windows = []
for i in range(n-T-1):
    df = returns.iloc[i:i+T+1]
    date = df.index.max()
    windows.append(df.reset_index(drop=True).T
                   .assign(date=date, ticker=tickers)
                   .set_index(['ticker', 'date']))
data = pd.concat(windows).rename(columns={0: 'label'}).sort_index().dropna()

# Winsorize every lag column at its own 1st / 99th percentile; the label is left as is.
data.loc[:, lag_cols] = (data.loc[:, lag_cols]
                         .apply(lambda x: x.clip(lower=x.quantile(.01),
                                                 upper=x.quantile(.99))))
data.info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 995499 entries, ('A', Timestamp('2010-02-04 00:00:00')) to ('ZION', Timestamp('2017-12-29 00:00:00'))
Data columns (total 22 columns):
 #   Column  Non-Null Count   Dtype  
---  ------  --------------   -----  
 0   label   995499 non-null  float64
 1   1       995499 non-null  float64
 2   2       995499 non-null  float64
 3   3       995499 non-null  float64
 4   4       995499 non-null  float64
 5   5       995499 non-null  float64
 6   6       995499 non-null  float64
 7   7       995499 non-null  float64
 8   8       995499 non-null  float64
 9   9       995499 non-null  float64
 10  10      995499 non-null  float64
 11  11      995499 non-null  float64
 12  12      995499 non-null  float64
 13  13      995499 non-null  float64
 14  14      995499 non-null  float64
 15  15      995499 non-null  float64
 16  16      995499 non-null  float64
 17  17      995499 non-null  float64
 18  18      995499 non-null  float64
 19  19      9954

In [8]:
# First ten stacked sequences (sorted by ticker, then date).
data.iloc[:10]

Unnamed: 0_level_0,Unnamed: 1_level_0,label,1,2,3,4,5,6,7,8,9,...,12,13,14,15,16,17,18,19,20,21
ticker,date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
A,2010-02-04,-0.015911,0.001356,0.012702,0.039244,-0.023005,-0.016455,-0.004097,-0.005433,0.009599,-0.044233,...,0.012159,-0.023114,0.014989,0.007882,-0.012005,0.000649,-0.000325,-0.001297,-0.003553,-0.010863
A,2010-02-05,0.00344,-0.015911,0.001356,0.012702,0.039244,-0.023005,-0.016455,-0.004097,-0.005433,0.009599,...,-0.005519,0.012159,-0.023114,0.014989,0.007882,-0.012005,0.000649,-0.000325,-0.001297,-0.003553
A,2010-02-08,0.001028,0.00344,-0.015911,0.001356,0.012702,0.039244,-0.023005,-0.016455,-0.004097,-0.005433,...,-0.003591,-0.005519,0.012159,-0.023114,0.014989,0.007882,-0.012005,0.000649,-0.000325,-0.001297
A,2010-02-09,0.007192,0.001028,0.00344,-0.015911,0.001356,0.012702,0.039244,-0.023005,-0.016455,-0.004097,...,-0.044233,-0.003591,-0.005519,0.012159,-0.023114,0.014989,0.007882,-0.012005,0.000649,-0.000325
A,2010-02-10,-0.00544,0.007192,0.001028,0.00344,-0.015911,0.001356,0.012702,0.039244,-0.023005,-0.016455,...,0.009599,-0.044233,-0.003591,-0.005519,0.012159,-0.023114,0.014989,0.007882,-0.012005,0.000649
A,2010-02-11,0.003761,-0.00544,0.007192,0.001028,0.00344,-0.015911,0.001356,0.012702,0.039244,-0.023005,...,-0.005433,0.009599,-0.044233,-0.003591,-0.005519,0.012159,-0.023114,0.014989,0.007882,-0.012005
A,2010-02-12,0.021117,0.003761,-0.00544,0.007192,0.001028,0.00344,-0.015911,0.001356,0.012702,0.039244,...,-0.004097,-0.005433,0.009599,-0.044233,-0.003591,-0.005519,0.012159,-0.023114,0.014989,0.007882
A,2010-02-16,0.009006,0.021117,0.003761,-0.00544,0.007192,0.001028,0.00344,-0.015911,0.001356,0.012702,...,-0.016455,-0.004097,-0.005433,0.009599,-0.044233,-0.003591,-0.005519,0.012159,-0.023114,0.014989
A,2010-02-17,0.012562,0.009006,0.021117,0.003761,-0.00544,0.007192,0.001028,0.00344,-0.015911,0.001356,...,-0.023005,-0.016455,-0.004097,-0.005433,0.009599,-0.044233,-0.003591,-0.005519,0.012159,-0.023114
A,2010-02-18,0.011427,0.012562,0.009006,0.021117,0.003761,-0.00544,0.007192,0.001028,0.00344,-0.015911,...,0.039244,-0.023005,-0.016455,-0.004097,-0.005433,0.009599,-0.044233,-0.003591,-0.005519,0.012159


In [9]:
# (rows, columns) of the stacked daily dataset.
data.shape

(995499, 22)

In [6]:
# Persist the daily sequences under the 'returns_daily' key of rnn_data.h5 (written to
# the current working directory). `key` is passed by keyword because newer pandas
# releases deprecate positional arguments to to_hdf other than the path.
data.to_hdf('rnn_data.h5', key='returns_daily')

In [10]:
# Build weekly dataset
# Adjusted close only, pivoted so the innermost index level becomes the columns,
# restricted to 2007 onwards. Note: this rebinds `prices` from the earlier long frame.
prices = (pd.read_hdf(DATA_DIR / 'assets.h5', 'quandl/wiki/prices')
          ['adj_close']
          .unstack()
          .loc['2007':])
prices.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 2896 entries, 2007-01-01 to 2018-03-27
Columns: 3199 entries, A to ZUMZ
dtypes: float64(3199)
memory usage: 70.7 MB


In [11]:
# Spot-check ten randomly drawn rows of the wide price table.
prices.sample(10)

ticker,A,AA,AAL,AAMC,AAN,AAOI,AAON,AAP,AAPL,AAT,...,ZIOP,ZIXI,ZLC,ZLTQ,ZMH,ZNGA,ZOES,ZQK,ZTS,ZUMZ
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2007-07-17,27.153179,,91.721476,,16.551349,,5.954568,38.257252,17.851858,,...,5.04,2.04,22.99,,85.420753,,,13.86,,41.34
2016-07-27,46.691791,,35.771299,13.03,22.805042,12.74,26.350613,164.35288,100.611757,43.312284,...,5.02,3.93,,33.67,,2.89,35.48,,49.469502,16.87
2013-09-20,36.134688,,49.082682,365.41,27.096127,,17.070743,80.094596,61.767655,27.973987,...,3.28,4.94,14.13,9.6,81.716214,3.485,,6.82,31.179033,27.31
2013-07-25,32.330672,,48.926782,320.0,28.135152,,14.978867,81.592863,57.56638,30.178821,...,2.93,4.53,9.3,6.2,81.509736,3.5,,6.51,29.497655,28.17
2014-05-23,39.200531,,37.904693,942.0,32.551478,18.76,20.261686,122.517507,82.576969,31.424208,...,3.36,3.31,21.75,15.82,100.831875,3.28,28.83,6.22,29.770519,28.79
2009-01-30,12.334912,,14.7326,,11.439045,,4.870917,31.870529,11.582961,,...,0.82,1.3,1.24,,35.205111,,,2.1,,7.15
2016-06-15,44.284879,,30.12892,16.92,22.446535,10.19,25.824593,153.288269,94.933716,39.61974,...,6.12,4.06,,27.9,,2.64,37.25,,46.763873,15.0
2011-06-02,32.870356,,23.670897,,27.408453,,8.897639,59.592923,44.478931,18.108518,...,6.98,3.63,6.35,,64.152611,,,4.38,,25.84
2011-05-11,34.132502,,25.957438,,26.806749,,9.142171,67.786827,44.623895,18.291181,...,6.6,3.5,3.6,,66.88004,,,4.74,,28.82
2008-03-26,20.460398,,22.163858,,13.052036,,5.433317,34.026984,18.642232,,...,2.65,3.88,21.87,,75.497555,,,10.49,,18.15


In [12]:
# Resample to weekly frequency
# Last price of each week -> week-over-week percentage change -> 2008-2017 only ->
# drop any ticker with a missing value in that span -> newest week first.
returns = (prices
           .resample('W')
           .last()
           .pct_change()
           .loc['2008': '2017']
           .dropna(axis=1)
           .sort_index(ascending=False))
returns.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 522 entries, 2017-12-31 to 2008-01-06
Freq: -1W-SUN
Columns: 2489 entries, A to ZUMZ
dtypes: float64(2489)
memory usage: 9.9 MB


In [13]:
# First and last five weeks. DataFrame.append was deprecated and later removed from
# pandas; pd.concat gives the same frame.
pd.concat([returns.head(), returns.tail()])

ticker,A,AAL,AAN,AAON,AAP,AAPL,AAWW,ABAX,ABC,ABCB,...,ZEUS,ZIGO,ZINC,ZION,ZIOP,ZIXI,ZLC,ZMH,ZQK,ZUMZ
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2017-12-31,-0.005642,-0.010648,-0.010184,-0.001361,-0.008553,-0.033027,-0.024938,-0.001814,-0.006922,-0.019329,...,-0.029797,0.0,0.0,-0.009741,0.022222,-0.01573,0.0,0.0,0.0,-0.029138
2017-12-24,-0.003846,0.029965,0.090171,0.044034,-0.00149,0.006557,0.046087,0.032681,-0.00762,0.017598,...,0.032153,0.0,0.0,0.026395,-0.068966,-0.024123,0.0,0.0,0.0,0.067164
2017-12-17,0.003413,0.000784,-0.052591,-0.014006,0.003888,0.026569,0.004367,0.008396,0.074625,0.026567,...,0.036715,0.0,0.0,-0.018064,-0.018059,0.075472,0.0,0.0,0.0,-0.051887
2017-12-10,-0.019071,0.041012,-0.005359,-0.017882,0.010375,-0.009822,-0.028014,-0.010386,0.0206,-0.054271,...,-0.00241,0.0,0.0,0.016973,-0.015556,-0.055679,0.0,0.0,0.0,0.062657
2017-12-03,-0.00966,0.009267,0.105501,0.013947,0.11263,-0.022404,0.073838,-0.028456,0.045796,0.024717,...,0.065742,0.0,0.0,0.080475,0.014656,-0.006637,0.0,0.0,0.0,0.047244
2008-02-03,0.038265,0.252238,0.002941,0.095182,0.097833,0.028767,0.006245,-0.078058,0.036913,0.083217,...,0.137066,0.127561,0.28655,0.167722,-0.087879,0.069364,0.171949,0.193189,0.127811,0.149083
2008-01-27,-0.013963,-0.048762,0.19131,0.071788,0.043997,-0.194286,-0.008984,-0.090807,-0.034771,0.054572,...,0.018349,-0.026292,-0.046975,0.136418,-0.003021,0.145695,0.042164,-0.014553,0.141892,0.118666
2008-01-20,-0.065,0.086627,-0.080541,-0.054762,-0.007176,-0.065609,0.015818,-0.019721,-0.015219,-0.044397,...,0.040573,0.010999,-0.167109,-0.051614,-0.054286,-0.124638,0.037172,-0.037312,-0.030144,-0.076969
2008-01-13,0.035375,-0.041902,-0.037818,-0.046538,-0.101486,-0.040878,-0.052095,0.097385,0.080137,-0.017313,...,-0.054176,-0.047993,-0.102381,0.037264,-0.022346,-0.172662,0.011799,0.05188,0.018692,-0.094249
2008-01-06,-0.072553,-0.156356,-0.068707,-0.133301,-0.065496,-0.098984,-0.029478,-0.098374,-0.037363,-0.132733,...,-0.02729,-0.075806,-0.004739,-0.081058,0.101538,-0.143737,-0.1341,0.000752,-0.133102,-0.269012


In [14]:
# Create & stack 52-week sequences
#
# `returns` is sorted newest-first, so every (T+1)-row window holds the return of its
# own week in row 0 followed by the T preceding weekly returns. Each window is
# transposed to one row per ticker (columns 0..T) and indexed by (ticker, date).
n = len(returns)
T = 52 # weeks
tcols = list(range(T))
tickers = returns.columns

if n <= T + 1:
    raise ValueError(f'need more than {T + 1} rows of returns to build sequences, got {n}')

# NOTE(review): range(n-T-1) stops one step before the last complete window
# (returns.iloc[n-T-1:n]); the bound is kept so the row count is unchanged -
# confirm whether dropping that earliest window is intended.
#
# Collect the windows and concatenate once; growing a DataFrame with pd.concat inside
# the loop re-copies all previously stacked rows on every iteration.
windows = []
for i in range(n-T-1):
    df = returns.iloc[i:i+T+1]
    date = df.index.max()
    windows.append(df.reset_index(drop=True).T
                   .assign(date=date, ticker=tickers)
                   .set_index(['ticker', 'date']))
data = pd.concat(windows)
data.info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 1167341 entries, ('A', Timestamp('2017-12-31 00:00:00')) to ('ZUMZ', Timestamp('2009-01-11 00:00:00'))
Data columns (total 53 columns):
 #   Column  Non-Null Count    Dtype  
---  ------  --------------    -----  
 0   0       1167341 non-null  float64
 1   1       1167341 non-null  float64
 2   2       1167341 non-null  float64
 3   3       1167341 non-null  float64
 4   4       1167341 non-null  float64
 5   5       1167341 non-null  float64
 6   6       1167341 non-null  float64
 7   7       1167341 non-null  float64
 8   8       1167341 non-null  float64
 9   9       1167341 non-null  float64
 10  10      1167341 non-null  float64
 11  11      1167341 non-null  float64
 12  12      1167341 non-null  float64
 13  13      1167341 non-null  float64
 14  14      1167341 non-null  float64
 15  15      1167341 non-null  float64
 16  16      1167341 non-null  float64
 17  17      1167341 non-null  float64
 18  18      1167341 non-null  flo

In [15]:
# First and last five stacked weekly sequences in one table.
pd.concat([data.iloc[:5], data.iloc[-5:]])

Unnamed: 0_level_0,Unnamed: 1_level_0,0,1,2,3,4,5,6,7,8,9,...,43,44,45,46,47,48,49,50,51,52
ticker,date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
A,2017-12-31,-0.005642,-0.003846,0.003413,-0.019071,-0.00966,0.008286,0.029636,-0.022817,0.005885,0.010706,...,0.004639,0.002131,0.025218,0.019846,0.015005,0.02163,-0.021976,0.014586,0.053336,-0.011001
AAL,2017-12-31,-0.010648,0.029965,0.000784,0.041012,0.009267,0.024689,0.034265,-0.034352,-0.002313,-0.084328,...,0.011668,-0.01343,0.009903,0.053175,-0.058573,-0.021875,0.007345,0.031162,-0.010281,-0.036922
AAN,2017-12-31,-0.010184,0.090171,-0.052591,-0.005359,0.105501,-0.027702,-0.003553,0.004392,-0.069239,-0.08423,...,0.028698,0.009283,-0.103827,0.004009,-0.019974,0.007588,-0.05429,0.038561,-0.035324,-0.007138
AAON,2017-12-31,-0.001361,0.044034,-0.014006,-0.017882,0.013947,0.040639,0.019231,-0.018868,-0.010057,0.00578,...,0.023188,0.004367,0.0,0.010294,-0.028571,0.070336,-0.006079,0.006116,-0.01059,-0.006015
AAP,2017-12-31,-0.008553,-0.00149,0.003888,0.010375,0.11263,-0.000448,0.099114,-0.00709,-0.001587,-0.055888,...,-0.012585,-0.026303,-0.015056,0.007988,-0.009856,-0.041574,-0.018823,0.030419,0.003016,-0.010358
ZIXI,2009-01-11,0.207599,0.215047,-0.036036,-0.075,-0.04,-0.155405,0.165354,-0.196203,-0.141304,0.027933,...,0.022727,0.010204,-0.107062,0.08933,0.057743,0.02973,0.069364,0.145695,-0.124638,-0.172662
ZLC,2009-01-11,-0.144638,0.204204,0.003012,0.040752,-0.220049,-0.310287,-0.244586,-0.345288,-0.135544,-0.186987,...,-0.003158,-0.016563,0.143195,0.045792,-0.094678,0.027043,0.171949,0.042164,0.037172,0.011799
ZMH,2009-01-11,0.003113,0.050566,-0.000754,0.022359,0.036494,0.005895,0.000805,-0.112143,-0.052988,-0.044799,...,0.019026,-0.001727,-0.008951,0.001714,-0.025944,-0.016671,0.193189,-0.014553,-0.037312,0.05188
ZQK,2009-01-11,-0.038647,0.239521,-0.125654,0.248366,0.275,-0.142857,0.573034,-0.450617,-0.256881,-0.158301,...,0.053592,-0.025556,-0.039488,0.003212,0.047085,-0.064008,0.127811,0.141892,-0.030144,0.018692
ZUMZ,2009-01-11,0.168831,-0.002591,-0.06988,-0.009547,-0.076075,0.049769,0.196676,0.018336,-0.177494,-0.116803,...,0.080085,-0.197383,-0.059893,0.013001,-0.077461,-0.001497,0.149083,0.118666,-0.076969,-0.094249


In [16]:
# Winsorize every return column at its own 1st / 99th percentile. The stacked frame
# has columns 0..T (T + 1 in total); `tcols` only covers 0..T-1 and would leave the
# oldest lag (column T) unclipped, so the full range is used here.
ret_cols = list(range(T + 1))
data[ret_cols] = (data[ret_cols].apply(lambda x: x.clip(lower=x.quantile(.01), upper=x.quantile(.99))))
# Column 0 is the return of the row's own date; it becomes the regression target...
data = data.rename(columns={0: 'fwd_returns'})
# ...and its sign becomes the binary classification label (1 if positive, else 0).
data['label'] = (data['fwd_returns'] > 0).astype(int)
data.shape

(1167341, 54)

In [17]:
# First five rows after winsorizing and adding the label column.
data.iloc[:5]

Unnamed: 0_level_0,Unnamed: 1_level_0,fwd_returns,1,2,3,4,5,6,7,8,9,...,44,45,46,47,48,49,50,51,52,label
ticker,date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
A,2017-12-31,-0.005642,-0.003846,0.003413,-0.019071,-0.00966,0.008286,0.029636,-0.022817,0.005885,0.010706,...,0.002131,0.025218,0.019846,0.015005,0.02163,-0.021976,0.014586,0.053336,-0.011001,0
AAL,2017-12-31,-0.010648,0.029965,0.000784,0.041012,0.009267,0.024689,0.034265,-0.034352,-0.002313,-0.084328,...,-0.01343,0.009903,0.053175,-0.058573,-0.021875,0.007345,0.031162,-0.010281,-0.036922,0
AAN,2017-12-31,-0.010184,0.090171,-0.052591,-0.005359,0.105501,-0.027702,-0.003553,0.004392,-0.069239,-0.08423,...,0.009283,-0.103827,0.004009,-0.019974,0.007588,-0.05429,0.038561,-0.035324,-0.007138,0
AAON,2017-12-31,-0.001361,0.044034,-0.014006,-0.017882,0.013947,0.040639,0.019231,-0.018868,-0.010057,0.00578,...,0.004367,0.0,0.010294,-0.028571,0.070336,-0.006079,0.006116,-0.01059,-0.006015,0
AAP,2017-12-31,-0.008553,-0.00149,0.003888,0.010375,0.11263,-0.000448,0.099114,-0.00709,-0.001587,-0.055888,...,-0.026303,-0.015056,0.007988,-0.009856,-0.041574,-0.018823,0.030419,0.003016,-0.010358,0


In [12]:
# Persist the weekly sequences, sorted by (ticker, date), under the 'returns_weekly'
# key of rnn_data.h5 (written to the current working directory). `key` is passed by
# keyword because newer pandas releases deprecate positional arguments to to_hdf
# other than the path.
data.sort_index().to_hdf('rnn_data.h5', key='returns_weekly')