In [13]:
import random
random.seed(1747)

import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' #Hide messy TensorFlow warnings
import warnings
warnings.filterwarnings("ignore") #Hide messy Numpy warnings
import sys
import numpy as np
np.random.seed(1747)
import pandas as pd
import time 
import copy
from itertools import product

import tensorflow as tf
tf.set_random_seed(1747)

from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import mean_squared_error, explained_variance_score, r2_score
from sklearn.linear_model import LinearRegression, Lasso, lasso_path, lars_path, LassoLarsIC

import keras
from keras.layers.core import Dense, Activation, Dropout
from keras.layers import Input
from keras.models import Model

from keras.layers.recurrent import LSTM, GRU
from keras.regularizers import l1
from keras.models import Sequential
from keras.models import load_model

import ffn
%matplotlib inline

import plotly as py
# print (py.__version__) # requires version >= 1.9.0
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
from plotly.graph_objs import *
import plotly.figure_factory as ff

init_notebook_mode(connected=True)



In [2]:
print("Loading industry data...")
rawdata = pd.read_csv("30_Industry_Portfolios_Daily.CSV")
rawdata["date"] = pd.to_datetime(rawdata['yyyymmdd'], format='%Y%m%d')
rawdata = rawdata.drop(columns=['yyyymmdd'])    
rawdata = rawdata.set_index('date')

industries = list(rawdata.columns)
# map industry names to col nums
ind_reverse_dict = dict([(industries[i], i) for i in range(len(industries))])

# convert to levels
rawdata = rawdata/100 + 1.0
rawdata = rawdata.cumprod()

# convert to weekly(Friday)
def take_last(array_like):
    return array_like[-1]

wdata = rawdata.resample('W-FRI', how=take_last)
wdata = wdata[industries]

week_change = wdata.pct_change(periods=1)
fourweek_change = wdata.pct_change(periods=4)

for ind in industries:
    wdata[ind + ".pct"] = week_change[ind]

wdata = wdata.drop(columns=industries)    

print("Loading risk-free rate data...")
rfdata = pd.read_csv("F-F_Research_Data_Factors_weekly.CSV")
rfdata["date"] = pd.to_datetime(rfdata['yyyymmdd'], format='%Y%m%d')
rfdata = rfdata.drop(columns=['yyyymmdd'])    
rfdata = rfdata.set_index('date')
wdata['mkt-rf'] = rfdata['Mkt-RF']/100

print("Loading claims data...")
claims = pd.read_csv("IC4WSA.csv")
claims["date"] = pd.to_datetime(claims['DATE'], format='%Y-%m-%d')
claims = claims.drop(columns=['DATE'])    
claims = claims.set_index('date')
claims.index = claims.index + pd.DateOffset(6) # move forward because this week data is released for last week
claims['claims_pct'] = claims.pct_change(periods=4) # 4-week pct change in 4-week moving average

print("Loading 10-year rate data...")
tenyear = pd.read_csv("WGS10YR.csv")
tenyear["date"] = pd.to_datetime(tenyear['DATE'], format='%Y-%m-%d')
tenyear = tenyear.drop(columns=['DATE'])    
tenyear = tenyear.set_index('date')

print("Loading 3-month rate data...")
threemonth = pd.read_csv("WTB3MS.csv")
threemonth["date"] = pd.to_datetime(threemonth['DATE'], format='%Y-%m-%d')
threemonth = threemonth.drop(columns=['DATE'])    
threemonth = threemonth.set_index('date')

wdata['claims'] = claims['claims_pct']
wdata['tenyear'] = tenyear['WGS10YR']/100
wdata['threemonth'] = threemonth['WTB3MS']/100
wdata['curve'] = wdata['tenyear'] - wdata['threemonth']
wdata['tenyear'] = wdata['tenyear'].diff() # first difference 10-year yield
wdata['threemonth'] = wdata['threemonth'].diff() # first difference 3-month
wdata['dayofyear'] = wdata.index.dayofyear / 365 # for possible seasonality

# response variables at the end
for ind in industries:
    wdata[ind + ".pct4"] = fourweek_change[ind]
    
wdata = wdata.dropna(axis=0, how='any') 
wdata

Loading industry data...
Loading risk-free rate data...
Loading claims data...
Loading 10-year rate data...
Loading 3-month rate data...


Unnamed: 0_level_0,Food.pct,Beer.pct,Smoke.pct,Games.pct,Books.pct,Hshld.pct,Clths.pct,Hlth.pct,Chems.pct,Txtls.pct,...,Telcm.pct4,Servs.pct4,BusEq.pct4,Paper.pct4,Trans.pct4,Whlsl.pct4,Rtail.pct4,Meals.pct4,Fin.pct4,Other.pct4
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1967-03-03,0.002101,0.035852,0.004228,0.012772,0.015615,0.024415,-0.005060,0.022643,-0.002182,-0.032488,...,0.084925,-0.010007,0.086659,0.023132,0.020831,0.064768,-0.013532,0.067936,-0.005166,0.043061
1967-03-10,0.001094,0.024506,0.039674,-0.007063,0.030935,0.019204,0.006009,0.011095,0.007190,-0.003956,...,0.088574,0.034771,0.073570,0.035405,0.065833,0.083634,-0.030792,0.057092,-0.002137,0.018451
1967-03-17,0.016759,0.031390,0.031683,-0.002504,0.009321,0.006715,0.008835,0.002835,0.028879,0.042863,...,0.056010,0.041497,0.063894,0.052470,0.083686,0.056166,-0.012612,0.119532,-0.007355,0.036718
1967-03-31,0.004573,-0.024424,-0.025362,-0.004750,-0.000130,-0.012690,0.005300,0.008178,-0.014771,0.006206,...,-0.018970,0.042075,0.027385,0.048315,0.006766,0.030709,0.026021,0.135993,0.011499,0.024485
1967-04-07,0.008760,0.019188,-0.003340,-0.023719,-0.002248,-0.019309,-0.008903,-0.010805,-0.017224,0.002846,...,-0.018940,-0.009912,-0.005756,-0.000996,-0.040430,0.011845,0.004025,0.085482,0.011118,-0.010792
1967-04-14,0.004244,-0.003171,-0.005318,0.015349,0.034845,0.036147,0.006463,0.010747,0.003402,0.003282,...,-0.019125,-0.015022,0.009041,-0.022642,-0.032368,-0.011070,0.004708,0.034950,0.012838,-0.030469
1967-04-21,0.007804,0.016138,0.022990,0.080731,0.009641,0.027908,0.014566,0.023512,0.033415,0.026091,...,-0.020690,0.049828,0.033591,0.003859,-0.012814,0.011383,0.055404,0.085144,0.003784,-0.001570
1967-04-28,0.020146,0.028082,0.020118,0.031001,0.019497,0.001773,0.016577,0.020756,0.037491,0.037460,...,0.002441,0.072553,0.080182,0.016933,0.007564,0.024363,0.097227,0.089485,0.004510,0.021756
1967-05-05,0.011409,0.014678,-0.017793,-0.002141,0.004879,0.008101,0.036309,0.011590,0.032330,0.009107,...,-0.018578,0.102903,0.096849,0.075256,0.028442,0.060199,0.115996,0.124440,0.011935,0.052342
1967-05-12,-0.000940,-0.021385,-0.022804,0.010102,-0.012730,-0.012912,-0.001049,-0.025359,-0.031184,0.018571,...,-0.027034,0.086401,0.053584,0.072543,0.033290,0.099255,0.087425,0.132270,0.018062,0.058918


In [3]:
allcols = list(wdata.columns)
ncols = len(allcols)

responses = allcols[-30:]
nresponses = len(responses)

predictors = allcols[:ncols-nresponses]
npredictors = len(predictors)
# create inputs for NN
# 52 x 30 weekly changes
# 4-week change 4 weeks hence
# portfolio return will be mean(top6, -bot6)/4 (each week hold put 1/4 into portfolio and hold for 4 weeks)
# remove NAs
# predict 4-week change based on preceding 52 weeks


In [4]:
wdata[responses]

Unnamed: 0_level_0,Food.pct4,Beer.pct4,Smoke.pct4,Games.pct4,Books.pct4,Hshld.pct4,Clths.pct4,Hlth.pct4,Chems.pct4,Txtls.pct4,...,Telcm.pct4,Servs.pct4,BusEq.pct4,Paper.pct4,Trans.pct4,Whlsl.pct4,Rtail.pct4,Meals.pct4,Fin.pct4,Other.pct4
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1967-03-03,0.016857,0.053990,-0.033836,0.018749,0.061618,0.036749,0.021588,0.038544,0.001688,0.004670,...,0.084925,-0.010007,0.086659,0.023132,0.020831,0.064768,-0.013532,0.067936,-0.005166,0.043061
1967-03-10,0.005365,0.069764,0.009846,-0.031634,0.068904,0.050362,0.007388,0.052341,-0.004957,-0.001989,...,0.088574,0.034771,0.073570,0.035405,0.065833,0.083634,-0.030792,0.057092,-0.002137,0.018451
1967-03-17,0.013484,0.107564,0.046344,-0.010037,0.080310,0.061035,0.007703,0.041734,0.031526,-0.023881,...,0.056010,0.041497,0.063894,0.052470,0.083686,0.056166,-0.012612,0.119532,-0.007355,0.036718
1967-03-31,0.028246,0.052321,0.053038,-0.015128,0.051689,0.022878,0.030307,0.044713,0.029658,0.048208,...,-0.018970,0.042075,0.027385,0.048315,0.006766,0.030709,0.026021,0.135993,0.011499,0.024485
1967-04-07,0.036119,0.046859,0.009470,-0.031649,0.017837,-0.015774,0.015035,0.022085,0.004700,0.055366,...,-0.018940,-0.009912,-0.005756,-0.000996,-0.040430,0.011845,0.004025,0.085482,0.011118,-0.010792
1967-04-14,0.023366,0.011780,-0.026734,-0.014318,0.043577,0.013000,0.012648,0.030149,-0.020179,0.015310,...,-0.019125,-0.015022,0.009041,-0.022642,-0.032368,-0.011070,0.004708,0.034950,0.012838,-0.030469
1967-04-21,0.025615,0.007137,-0.011570,0.066203,0.042339,0.031243,0.017395,0.031704,0.004017,0.038795,...,-0.020690,0.049828,0.033591,0.003859,-0.012814,0.011383,0.055404,0.085144,0.003784,-0.001570
1967-04-28,0.041514,0.061343,0.034553,0.104503,0.062799,0.046351,0.028809,0.044575,0.057275,0.071062,...,0.002441,0.072553,0.080182,0.016933,0.007564,0.024363,0.097227,0.089485,0.004510,0.021756
1967-05-05,0.044250,0.056646,0.019551,0.128915,0.070391,0.075596,0.075741,0.068222,0.110586,0.077748,...,-0.018578,0.102903,0.096849,0.075256,0.028442,0.060199,0.115996,0.124440,0.011935,0.052342
1967-05-12,0.038859,0.037339,0.001628,0.123081,0.021181,0.024670,0.067712,0.030063,0.072306,0.094172,...,-0.027034,0.086401,0.053584,0.072543,0.033290,0.099255,0.087425,0.132270,0.018062,0.058918


In [5]:
wdata[predictors]

Unnamed: 0_level_0,Food.pct,Beer.pct,Smoke.pct,Games.pct,Books.pct,Hshld.pct,Clths.pct,Hlth.pct,Chems.pct,Txtls.pct,...,Whlsl.pct,Rtail.pct,Meals.pct,Fin.pct,Other.pct,mkt-rf,claims,tenyear,threemonth,curve
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1967-03-03,0.002101,0.035852,0.004228,0.012772,0.015615,0.024415,-0.005060,0.022643,-0.002182,-0.032488,...,0.001153,-0.027581,0.022499,-0.003386,0.006235,0.0092,0.095694,-0.0005,-0.0015,0.0021
1967-03-10,0.001094,0.024506,0.039674,-0.007063,0.030935,0.019204,0.006009,0.011095,0.007190,-0.003956,...,0.017914,0.003678,0.037135,-0.003198,0.022517,0.0084,0.196682,-0.0008,-0.0009,0.0022
1967-03-17,0.016759,0.031390,0.031683,-0.002504,0.009321,0.006715,0.008835,0.002835,0.028879,0.042863,...,0.010179,0.015839,0.053328,0.003128,0.027641,0.0132,0.180139,-0.0009,-0.0009,0.0022
1967-03-31,0.004573,-0.024424,-0.025362,-0.004750,-0.000130,-0.012690,0.005300,0.008178,-0.014771,0.006206,...,-0.002169,0.002851,0.045797,0.002558,-0.027364,-0.0058,0.138646,0.0001,-0.0002,0.0038
1967-04-07,0.008760,0.019188,-0.003340,-0.023719,-0.002248,-0.019309,-0.008903,-0.010805,-0.017224,0.002846,...,-0.000716,-0.017840,-0.008980,-0.003574,-0.012693,-0.0091,-0.017822,-0.0002,-0.0018,0.0054
1967-04-14,0.004244,-0.003171,-0.005318,0.015349,0.034845,0.036147,0.006463,0.010747,0.003402,0.003282,...,-0.012699,0.016530,0.004293,0.004834,0.007200,0.0101,-0.013699,0.0003,-0.0010,0.0067
1967-04-21,0.007804,0.016138,0.022990,0.080731,0.009641,0.027908,0.014566,0.023512,0.033415,0.026091,...,0.027354,0.054095,0.042550,-0.000020,0.032284,0.0207,0.023121,0.0011,-0.0002,0.0080
1967-04-28,0.020146,0.028082,0.020118,0.031001,0.019497,0.001773,0.016577,0.020756,0.037491,0.037460,...,0.010637,0.042591,0.049981,0.003283,-0.004641,0.0167,0.016299,0.0010,-0.0011,0.0101
1967-05-05,0.011409,0.014678,-0.017793,-0.002141,0.004879,0.008101,0.036309,0.011590,0.032330,0.009107,...,0.034243,-0.001039,0.022815,0.003791,0.016862,0.0064,0.064516,0.0003,0.0000,0.0104
1967-05-12,-0.000940,-0.021385,-0.022804,0.010102,-0.012730,-0.012912,-0.001049,-0.025359,-0.031184,0.018571,...,0.023672,-0.009495,0.011287,0.010918,0.013494,-0.0069,0.044643,0.0009,-0.0006,0.0119


In [6]:
wdata[responses].describe()


Unnamed: 0,Food.pct4,Beer.pct4,Smoke.pct4,Games.pct4,Books.pct4,Hshld.pct4,Clths.pct4,Hlth.pct4,Chems.pct4,Txtls.pct4,...,Telcm.pct4,Servs.pct4,BusEq.pct4,Paper.pct4,Trans.pct4,Whlsl.pct4,Rtail.pct4,Meals.pct4,Fin.pct4,Other.pct4
count,2568.0,2568.0,2568.0,2568.0,2568.0,2568.0,2568.0,2568.0,2568.0,2568.0,...,2568.0,2568.0,2568.0,2568.0,2568.0,2568.0,2568.0,2568.0,2568.0,2568.0
mean,0.009873,0.010292,0.013569,0.010552,0.007423,0.007724,0.009669,0.009717,0.009208,0.009223,...,0.008091,0.01015,0.008944,0.008871,0.008601,0.009124,0.010088,0.011024,0.009592,0.006428
std,0.042092,0.050558,0.060109,0.070889,0.058517,0.047064,0.064761,0.048806,0.056145,0.071426,...,0.046037,0.064003,0.067026,0.050561,0.058399,0.055936,0.054305,0.061893,0.05601,0.055216
min,-0.218967,-0.240731,-0.265369,-0.494009,-0.396657,-0.273293,-0.365254,-0.240028,-0.369818,-0.456555,...,-0.322623,-0.304315,-0.319093,-0.295365,-0.296895,-0.307358,-0.304006,-0.346836,-0.334256,-0.293171
25%,-0.011788,-0.016683,-0.021374,-0.027623,-0.024131,-0.016443,-0.026098,-0.018981,-0.023425,-0.02642,...,-0.0172,-0.025732,-0.027775,-0.020102,-0.02578,-0.020741,-0.02001,-0.021982,-0.018781,-0.023841
50%,0.010565,0.010806,0.014151,0.013603,0.008355,0.009287,0.01112,0.011661,0.011145,0.01066,...,0.009986,0.014086,0.009948,0.011533,0.010853,0.012041,0.011137,0.013083,0.012459,0.008506
75%,0.034568,0.038977,0.048042,0.052699,0.040107,0.035173,0.046891,0.040889,0.04155,0.046725,...,0.035883,0.047378,0.046666,0.038587,0.044652,0.040683,0.043402,0.044505,0.041203,0.039116
max,0.230836,0.275,0.356415,0.399072,0.432414,0.252282,0.408192,0.297003,0.359382,0.843838,...,0.245274,0.250527,0.34824,0.291572,0.33058,0.251681,0.2389,0.394898,0.47462,0.31921


In [7]:
wdata[predictors].describe()

Unnamed: 0,Food.pct,Beer.pct,Smoke.pct,Games.pct,Books.pct,Hshld.pct,Clths.pct,Hlth.pct,Chems.pct,Txtls.pct,...,Whlsl.pct,Rtail.pct,Meals.pct,Fin.pct,Other.pct,mkt-rf,claims,tenyear,threemonth,curve
count,2568.0,2568.0,2568.0,2568.0,2568.0,2568.0,2568.0,2568.0,2568.0,2568.0,...,2568.0,2568.0,2568.0,2568.0,2568.0,2568.0,2568.0,2568.0,2568.0,2568.0
mean,0.002414,0.002616,0.003365,0.002566,0.001738,0.001873,0.002299,0.00237,0.002267,0.002093,...,0.002105,0.002506,0.00266,0.002234,0.001435,0.001035,0.0013,-1.7e-05,-8e-06,0.016428
std,0.020529,0.025763,0.030726,0.03413,0.02773,0.023974,0.029607,0.024913,0.028011,0.032678,...,0.025468,0.026496,0.029115,0.027782,0.026167,0.022289,0.048051,0.00134,0.002033,0.012507
min,-0.147642,-0.173538,-0.179885,-0.240424,-0.194454,-0.250741,-0.182363,-0.17511,-0.18155,-0.238618,...,-0.177007,-0.168158,-0.158885,-0.214332,-0.188001,-0.18,-0.223296,-0.0103,-0.0182,-0.0357
25%,-0.008947,-0.011602,-0.013517,-0.014984,-0.012142,-0.011222,-0.013562,-0.01159,-0.012891,-0.013517,...,-0.011759,-0.012085,-0.013028,-0.01206,-0.01247,-0.011225,-0.027842,-0.0007,-0.0004,0.0073
50%,0.002506,0.002834,0.0037,0.003,0.002204,0.002574,0.00307,0.002802,0.003467,0.00268,...,0.003285,0.003012,0.0032,0.003065,0.002556,0.0026,-0.003019,0.0,0.0,0.0175
75%,0.014221,0.017537,0.019813,0.021421,0.016539,0.015663,0.01885,0.016821,0.017571,0.018028,...,0.016803,0.017811,0.019364,0.016762,0.016193,0.013925,0.025238,0.0007,0.0005,0.0263
max,0.157204,0.142199,0.251794,0.317508,0.236833,0.182803,0.190699,0.191077,0.155439,0.277444,...,0.116496,0.139558,0.188687,0.263736,0.181089,0.1346,0.260521,0.0098,0.0192,0.0501


In [None]:
# plot correlation matrix
target = responses[0]
for lag in range(12):
    m.append([wdata[target].shift(lag).corr(wdata[pred]) for pred in predictors])
    
def plot_matrix(m, x_labels, y_labels, x_suffix="", y_suffix=""):

    """plot a heat map of a matrix"""
    chart_width=640
    chart_height=480
    
    layout = Layout(
        height=chart_height,
        width=chart_width,     
        margin=dict(
            l=150,
            r=30,
            b=120,
            t=100,
        ),
        xaxis=dict(
            title=y_labels,
            tickfont=dict(
                family='Arial, sans-serif',
                size=10,
                color='black'
            ),
        ),
        yaxis=dict(
            title=x_labels,
            tickfont=dict(
                family='Arial, sans-serif',
                size=10,
                color='black'
            ),
        ),
    )
    
    data = [Heatmap(z=m,
                    colorscale=[[0, 'rgb(0,0,255)', [1, 'rgb(255,0,0)']]],
                   )
           ]

    fig = Figure(data=data, layout=layout)
    return iplot(fig, link_text="")


laglabels = ["%d" % i for i in range(12)]

plot_matrix(np.matrix(m).T, predictors, laglabels, x_suffix="", y_suffix="")

In [None]:
# do subset selection on X vs Y[t+1] to see which variables show up a lot

def subset_selection(X, Y, model_aic, verbose=False):
    
    global responses
    global response_reverse_dict
    global predictors
    global predictor_reverse_dict
    
    coef_dict = {}
    for response_index, response in enumerate(responses):
        y = Y[:,response_reverse_dict[response]]
        
        model_aic.fit(X, y)

        coef_dict[response] = [predstr for i, predstr in enumerate(predictors) if model_aic.coef_[i] !=0]
        #y_response = model_aic.responseict(X)
        # print ("In-sample LASSO R-squared: %.6f" % r2_score(y, y_response))
        if verbose:
            print("LASSO variables selected for %s: " % response)
            print(coef_dict[response])
        
        if not coef_dict[response]:
            if verbose:
                print("No coefs selected for " + response + ", using all")
                print("---")
            coef_dict[response] = predictors            
        # fit OLS vs. selected vars, better fit w/o LASSO penalties
        # in-sample R-squared using LASSO coeffs
        if verbose:
            print("Running OLS for " + response + " against " + str(coef_dict[response]))
            # col nums of selected responses
            predcols = [predictor_reverse_dict[predstr] for predstr in coef_dict[response]]
            model_ols = LinearRegression()
            model_ols.fit(X[:, predcols], y)
            y_pred = model_ols.predict(X[:, predcols])
            print ("In-sample OLS R-squared: %.2f" % (100 * r2_score(y, y_pred)))
            print("---")
            
    return coef_dict

coef_dict = subset_selection(X, Y, LassoLarsIC(criterion='aic'), verbose=True)


In [9]:
lookback = 12 # use 12 weeks of data to forecast
lead = 4 # forecast 4 weeks ahead
# first Y to predict is row 15 using 0:11
Y = wdata.values[lookback+lead-1:, -nresponses:]
numrows = Y.shape[0]

X_raw = wdata.values[:,:npredictors]

# each input will have cols 12 * npredictors
X = np.zeros([numrows, lookback * npredictors])
for row in range(numrows):
    prev12 = []
    for i in range(lookback):
        prev12.append(X_raw[row + i])
    X[row] = np.hstack(prev12)

print(X.shape)
print(Y.shape)

(2553, 420)
(2553, 30)


In [10]:
# double check this vs. above, predicting last Y using 12 Xs ending 4 prior
print (Y[-1])
print("---")
print (X[-1])

[-0.06263117 -0.07485018 -0.06087798  0.00381966 -0.00494537 -0.05620866
 -0.00846332 -0.05959691 -0.03720178 -0.09404439 -0.09437882 -0.0461106
 -0.04766759 -0.05547696 -0.05373372 -0.00132279 -0.04406671  0.01860665
 -0.11279651 -0.0233188  -0.05511614 -0.01323344 -0.00990411 -0.06720896
 -0.05977789 -0.07305412 -0.04290194 -0.05308474 -0.03385774 -0.05744628]
---
[ 1.74088971e-02  2.13535743e-02  9.43764789e-03  9.08980595e-04
  1.54017486e-02  1.90905347e-02  9.09333641e-03 -3.42733195e-03
 -5.95181849e-03  1.37521350e-02  6.55487688e-03 -3.80168521e-02
 -1.22924294e-02 -1.44773285e-02 -1.46520244e-02 -1.30234646e-02
  9.46978629e-03 -1.88473140e-02  1.54458040e-02  4.45538194e-03
  1.10967049e-02 -9.01786328e-03  5.03041630e-03 -1.69213122e-02
 -1.62334599e-02 -2.38256714e-02  1.39423923e-02 -5.74507008e-03
 -2.37214876e-02 -6.70439603e-03 -4.10000000e-03 -7.24206349e-02
 -2.00000000e-04  6.00000000e-04  1.14000000e-02  7.84742538e-03
  1.02092660e-02  7.42336784e-03  9.55911501e-

In [11]:
OUTPUT_DIM = len(responses) # 30
OUTPUT_DIM

30

In [12]:

INPUT_DIM = X.shape[1] # 420
OUTPUT_DIM = len(responses) # 30

def build_model(n_hidden_layers = 2,
                hidden_layer_size = 32,
                reg_penalty = 0.0001,
                dropout = 0.333,
                verbose=True):

    main_input = Input(shape=(INPUT_DIM,), 
                       dtype='float32', 
                       name='main_input')
    lastlayer=main_input

    for i in range(n_hidden_layers):
        if verbose:
            print("layer %d size %d, reg_penalty %.8f, dropout %.3f" % (i, hidden_layer_size, reg_penalty, dropout))
        lastlayer = Dense(units = hidden_layer_size, 
                          activation = 'relu',
                          kernel_initializer = keras.initializers.glorot_uniform(),
                          kernel_regularizer=keras.regularizers.l1(reg_penalty),
                          name = "Dense%02d" % i)(lastlayer)

        if dropout:
            lastlayer = Dropout(dropout, name = "Dropout%02d" % i)(lastlayer)
    
    outputs = []
    for i in range(OUTPUT_DIM):
        # OUTPUT_DIM outputs
        output01 = Dense(1,
                         activation='linear', 
                         name='output%02d' % i)(lastlayer)
        outputs.append(output01)
    
    model = Model(inputs=[main_input], outputs=outputs)
    if verbose:
        print(model.summary())
    model.compile(loss="mse", optimizer="rmsprop", loss_weights=[1.]*OUTPUT_DIM)
    return model


In [16]:
# run an experiment with walk-forward cross-validation

EPOCHS = 500
#VAL_SPLIT = 0.2
BATCH_SIZE = 128

def run_experiment (n_hidden_layers = 2,
                    hidden_layer_size = 8,
                    reg_penalty = 0.0,
                    dropout = 0.5,
                    epochs = EPOCHS
                   ):

    start = time.time()

    # generate k-folds
    n_splits = 5
    kf = KFold(n_splits=n_splits)
    kf.get_n_splits(X)
    last_indexes = []
    for train_index, test_index in kf.split(X):
        # use test_index as last index to train
        last_index = test_index[-1] + 1
        last_indexes.append(last_index)

    print("%s Generate splits %s" % (time.strftime("%H:%M:%S"), str([i for i in last_indexes])))
    
    avg_bests = []

    print("%s Build model" % (time.strftime("%H:%M:%S")))
    model = build_model(n_hidden_layers = n_hidden_layers,
                        hidden_layer_size = hidden_layer_size,
                        reg_penalty = reg_penalty,
                        dropout = dropout)
    print("Compile time : %s" % str(time.time() - start))
    print("Starting to train : %s" % (time.strftime("%H:%M:%S")))
    for i in range(1, n_splits-1):

        models = []
        losses = []
        scores = []
        count = 0        
        # skip kfold 0 so you start with train 2x size of eval set
        last_train_index = last_indexes[i]
        last_xval_index = last_indexes[i+1]

        # set up train, xval
        # train from beginning to last_train_index
        print("Training indexes 0 to %d" % (last_train_index-1))
        X_fit = X[:last_train_index]
        Y_fit = Y[:last_train_index]
        # xval from last_train_index to last_xval_index
        print("Cross-validating indexes %d to %d" % (last_train_index, last_xval_index -1 ))
        X_xval = X[last_train_index:last_xval_index]
        Y_xval = Y[last_train_index:last_xval_index]

        responses = []
        for i in range(OUTPUT_DIM):
            responses.append(Y_fit[:,i])
        # train for epochs
        for epoch in range(epochs):
            fit = model.fit(
                X_fit,
                responses,
                batch_size=BATCH_SIZE,
                #validation_split=VAL_SPLIT,
                epochs=1,
                verbose=0)
            
            train_loss = fit.history['loss'][-1]
            # evaluate ... run prediction, calc MSE by industry, and average
            y_xval_pred = np.array(model.predict(X_xval))
            y_xval_pred = y_xval_pred.reshape(Y_xval.T.shape)
            y_xval_pred = y_xval_pred.T
            mse_list = []
            for i in range(len(industries)):
                mse_list.append(mean_squared_error(Y_xval[:,i], y_xval_pred[:,i]))
            xval_score = np.mean(np.array(mse_list))            
            
            losses.append(train_loss)
            scores.append(xval_score)
            models.append(copy.copy(model))

            bestloss_index = np.argmin(scores)
            bestloss_value = scores[bestloss_index]

            sys.stdout.write('.')
            count += 1
            if count % 80 == 0:
                print("")
                print("%s Still training" % (time.strftime("%H:%M:%S")))
            sys.stdout.flush()            
            
            # stop if loss rises by 20% from best
            if xval_score / bestloss_value > 1.2:
                print("Stopping early" )
                break

        # choose model with lowest xval loss
        print("")
        print ("%s Best Xval loss epoch %d, value %f" % (time.strftime("%H:%M:%S"), bestloss_index, bestloss_value))
        avg_bests.append(bestloss_value)
        model = models[bestloss_index]
    
    print ("Last Xval loss %f" % (bestloss_value))
    avg_loss = np.mean(np.array(avg_bests))
    print ("Avg Xval loss %f" % avg_loss)
    print("--------------------------------------------------------------------------------")
    return (avg_loss, model)


In [None]:
run_experiment()


In [18]:
# run a lot of experiments in big xval loop
# make predictions
# pick best hyperparameters

MODELPREFIX = "FFNN"

n_hiddens = [1, 2, 3]
layer_sizes = [2, 4, 8, 16]
reg_penalties = [0.0, 0.0001, 0.001, 0.01]
dropouts = [0.25]

hyperparameter_combos = list(product(n_hiddens, layer_sizes, reg_penalties, dropouts))

print("%s Running %d experiments" % (time.strftime("%H:%M:%S"), len(hyperparameter_combos)))

experiments = {}

for counter, param_list in enumerate(hyperparameter_combos):
    n_hidden_layers, layer_size, reg_penalty, dropout = param_list
    print("%s Running experiment %d of %d" % (time.strftime("%H:%M:%S"), counter+1, len(hyperparameter_combos)))
    key = (n_hidden_layers, layer_size, reg_penalty, dropout)
    score, model = run_experiment(n_hidden_layers = n_hidden_layers,
                                  hidden_layer_size = layer_size,
                                  reg_penalty = reg_penalty,
                                  dropout = dropout,
                                  epochs=EPOCHS)
    experiments[key] = score 
    modelname = "%s_%.6f_%d_%d_%.6f_%.3f" % (MODELPREFIX, score, n_hidden_layers, layer_size, reg_penalty, dropout)
    print("%s Saving %s.h5" % (time.strftime("%H:%M:%S"), modelname))
    model.save("%s.h5" % modelname)
    model.save_weights("%s_weights.h5" % modelname)
    

13:07:24 Running 48 experiments
13:07:24 Running experiment 1 of 48
13:07:24 Generate splits [511, 1022, 1533, 2043, 2553]
13:07:24 Build model
layer 0 size 2, reg_penalty 0.00000000, dropout 0.250
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
main_input (InputLayer)         (None, 420)          0                                            
__________________________________________________________________________________________________
Dense00 (Dense)                 (None, 2)            842         main_input[0][0]                 
__________________________________________________________________________________________________
Dropout00 (Dropout)             (None, 2)            0           Dense00[0][0]                    
__________________________________________________________________________________________________
output00 (

................................................................................
13:11:37 Still training
...............................Stopping early

13:11:50 Best Xval loss epoch 7, value 0.005729
Training indexes 0 to 2042
Cross-validating indexes 2043 to 2552
................................................................................
13:12:33 Still training
................................................................................
13:13:15 Still training
................................................................................
13:13:55 Still training
................................................................................
13:14:35 Still training
................................................................................
13:15:15 Still training
................................................................................
13:15:55 Still training
....................
13:16:05 Best Xval loss epoch 9, value 0.005560
Last Xval loss 0.005560
Avg Xval los

Compile time : 0.540935754776001
Starting to train : 13:16:10
Training indexes 0 to 1021
Cross-validating indexes 1022 to 1532
................................................................................
13:16:47 Still training
................................................................................
13:17:17 Still training
................................................................................
13:17:47 Still training
................................................................................
13:18:16 Still training
................................................................................
13:18:46 Still training
................................................................................
13:19:16 Still training
....................
13:19:23 Best Xval loss epoch 137, value 0.002615
Training indexes 0 to 1532
Cross-validating indexes 1533 to 2042
................................................................................
13:19:59 Still training
..

Compile time : 0.8658597469329834
Starting to train : 13:27:54
Training indexes 0 to 1021
Cross-validating indexes 1022 to 1532
................................................................................
13:28:32 Still training
................................................................................
13:29:02 Still training
................................................................................
13:29:32 Still training
................................................................................
13:30:01 Still training
................................................................................
13:30:30 Still training
................................................................................
13:30:59 Still training
....................
13:31:06 Best Xval loss epoch 180, value 0.002626
Training indexes 0 to 1532
Cross-validating indexes 1533 to 2042
................................................................................
13:31:43 Still training
.

Compile time : 0.5540421009063721
Starting to train : 13:39:27
Training indexes 0 to 1021
Cross-validating indexes 1022 to 1532
................................................................................
13:40:05 Still training
................................................................................
13:40:36 Still training
................................................................................
13:41:06 Still training
................................................................................
13:41:36 Still training
................................................................................
13:42:05 Still training
................................................................................
13:42:36 Still training
....................
13:42:44 Best Xval loss epoch 263, value 0.002643
Training indexes 0 to 1532
Cross-validating indexes 1533 to 2042
................................................................................
13:43:22 Still training
.

Compile time : 0.5367112159729004
Starting to train : 13:51:28
Training indexes 0 to 1021
Cross-validating indexes 1022 to 1532
................................................................................
13:52:08 Still training
................................................................................
13:52:36 Still training
................................................................................
13:53:06 Still training
................................................................................
13:53:34 Still training
........................................................................Stopping early

13:54:01 Best Xval loss epoch 15, value 0.002649
Training indexes 0 to 1532
Cross-validating indexes 1533 to 2042
................................................................................
13:54:38 Still training
................................................................................
13:55:14 Still training
........................................

Compile time : 0.5371801853179932
Starting to train : 13:57:46
Training indexes 0 to 1021
Cross-validating indexes 1022 to 1532
................................................................................
13:58:28 Still training
................................................................................
13:59:00 Still training
................................................................................
13:59:30 Still training
................................................................................
14:00:00 Still training
................................................................................
14:00:32 Still training
................................................................................
14:01:03 Still training
....................
14:01:11 Best Xval loss epoch 68, value 0.002627
Training indexes 0 to 1532
Cross-validating indexes 1533 to 2042
................................................................................
14:01:52 Still training
..

Compile time : 0.5355112552642822
Starting to train : 14:09:12
Training indexes 0 to 1021
Cross-validating indexes 1022 to 1532
................................................................................
14:09:54 Still training
................................................................................
14:10:25 Still training
................................................................................
14:10:57 Still training
................................................................................
14:11:29 Still training
................................................................................
14:12:01 Still training
................................................................................
14:12:33 Still training
....................
14:12:41 Best Xval loss epoch 186, value 0.002643
Training indexes 0 to 1532
Cross-validating indexes 1533 to 2042
................................................................................
14:13:20 Still training
.

Compile time : 0.5405559539794922
Starting to train : 14:21:51
Training indexes 0 to 1021
Cross-validating indexes 1022 to 1532
................................................................................
14:22:35 Still training
................................................................................
14:23:08 Still training
................................................................................
14:23:40 Still training
................................................................................
14:24:13 Still training
................................................................................
14:24:45 Still training
................................................................................
14:25:16 Still training
....................
14:25:25 Best Xval loss epoch 189, value 0.002643
Training indexes 0 to 1532
Cross-validating indexes 1533 to 2042
................................................................................
14:26:05 Still training
.

Compile time : 0.5347311496734619
Starting to train : 14:34:41
Training indexes 0 to 1021
Cross-validating indexes 1022 to 1532
................................................................................
14:35:23 Still training
................................................................................
14:35:53 Still training
................................................................................
14:36:24 Still training
................................................................................
14:36:55 Still training
.........................................................Stopping early

14:37:17 Best Xval loss epoch 8, value 0.002659
Training indexes 0 to 1532
Cross-validating indexes 1533 to 2042
................................................................................
14:37:54 Still training
...................................................................Stopping early

14:38:26 Best Xval loss epoch 4, value 0.006274
Training indexes 0 to 2042
Cro

Compile time : 0.5445613861083984
Starting to train : 14:40:45
Training indexes 0 to 1021
Cross-validating indexes 1022 to 1532
................................................................................
14:41:31 Still training
..............................................................Stopping early

14:41:57 Best Xval loss epoch 17, value 0.002651
Training indexes 0 to 1532
Cross-validating indexes 1533 to 2042
................................................................................
14:42:39 Still training
...........................................................Stopping early

14:43:09 Best Xval loss epoch 8, value 0.004929
Training indexes 0 to 2042
Cross-validating indexes 2043 to 2552
................................................................................
14:43:57 Still training
................................................................................
14:44:45 Still training
..............Stopping early

14:44:54 Best Xval loss epoch 27, value 0.

Compile time : 0.5418565273284912
Starting to train : 14:45:07
Training indexes 0 to 1021
Cross-validating indexes 1022 to 1532
................................................................................
14:45:53 Still training
................................................................................
14:46:25 Still training
................................................................................
14:46:59 Still training
................................................................................
14:47:33 Still training
................................................................................
14:48:05 Still training
................................................................................
14:48:38 Still training
....................
14:48:46 Best Xval loss epoch 384, value 0.002643
Training indexes 0 to 1532
Cross-validating indexes 1533 to 2042
................................................................................
14:49:28 Still training
.

Compile time : 0.5364131927490234
Starting to train : 14:58:29
Training indexes 0 to 1021
Cross-validating indexes 1022 to 1532
................................................................................
14:59:14 Still training
................................................................................
14:59:48 Still training
................................................................................
15:00:21 Still training
................................................................................
15:00:56 Still training
................................................................................
15:01:30 Still training
................................................................................
15:02:03 Still training
....................
15:02:12 Best Xval loss epoch 211, value 0.002644
Training indexes 0 to 1532
Cross-validating indexes 1533 to 2042
................................................................................
15:02:55 Still training
.

Compile time : 0.5391185283660889
Starting to train : 15:12:01
Training indexes 0 to 1021
Cross-validating indexes 1022 to 1532
................................................................................
15:12:47 Still training
...................................Stopping early

15:13:01 Best Xval loss epoch 8, value 0.002683
Training indexes 0 to 1532
Cross-validating indexes 1533 to 2042
................................................................................
15:13:40 Still training
.....Stopping early

15:13:43 Best Xval loss epoch 3, value 0.005885
Training indexes 0 to 2042
Cross-validating indexes 2043 to 2552
................................................................................
15:14:29 Still training
................................................................................
15:15:16 Still training
.....................Stopping early

15:15:29 Best Xval loss epoch 12, value 0.006786
Last Xval loss 0.006786
Avg Xval loss 0.005118
---------------------

Compile time : 0.5392169952392578
Starting to train : 15:15:44
Training indexes 0 to 1021
Cross-validating indexes 1022 to 1532
................................................................................
15:16:32 Still training
..............................................Stopping early

15:16:52 Best Xval loss epoch 24, value 0.002642
Training indexes 0 to 1532
Cross-validating indexes 1533 to 2042
................................................................................
15:17:36 Still training
................................................................................
15:18:20 Still training
.......Stopping early

15:18:24 Best Xval loss epoch 9, value 0.005066
Training indexes 0 to 2042
Cross-validating indexes 2043 to 2552
................................................................................
15:19:17 Still training
................................................................................
15:20:09 Still training
...................................

Compile time : 0.5372419357299805
Starting to train : 15:23:59
Training indexes 0 to 1021
Cross-validating indexes 1022 to 1532
................................................................................
15:24:49 Still training
................................................................................
15:25:24 Still training
................................................................................
15:25:59 Still training
................................................................................
15:26:33 Still training
................................................................................
15:27:09 Still training
................................................................................
15:27:44 Still training
....................
15:27:53 Best Xval loss epoch 149, value 0.002621
Training indexes 0 to 1532
Cross-validating indexes 1533 to 2042
................................................................................
15:28:36 Still training
.

Compile time : 0.5392825603485107
Starting to train : 15:38:14
Training indexes 0 to 1021
Cross-validating indexes 1022 to 1532
................................................................................
15:39:03 Still training
................................................................................
15:39:37 Still training
................................................................................
15:40:13 Still training
................................................................................
15:40:48 Still training
................................................................................
15:41:23 Still training
................................................................................
15:41:59 Still training
....................
15:42:08 Best Xval loss epoch 145, value 0.002643
Training indexes 0 to 1532
Cross-validating indexes 1533 to 2042
................................................................................
15:42:52 Still training
.

Compile time : 0.5539760589599609
Starting to train : 15:52:29
Training indexes 0 to 1021
Cross-validating indexes 1022 to 1532
................................................................................
15:53:22 Still training
................................................................................
15:54:00 Still training
................................................................................
15:54:36 Still training
................................................................................
15:55:14 Still training
................................................................................
15:55:50 Still training
................................................................................
15:56:27 Still training
....................
15:56:36 Best Xval loss epoch 104, value 0.002638
Training indexes 0 to 1532
Cross-validating indexes 1533 to 2042
................................................................................
15:57:21 Still training
.

Compile time : 0.5610275268554688
Starting to train : 16:04:35
Training indexes 0 to 1021
Cross-validating indexes 1022 to 1532
................................................................................
16:05:29 Still training
................................................................................
16:06:09 Still training
................................................................................
16:06:47 Still training
................................................................................
16:07:27 Still training
................................................................................
16:08:04 Still training
................................................................................
16:08:44 Still training
....................
16:08:53 Best Xval loss epoch 125, value 0.002633
Training indexes 0 to 1532
Cross-validating indexes 1533 to 2042
................................................................................
16:09:39 Still training
.

Compile time : 0.5583171844482422
Starting to train : 16:20:33
Training indexes 0 to 1021
Cross-validating indexes 1022 to 1532
................................................................................
16:21:26 Still training
................................................................................
16:22:06 Still training
................................................................................
16:22:44 Still training
................................................................................
16:23:25 Still training
................................................................................
16:24:04 Still training
................................................................................
16:24:43 Still training
....................
16:24:53 Best Xval loss epoch 150, value 0.002643
Training indexes 0 to 1532
Cross-validating indexes 1533 to 2042
................................................................................
16:25:43 Still training
.

Compile time : 0.5587158203125
Starting to train : 16:36:48
Training indexes 0 to 1021
Cross-validating indexes 1022 to 1532
................................................................................
16:37:43 Still training
................................................................................
16:38:22 Still training
................................................................................
16:39:02 Still training
................................................................................
16:39:41 Still training
................................................................................
16:40:22 Still training
................................................................................
16:41:02 Still training
....................
16:41:12 Best Xval loss epoch 399, value 0.002643
Training indexes 0 to 1532
Cross-validating indexes 1533 to 2042
................................................................................
16:42:01 Still training
....

Compile time : 0.5611262321472168
Starting to train : 16:53:07
Training indexes 0 to 1021
Cross-validating indexes 1022 to 1532
................................................................................
16:54:04 Still training
................................................................................
16:54:42 Still training
................................................................................
16:55:20 Still training
................................................................................
16:55:58 Still training
................................................................................
16:56:36 Still training
................................................................................
16:57:13 Still training
....................
16:57:23 Best Xval loss epoch 10, value 0.002646
Training indexes 0 to 1532
Cross-validating indexes 1533 to 2042
................................................................................
16:58:13 Still training
..

Compile time : 0.5583155155181885
Starting to train : 17:02:41
Training indexes 0 to 1021
Cross-validating indexes 1022 to 1532
................................................................................
17:03:35 Still training
................................................................................
17:04:16 Still training
................................................................................
17:04:55 Still training
................................................................................
17:05:36 Still training
................................................................................
17:06:15 Still training
................................................................................
17:06:55 Still training
....................
17:07:05 Best Xval loss epoch 101, value 0.002626
Training indexes 0 to 1532
Cross-validating indexes 1533 to 2042
................................................................................
17:07:55 Still training
.

Compile time : 0.5596153736114502
Starting to train : 17:19:11
Training indexes 0 to 1021
Cross-validating indexes 1022 to 1532
................................................................................
17:20:08 Still training
................................................................................
17:20:47 Still training
................................................................................
17:21:26 Still training
................................................................................
17:22:06 Still training
................................................................................
17:22:45 Still training
................................................................................
17:23:22 Still training
....................
17:23:31 Best Xval loss epoch 328, value 0.002643
Training indexes 0 to 1532
Cross-validating indexes 1533 to 2042
................................................................................
17:24:20 Still training
.

Compile time : 0.5591487884521484
Starting to train : 17:35:23
Training indexes 0 to 1021
Cross-validating indexes 1022 to 1532
................................................................................
17:36:20 Still training
................................................................................
17:37:00 Still training
................................................................................
17:37:40 Still training
................................................................................
17:38:20 Still training
................................................................................
17:39:01 Still training
................................................................................
17:39:40 Still training
....................
17:39:50 Best Xval loss epoch 314, value 0.002643
Training indexes 0 to 1532
Cross-validating indexes 1533 to 2042
................................................................................
17:40:41 Still training
.

Compile time : 0.5570101737976074
Starting to train : 17:52:00
Training indexes 0 to 1021
Cross-validating indexes 1022 to 1532
................................................................................
17:52:56 Still training
.....................................................Stopping early

17:53:20 Best Xval loss epoch 10, value 0.002659
Training indexes 0 to 1532
Cross-validating indexes 1533 to 2042
................................................................................
17:54:06 Still training
..........................Stopping early

17:54:21 Best Xval loss epoch 5, value 0.005000
Training indexes 0 to 2042
Cross-validating indexes 2043 to 2552
................................................................................
17:55:17 Still training
................................................................................
17:56:12 Still training
................................................................................
17:57:10 Still training
.........

Compile time : 0.5588545799255371
Starting to train : 17:58:13
Training indexes 0 to 1021
Cross-validating indexes 1022 to 1532
................................................................................
17:59:13 Still training
................................................................................
17:59:53 Still training
...................................Stopping early

18:00:11 Best Xval loss epoch 73, value 0.002627
Training indexes 0 to 1532
Cross-validating indexes 1533 to 2042
................................................................................
18:01:02 Still training
.........................Stopping early

18:01:18 Best Xval loss epoch 17, value 0.004730
Training indexes 0 to 2042
Cross-validating indexes 2043 to 2552
................................................................................
18:02:19 Still training
.......Stopping early

18:02:24 Best Xval loss epoch 8, value 0.006076
Last Xval loss 0.006076
Avg Xval loss 0.004478
--------------

Compile time : 0.558321475982666
Starting to train : 18:02:51
Training indexes 0 to 1021
Cross-validating indexes 1022 to 1532
................................................................................
18:03:52 Still training
................................................................................
18:04:33 Still training
................................................................................
18:05:14 Still training
................................................................................
18:05:58 Still training
................................................................................
18:06:42 Still training
................................................................................
18:07:23 Still training
....................
18:07:32 Best Xval loss epoch 423, value 0.002643
Training indexes 0 to 1532
Cross-validating indexes 1533 to 2042
................................................................................
18:08:27 Still training
..

Compile time : 0.5622925758361816
Starting to train : 18:20:09
Training indexes 0 to 1021
Cross-validating indexes 1022 to 1532
................................................................................
18:21:10 Still training
................................................................................
18:21:53 Still training
................................................................................
18:22:36 Still training
................................................................................
18:23:19 Still training
................................................................................
18:24:01 Still training
................................................................................
18:24:42 Still training
....................
18:24:52 Best Xval loss epoch 72, value 0.002643
Training indexes 0 to 1532
Cross-validating indexes 1533 to 2042
................................................................................
18:25:45 Still training
..

Compile time : 0.5528240203857422
Starting to train : 18:37:17
Training indexes 0 to 1021
Cross-validating indexes 1022 to 1532
................................................................................
18:38:17 Still training
.......Stopping early

18:38:20 Best Xval loss epoch 5, value 0.002644
Training indexes 0 to 1532
Cross-validating indexes 1533 to 2042
................................................................................
18:39:07 Still training
..........Stopping early

18:39:13 Best Xval loss epoch 7, value 0.005468
Training indexes 0 to 2042
Cross-validating indexes 2043 to 2552
................................................................................
18:40:13 Still training
................................................................................
18:41:11 Still training
................................................................................
18:42:11 Still training
........................................................................

Compile time : 0.5610697269439697
Starting to train : 18:45:52
Training indexes 0 to 1021
Cross-validating indexes 1022 to 1532
................................................................................
18:46:55 Still training
...............................Stopping early

18:47:11 Best Xval loss epoch 24, value 0.002632
Training indexes 0 to 1532
Cross-validating indexes 1533 to 2042
................................................................................
18:48:05 Still training
....................Stopping early

18:48:19 Best Xval loss epoch 2, value 0.005134
Training indexes 0 to 2042
Cross-validating indexes 2043 to 2552
................................................................................
18:49:25 Still training
................................................................................
18:50:30 Still training
................................................................................
18:51:34 Still training
.....................................

Compile time : 0.560065507888794
Starting to train : 18:55:37
Training indexes 0 to 1021
Cross-validating indexes 1022 to 1532
................................................................................
18:56:40 Still training
................................................................................
18:57:22 Still training
................................................................................
18:58:03 Still training
................................................................................
18:58:45 Still training
................................................................................
18:59:26 Still training
................................................................................
19:00:09 Still training
....................
19:00:19 Best Xval loss epoch 8, value 0.002643
Training indexes 0 to 1532
Cross-validating indexes 1533 to 2042
................................................................................
19:01:14 Still training
....

Compile time : 0.5587010383605957
Starting to train : 19:13:07
Training indexes 0 to 1021
Cross-validating indexes 1022 to 1532
................................................................................
19:14:13 Still training
................................................................................
19:14:58 Still training
................................................................................
19:15:41 Still training
................................................................................
19:16:27 Still training
................................................................................
19:17:11 Still training
................................................................................
19:17:55 Still training
....................
19:18:06 Best Xval loss epoch 431, value 0.002643
Training indexes 0 to 1532
Cross-validating indexes 1533 to 2042
................................................................................
19:19:04 Still training
.

Compile time : 0.5648822784423828
Starting to train : 19:31:03
Training indexes 0 to 1021
Cross-validating indexes 1022 to 1532
................................................................................
19:32:08 Still training
................................................................................
19:32:48 Still training
................................................................................
19:33:28 Still training
................................................................................
19:34:07 Still training
................................................................................
19:34:47 Still training
................................................................................
19:35:25 Still training
....................
19:35:36 Best Xval loss epoch 315, value 0.002643
Training indexes 0 to 1532
Cross-validating indexes 1533 to 2042
................................................................................
19:36:26 Still training
.

Compile time : 0.5802445411682129
Starting to train : 19:47:50
Training indexes 0 to 1021
Cross-validating indexes 1022 to 1532
................................................................................
19:48:53 Still training
................................................................................
19:49:34 Still training
................................................................................
19:50:16 Still training
................................................................................
19:50:57 Still training
................................................................................
19:51:38 Still training
................................................................................
19:52:19 Still training
....................
19:52:31 Best Xval loss epoch 357, value 0.002643
Training indexes 0 to 1532
Cross-validating indexes 1533 to 2042
................................................................................
19:53:25 Still training
.

Compile time : 0.584650993347168
Starting to train : 20:05:36
Training indexes 0 to 1021
Cross-validating indexes 1022 to 1532
................................................................................
20:06:41 Still training
................................................................................
20:07:23 Still training
................................................................................
20:08:07 Still training
................................................................................
20:08:52 Still training
................................................................................
20:09:35 Still training
................................................................................
20:10:18 Still training
....................
20:10:29 Best Xval loss epoch 455, value 0.002643
Training indexes 0 to 1532
Cross-validating indexes 1533 to 2042
................................................................................
20:11:21 Still training
..

Compile time : 0.5791051387786865
Starting to train : 20:23:12
Training indexes 0 to 1021
Cross-validating indexes 1022 to 1532
................................................................................
20:24:18 Still training
................................................................................
20:25:00 Still training
................................................................................
20:25:40 Still training
................................................................................
20:26:20 Still training
................................................................................
20:26:59 Still training
................................................................................
20:27:41 Still training
....................
20:27:51 Best Xval loss epoch 326, value 0.002643
Training indexes 0 to 1532
Cross-validating indexes 1533 to 2042
................................................................................
20:28:42 Still training
.

Compile time : 0.5682244300842285
Starting to train : 20:40:24
Training indexes 0 to 1021
Cross-validating indexes 1022 to 1532
................................................................................
20:41:28 Still training
................................................................................
20:42:09 Still training
................................................................................
20:42:48 Still training
................................................................................
20:43:27 Still training
................................................................................
20:44:07 Still training
.................................................................Stopping early

20:44:41 Best Xval loss epoch 66, value 0.002640
Training indexes 0 to 1532
Cross-validating indexes 1533 to 2042
................................................................................
20:45:30 Still training
...............................................

Compile time : 0.57950758934021
Starting to train : 20:53:02
Training indexes 0 to 1021
Cross-validating indexes 1022 to 1532
................................................................................
20:54:10 Still training
................................................................................
20:54:54 Still training
................................................................................
20:55:36 Still training
................................................................................
20:56:20 Still training
................................................................................
20:57:02 Still training
................................................................................
20:57:42 Still training
....................
20:57:51 Best Xval loss epoch 339, value 0.002643
Training indexes 0 to 1532
Cross-validating indexes 1533 to 2042
................................................................................
20:58:44 Still training
...

Compile time : 0.581695556640625
Starting to train : 21:11:09
Training indexes 0 to 1021
Cross-validating indexes 1022 to 1532
................................................................................
21:12:17 Still training
................................................................................
21:12:59 Still training
................................................................................
21:13:40 Still training
................................................................................
21:14:21 Still training
................................................................................
21:15:03 Still training
................................................................................
21:15:43 Still training
....................
21:15:53 Best Xval loss epoch 383, value 0.002644
Training indexes 0 to 1532
Cross-validating indexes 1533 to 2042
................................................................................
21:16:45 Still training
..

Compile time : 0.5707087516784668
Starting to train : 21:28:40
Training indexes 0 to 1021
Cross-validating indexes 1022 to 1532
................................................................................
21:29:48 Still training
................................................................................
21:30:31 Still training
................................................................................
21:31:14 Still training
................................................................................
21:31:56 Still training
................................................................................
21:32:38 Still training
................................................................................
21:33:21 Still training
....................
21:33:32 Best Xval loss epoch 316, value 0.002644
Training indexes 0 to 1532
Cross-validating indexes 1533 to 2042
................................................................................
21:34:25 Still training
.

Compile time : 0.5772697925567627
Starting to train : 21:46:27
Training indexes 0 to 1021
Cross-validating indexes 1022 to 1532
................................................................................
21:47:34 Still training
................................................................................
21:48:13 Still training
................................................................................
21:48:52 Still training
................................................................................
21:49:33 Still training
................................................................................
21:50:13 Still training
................................................................................
21:50:52 Still training
....................
21:51:02 Best Xval loss epoch 26, value 0.002644
Training indexes 0 to 1532
Cross-validating indexes 1533 to 2042
................................................................................
21:51:50 Still training
..

Compile time : 0.5788211822509766
Starting to train : 21:56:28
Training indexes 0 to 1021
Cross-validating indexes 1022 to 1532
................................................................................
21:57:40 Still training
................................................................................
21:58:22 Still training
................................................................................
21:59:06 Still training
................................................................................
21:59:49 Still training
................................................................................
22:00:34 Still training
................................................................................
22:01:16 Still training
....................
22:01:27 Best Xval loss epoch 194, value 0.002643
Training indexes 0 to 1532
Cross-validating indexes 1533 to 2042
................................................................................
22:02:21 Still training
.

Compile time : 0.5814123153686523
Starting to train : 22:15:07
Training indexes 0 to 1021
Cross-validating indexes 1022 to 1532
................................................................................
22:16:18 Still training
................................................................................
22:17:00 Still training
................................................................................
22:17:44 Still training
................................................................................
22:18:26 Still training
................................................................................
22:19:12 Still training
................................................................................
22:19:54 Still training
....................
22:20:05 Best Xval loss epoch 11, value 0.002643
Training indexes 0 to 1532
Cross-validating indexes 1533 to 2042
................................................................................
22:21:01 Still training
..

Compile time : 0.5758779048919678
Starting to train : 22:33:37
Training indexes 0 to 1021
Cross-validating indexes 1022 to 1532
................................................................................
22:34:48 Still training
................................................................................
22:35:38 Still training
................................................................................
22:36:23 Still training
................................................................................
22:37:06 Still training
................................................................................
22:37:49 Still training
................................................................................
22:38:30 Still training
....................
22:38:41 Best Xval loss epoch 177, value 0.002643
Training indexes 0 to 1532
Cross-validating indexes 1533 to 2042
................................................................................
22:39:37 Still training
.

Compile time : 0.5754470825195312
Starting to train : 22:52:34
Training indexes 0 to 1021
Cross-validating indexes 1022 to 1532
................................................................................
22:53:45 Still training
...Stopping early

22:53:47 Best Xval loss epoch 9, value 0.002644
Training indexes 0 to 1532
Cross-validating indexes 1533 to 2042
.........................................................Stopping early

22:54:28 Best Xval loss epoch 11, value 0.004749
Training indexes 0 to 2042
Cross-validating indexes 2043 to 2552
................................................................................
22:55:34 Still training
Stopping early

22:55:34 Best Xval loss epoch 8, value 0.005792
Last Xval loss 0.005792
Avg Xval loss 0.004395
--------------------------------------------------------------------------------
22:55:34 Saving FFNN_0.004395_3_16_0.000000_0.250.h5
22:56:20 Running experiment 46 of 48
22:56:20 Generate splits [511, 1022, 1533, 2043, 2553]
22:56:

Compile time : 0.5799844264984131
Starting to train : 22:56:20
Training indexes 0 to 1021
Cross-validating indexes 1022 to 1532
................................................................................
22:57:35 Still training
................................................................................
22:58:19 Still training
................................................................................
22:59:11 Still training
..........................................................Stopping early

22:59:57 Best Xval loss epoch 85, value 0.002632
Training indexes 0 to 1532
Cross-validating indexes 1533 to 2042
................................................................................
23:01:13 Still training
................................................................................
23:02:28 Still training
................................................................................
23:03:45 Still training
......................................................

Compile time : 0.5863220691680908
Starting to train : 23:11:27
Training indexes 0 to 1021
Cross-validating indexes 1022 to 1532
................................................................................
23:12:57 Still training
................................................................................
23:13:56 Still training
................................................................................
23:14:57 Still training
................................................................................
23:15:59 Still training
................................................................................
23:17:01 Still training
................................................................................
23:18:03 Still training
....................
23:18:19 Best Xval loss epoch 331, value 0.002643
Training indexes 0 to 1532
Cross-validating indexes 1533 to 2042
................................................................................
23:19:34 Still training
.

Compile time : 0.5947964191436768
Starting to train : 23:36:40
Training indexes 0 to 1021
Cross-validating indexes 1022 to 1532
................................................................................
23:38:17 Still training
................................................................................
23:39:18 Still training
................................................................................
23:40:19 Still training
................................................................................
23:41:21 Still training
................................................................................
23:42:21 Still training
................................................................................
23:43:22 Still training
....................
23:43:38 Best Xval loss epoch 419, value 0.002643
Training indexes 0 to 1532
Cross-validating indexes 1533 to 2042
................................................................................
23:44:54 Still training
.

In [19]:
# list and chart experiments
flatlist = [list(l[0]) + [l[1]] for l in experiments.items()]

lossframe = pd.DataFrame(flatlist, columns=["n_hidden_layers", "layer_size", "reg_penalty", "dropout",
                                            "loss"])
lossframe.sort_values(['loss'])

Unnamed: 0,n_hidden_layers,layer_size,reg_penalty,dropout,loss
43,3,8,0.01,0.25,0.0039
42,3,8,0.001,0.25,0.0039
23,2,4,0.01,0.25,0.0039
3,1,2,0.01,0.25,0.0039
10,1,8,0.001,0.25,0.0039
18,2,2,0.001,0.25,0.0039
38,3,4,0.001,0.25,0.0039
31,2,16,0.01,0.25,0.0039
47,3,16,0.01,0.25,0.0039
27,2,8,0.01,0.25,0.0039


In [20]:
# we can pick lowest loss , but first we look at patterns by hyperparameter
# if a more parsimonious model has nearly same result, pick more parsimonious model
pd.DataFrame(lossframe.groupby(['n_hidden_layers'])['loss'].mean())


Unnamed: 0_level_0,loss
n_hidden_layers,Unnamed: 1_level_1
1,0.004373
2,0.004149
3,0.003992


In [21]:
pd.DataFrame(lossframe.groupby(['layer_size'])['loss'].mean())


Unnamed: 0_level_0,loss
layer_size,Unnamed: 1_level_1
2,0.004043
4,0.004179
8,0.004206
16,0.004256


In [22]:
pd.DataFrame(lossframe.groupby(['reg_penalty'])['loss'].mean())


Unnamed: 0_level_0,loss
reg_penalty,Unnamed: 1_level_1
0.0,0.004567
0.0001,0.004298
0.001,0.00392
0.01,0.0039


In [23]:
pd.DataFrame(lossframe.groupby(['dropout'])['loss'].mean())


Unnamed: 0_level_0,loss
dropout,Unnamed: 1_level_1
0.25,0.004171


In [24]:
def plot_matrix(lossframe, x_labels, y_labels, x_suffix="", y_suffix=""):

    pivot = lossframe.pivot_table(index=[x_labels], columns=[y_labels], values=['loss'])
    # specify labels as strings, to force it to use a discrete axis
    if lossframe[x_labels].dtype == np.float64 or lossframe[x_labels].dtype == np.float32:
        xaxis = ["%f %s" % (i, x_suffix) for i in pivot.columns.levels[1].values]
    else:
        xaxis = ["%d %s" % (i, x_suffix) for i in pivot.columns.levels[1].values]
    if lossframe[y_labels].dtype == np.float64 or lossframe[y_labels].dtype == np.float32:
        yaxis = ["%f %s" % (i, y_suffix) for i in pivot.index.values]
    else:
        yaxis = ["%d %s" % (i, y_suffix) for i in pivot.index.values]
        
    print(xaxis, yaxis)
    """plot a heat map of a matrix"""
    chart_width=640
    chart_height=480
    
    layout = Layout(
        title="%s v. %s" % (x_labels, y_labels),
        height=chart_height,
        width=chart_width,     
        margin=dict(
            l=150,
            r=30,
            b=120,
            t=100,
        ),
        xaxis=dict(
            title=y_labels,
            tickfont=dict(
                family='Arial, sans-serif',
                size=10,
                color='black'
            ),
        ),
        yaxis=dict(
            title=x_labels,
            tickfont=dict(
                family='Arial, sans-serif',
                size=10,
                color='black'
            ),
        ),
    )
    
    data = [Heatmap(z=pivot.values,
                    x=xaxis,
                    y=yaxis,
                    colorscale=[[0, 'rgb(0,0,255)', [1, 'rgb(255,0,0)']]],
                   )
           ]

    fig = Figure(data=data, layout=layout)
    return iplot(fig, link_text="")

plot_matrix(lossframe, "n_hidden_layers", "layer_size", x_suffix=" units", y_suffix=" layers")



['2  units', '4  units', '8  units', '16  units'] ['1  layers', '2  layers', '3  layers']


In [25]:
plot_matrix(lossframe, "reg_penalty", "dropout", x_suffix=" d", y_suffix=" r")


['0.250000  d'] ['0.000000  r', '0.000100  r', '0.001000  r', '0.010000  r']


In [26]:
experiments[key], model = run_experiment(n_hidden_layers = 2,
                                         hidden_layer_size = 2,
                                         reg_penalty = 0.01,
                                         dropout = 0.25,
                                         epochs=500)

00:10:02 Generate splits [511, 1022, 1533, 2043, 2553]
00:10:02 Build model
layer 0 size 2, reg_penalty 0.01000000, dropout 0.250
layer 1 size 2, reg_penalty 0.01000000, dropout 0.250
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
main_input (InputLayer)         (None, 420)          0                                            
__________________________________________________________________________________________________
Dense00 (Dense)                 (None, 2)            842         main_input[0][0]                 
__________________________________________________________________________________________________
Dropout00 (Dropout)             (None, 2)            0           Dense00[0][0]                    
__________________________________________________________________________________________________
Dense01 (Dense)         

................................................................................
00:15:33 Still training
................................................................................
00:16:31 Still training
....................
00:16:45 Best Xval loss epoch 10, value 0.002644
Training indexes 0 to 1532
Cross-validating indexes 1533 to 2042
................................................................................
00:18:00 Still training
................................................................................
00:19:16 Still training
................................................................................
00:20:53 Still training
................................................................................
00:22:50 Still training
................................................................................
00:25:00 Still training
................................................................................
00:27:02 Still training
....................
00:2

In [27]:
# 1000 training data to start, start backtest around 1987
START = 1000
print(wdata.iloc[START][:30])

Food.pct     0.023733
Beer.pct     0.038407
Smoke.pct    0.037266
Games.pct    0.029874
Books.pct    0.014543
Hshld.pct    0.039553
Clths.pct    0.011233
Hlth.pct     0.027252
Chems.pct    0.039733
Txtls.pct    0.011334
Cnstr.pct    0.033597
Steel.pct    0.050334
FabPr.pct    0.034223
ElcEq.pct    0.056733
Autos.pct    0.038819
Carry.pct    0.010501
Mines.pct    0.063768
Coal.pct     0.029251
Oil.pct      0.049109
Util.pct     0.006603
Telcm.pct    0.022248
Servs.pct    0.046065
BusEq.pct    0.035322
Paper.pct    0.053060
Trans.pct    0.021072
Whlsl.pct    0.013071
Rtail.pct    0.002181
Meals.pct    0.014499
Fin.pct      0.007585
Other.pct    0.035345
Name: 1987-01-16 00:00:00, dtype: float64


In [28]:
EPOCHS=500

def fit_predict(X, Y, model, epochs=EPOCHS, npredict=1, verbose=False):
    """for backtest, train model using Y_list v. X using n-npredict rows
    generate npredict prediction Y_list using last npredict rows of X
    if npredict=1, fit using n-1 rows, return prediction using X for final month
    if npredict=26, fit using n-26 rows, return prediction using X for final 26 months"""
    
    nrows = X.shape[0]
    if verbose:
        print("Fit on %d rows 0 to %d" % (nrows-npredict, nrows-npredict-1))
        print("Predict on %d rows %d to %d" % (npredict, nrows-npredict, nrows-1))
        
    # keep last rows to predict against
    X_predict = X[-npredict:]
    X_predict = X_predict.reshape(npredict,X.shape[1])
    # fit on remaining rows
    X_fit = X[:-npredict]
    Y_fit = Y[:-npredict]
    
    # make a list of Ys expected by Keras
    Y_list = []
    for i in range(OUTPUT_DIM):
        Y_list.append(Y_fit[:,i])
        
    fit = model.fit(
        X_fit,
        Y_list,
        batch_size=BATCH_SIZE,
        epochs=epochs,
        verbose=0)
    
    Z = model.predict(X_predict)
    # get back a list of ncols arrays, reshape each to 1D 1 x npredict array
    Z = [z.reshape(npredict) for z in Z]
    # return npredict x ncols array
    return np.array(Z).transpose()

print("%s Start fit" % (time.strftime("%H:%M:%S")))
predictions = fit_predict(X, Y, model,epochs=3,npredict=3, verbose=True)
print("%s End fit" % (time.strftime("%H:%M:%S")))

predictions

00:38:23 Start fit
Fit on 2550 rows 0 to 2549
Predict on 3 rows 2550 to 2552
00:38:26 End fit


array([[0.00879972, 0.01015064, 0.01350363, 0.00947615, 0.00683789,
        0.0069973 , 0.00873702, 0.00951795, 0.00833488, 0.00938296,
        0.00807213, 0.00564879, 0.0078492 , 0.00868411, 0.00651611,
        0.01018613, 0.00629716, 0.00752538, 0.00813954, 0.00776408,
        0.00891852, 0.00968299, 0.00808005, 0.00813214, 0.00765343,
        0.00845265, 0.00989625, 0.01033768, 0.00917175, 0.00572225],
       [0.00879972, 0.01015064, 0.01350363, 0.00947615, 0.00683789,
        0.0069973 , 0.00873702, 0.00951795, 0.00833488, 0.00938296,
        0.00807213, 0.00564879, 0.0078492 , 0.00868411, 0.00651611,
        0.01018613, 0.00629716, 0.00752538, 0.00813954, 0.00776408,
        0.00891852, 0.00968299, 0.00808005, 0.00813214, 0.00765343,
        0.00845265, 0.00989625, 0.01033768, 0.00917175, 0.00572225],
       [0.00879972, 0.01015064, 0.01350363, 0.00947615, 0.00683789,
        0.0069973 , 0.00873702, 0.00951795, 0.00833488, 0.00938296,
        0.00807213, 0.00564879, 0.0078492 , 0.

In [36]:
EPOCHS=500

nrows = X.shape[0]
startindex = 1000

def run_backtest(X, Y, arg_dict, startindex=0, epochs=EPOCHS, step=1):
    global P
    global R 
    
    print("%s Starting backtest" % (time.strftime("%H:%M:%S")))
    P = np.zeros((Y.shape[0],OUTPUT_DIM))
    
    count = 0
    nrows = X.shape[0]
    for train_index in range(startindex, nrows, step):
        if train_index + step >= nrows:
            train_index = nrows-step
            
        model = build_model(n_hidden_layers = arg_dict["n_hidden_layers"],
                            hidden_layer_size = arg_dict["hidden_layer_size"], 
                            reg_penalty = arg_dict["reg_penalty"], 
                            dropout = arg_dict["dropout"],
                            verbose=arg_dict["verbose"])
        
        fp_index = train_index + step # eg 1000 + 26 = 1026

        # fit on e.g. 0:999, predict 1000-1025
        predictions = fit_predict(X[:fp_index, :], 
                                  Y[:fp_index], 
                                  model,
                                  epochs=epochs,
                                  npredict=step)
        # store in 1000:1025 - lining up with Ys not Xs
        for i in range(step):
            P[train_index + i]= predictions[i]
            sys.stdout.write('.')
            count += 1
            if count % 80 == 0:
                print("")
                print("%s Still training %d of %d" % (time.strftime("%H:%M:%S"), count, nrows-startindex))
            sys.stdout.flush()



In [37]:
def gen_returns(startindex):
    # generate returns
    global X
    global Y
    global P
    global R
    
    nrows = X.shape[0]

    R = np.zeros(nrows)
    NUM_POSITIONS = 6 # top quintile (and bottom)
    
    for train_index in range(startindex, nrows):
        # get indexes, sorted smallest to largest
        select_array = np.argsort(P[train_index])
        # leftmost 6
        short_indexes = select_array[:NUM_POSITIONS]
        # rightmost 6
        long_indexes = select_array[-NUM_POSITIONS:]
        # compute equal weighted long/short return
        # we forecast a 4-period return, so hold this for 4 quarters as 25% of port
        for i in range(4):
            if train_index+i < nrows: # don't exceed bounds of R
                # + 50% long * 0.25 * perf of long indexes
                R[train_index+i] = R[train_index+i] + 0.25 * 0.50 * np.mean(Y[train_index+i, long_indexes])
                # - 50% short * 0.25 * perf of short indexes
                R[train_index+i] = R[train_index+i] - 0.25 * 0.50 * np.mean(Y[train_index+i, short_indexes])
                
    # truncate to nonzero part of R            
    results = R[startindex:]
    
    index = pd.date_range(wdata.iloc[START].name,periods=results.shape[0], freq='W-FRI')
    perfdata = pd.DataFrame(results,index=index,columns=['Returns'])
    perfdata['Equity'] = 100 * np.cumprod(1 + results / 100)
    
    stats = perfdata['Equity'].calc_stats()
    
    retframe = pd.DataFrame([stats.stats.loc['start'],
                             stats.stats.loc['end'],
                             stats.stats.loc['cagr'],
                             stats.stats.loc['yearly_vol'],
                             stats.stats.loc['yearly_sharpe'],
                             stats.stats.loc['max_drawdown'],
                             ffn.core.calc_sortino_ratio(perfdata.Returns, rf=0, nperiods=results.shape[0], annualize=False),
                            ],
                            index = ['start',
                                     'end',
                                     'cagr',
                                     'yearly_vol',
                                     'yearly_sharpe',
                                     'max_drawdown',
                                     'sortino',
                                    ],
                            columns=['Value'])
    return retframe



In [None]:
START=1000
EPOCHS=500
STEP=52
arg_dict = {"n_hidden_layers" : 2,
            "hidden_layer_size" : 2,
            "reg_penalty" : 0.01,
            "dropout": 0.25,
            'verbose' : False
           }
     
#model = build_model(**arg_dict)
run_backtest(X, Y, arg_dict, startindex=START, step=STEP, epochs=EPOCHS)
gen_returns(START)

01:09:40 Starting backtest
................................................................................
01:30:25 Still training 80 of 1553
................................................................................
01:44:12 Still training 160 of 1553
................................................................................
01:48:58 Still training 240 of 1553
................................................................................
01:59:05 Still training 320 of 1553
................................................................................
02:04:24 Still training 400 of 1553
................................................................................
02:16:06 Still training 480 of 1553
................................................................................
02:22:06 Still training 560 of 1553
................................................................................
02:35:00 Still training 640 of 1553
......................................

In [None]:
P[START:]

In [None]:
Y.shape

In [None]:
startindex = START



In [None]:
retframe

In [None]:
# run performance chart
perf = 100 * np.cumprod(1 + results / 100)

def mychart(perf):
    x_coords = np.linspace(1970, 2016, perf.shape[0])
    
    trace1 = Scatter(
        x = x_coords,
        y = perf,
        name = 'Growth of $1',    
    )

    layout = Layout(
        autosize=False,
        width=600,
        height=480,
        yaxis=dict(
            type='log',
            autorange=True
        )
    )
    plotdata = [trace1]
    
    fig = Figure(data=plotdata, layout=layout)
    
    iplot(fig)
    
mychart(perf)