In [1]:
from IPython.core.interactiveshell import InteractiveShell
# Display the value of every expression statement in a cell, not only the last one.
# Many cells below rely on this to show several results at once.
InteractiveShell.ast_node_interactivity = "all"

# Reload all modules imported with %aimport
# (mode 1 limits reloading to modules registered through %aimport).
%load_ext autoreload
%autoreload 1

In [2]:
import pandas as pd
# Shorthand for building MultiIndex slices, e.g. idx["Adj Close", :].
idx = pd.IndexSlice

import datetime as dt
from datetime import date
from datetime import timedelta
import dateutil.parser as dup

# Register the project modules for autoreload before importing names from them.
%aimport trans.data
%aimport trans.gtrans
%aimport trans.reg
%aimport trans.regpipe

from trans.data import GetData
# Shared data-access object used by the download and save calls below.
gd = GetData()
# NOTE(review): star import. Names used later without a visible import
# (make_pipeline, GenSelectAttrsTransformer, pctTrans, GetDataTransformer, ...)
# presumably come from here; consider importing them explicitly.
from trans.gtrans import *
from trans.reg import Reg, RegAttr
from trans.regpipe import RegPipe

## Memorialize: a switch that makes a run repeatable by fixing the end date, etc.  
## It then writes its output to "verify_" files for regression testing


In [3]:
# When True, the run is pinned to a fixed end date and intermediate results are
# saved to pickle files so that later runs can be compared against them.
Memorialize = False

In [4]:
# "today" is midnight of the current day, unless the run is memorialized, in which
# case it is pinned to a fixed date so that results are repeatable.
if Memorialize:
    today = dup.parse("03/09/2018")
else:
    today = dt.datetime.combine(date.today(), dt.time.min)

# Cutoff applied to the price data when memorializing.
end_fixed = today
today

# Earliest date passed to the download step.
start = dup.parse("01/01/2000")
start

datetime.datetime(2018, 4, 3, 0, 0)

datetime.datetime(2000, 1, 1, 0, 0)

In [5]:
# NOTE(review): gd was already created in the imports cell; this second GetData()
# looks redundant — confirm the constructor has no needed side effect before removing.
gd = GetData()
# Tickers reported by gd.existing() — presumably those with data already stored; TODO confirm.
univ = gd.existing()
univ.sort()  # in-place sort (assumes existing() returns a list)

len(univ)

1118

In [6]:
# Sector name -> ticker symbol used to represent that sector.
# Built from ordered pairs so the insertion order matches the listing below.
sectors = dict([
    ("Consumer Discretionary", "XLY"),
    ("Consumer Staples", "XLP"),
    ("Energy", "XLE"),
    ("Financial", "XLF"),
    ("Health", "XLV"),
    ("Industrial", "XLI"),
    ("Materials", "XLB"),
    ("Real Estate", "XLRE"),
    ("Technology", "XLK"),
    ("Telecom", "XTL"),
    ("Utilities", "XLU"),
])
   

In [7]:
# Ticker symbols in the same order as the sectors mapping.
sector_tickers = [ticker for ticker in sectors.values()]

In [8]:
# Show the ticker list that the download and assembly steps will use.
sector_tickers

['XLY', 'XLP', 'XLE', 'XLF', 'XLV', 'XLI', 'XLB', 'XLRE', 'XLK', 'XTL', 'XLU']

## Download data

In [9]:
# Set to True to fetch price history for the sector tickers between start and today;
# with False the notebook works from data that was downloaded earlier.
# NOTE(review): the output saved with this cell shows a download taking place, so it
# was presumably last executed with get = True.
get = False
if get:
    changed_tickers = gd.get_data(sector_tickers, start, today)
    len(changed_tickers)
    # Requested tickers that are absent from the returned list.
    list(set(sector_tickers).difference(changed_tickers))

Extend XLY from 03/28/2018 beginning on 03/29/2018 to 04/03/2018.
get_one: Yahoo exception for XLY: <class 'pandas_datareader._utils.RemoteDataError'> - Unable to read URL: https://query1.finance.yahoo.com/v7/finance/download/XLY?period1=1522296000&period2=1522814399&interval=1d&events=history&crumb=XQuVa7%5Cu002F8lvw
get_one: Yahoo error for XLY, re-try 1.
Extend XLP from 03/28/2018 beginning on 03/29/2018 to 04/03/2018.
Extend XLE from 03/28/2018 beginning on 03/29/2018 to 04/03/2018.
Extend XLF from 03/28/2018 beginning on 03/29/2018 to 04/03/2018.
Extend XLV from 03/28/2018 beginning on 03/29/2018 to 04/03/2018.
Extend XLI from 03/28/2018 beginning on 03/29/2018 to 04/03/2018.
Extend XLB from 03/28/2018 beginning on 03/29/2018 to 04/03/2018.
Extend XLRE from 03/28/2018 beginning on 03/29/2018 to 04/03/2018.
Extend XLK from 03/28/2018 beginning on 03/29/2018 to 04/03/2018.
Extend XTL from 03/28/2018 beginning on 03/29/2018 to 04/03/2018.
get_one: Yahoo exception for XTL: <class 'pan

11

[]

## Assemble data (already downloaded) into DataFrame
- Note: The index will be a DateTime already, no need to convert from string. No need for DatetimeIndexTransformer
- Note: the index will be restricted to dates from SPY, no need for RestrictToCalendarColTransformer

In [10]:
# Assemble the stored prices for the sector tickers into a single frame,
# with SPY passed as the calendar ticker.
price_df = (
    GetDataTransformer(sector_tickers, cal_ticker="SPY")
    .fit_transform(pd.DataFrame())
)
# A memorialized run keeps nothing after the fixed end date.
if Memorialize:
    price_df = price_df.loc[:end_fixed, :]

price_df.shape

(4588, 72)

In [11]:
# Date coverage of the assembled prices.
price_df.index.min()
price_df.index.max()

# Slice out the "Adj Close" attribute once and reuse it for the shape check and the save.
adj_close_df = price_df.loc[:, idx["Adj Close", :]]
adj_close_df.shape

if Memorialize:
    gd.save_data(adj_close_df, "verify_sectors_raw_df.pkl")

Timestamp('2000-01-03 00:00:00')

Timestamp('2018-03-28 00:00:00')

(4588, 12)

## Compute returns

In [12]:
# Check the index type of the price frame before computing returns.
type(price_df.index)

pandas.tseries.index.DatetimeIndex

In [13]:
# Returns pipeline, "rename" variant:
#   1. keep only the 'Adj Close' attribute (dropSingle=False keeps the attribute level),
#   2. apply pctTrans (presumably percent change — confirm in trans.gtrans),
#   3. relabel every level-0 column name as "Pct".
pct_steps = [
    GenSelectAttrsTransformer(['Adj Close'], dropSingle=False),
    pctTrans,
    GenRenameAttrsTransformer(lambda col: "Pct", level=0),
]
pipe_pct = make_pipeline(*pct_steps)
pct_df = pipe_pct.fit_transform(price_df)
pct_df.tail()

transform: pandas version <= 0.20.


Unnamed: 0_level_0,Pct,Pct,Pct,Pct,Pct,Pct,Pct,Pct,Pct,Pct,Pct,Pct
Unnamed: 0_level_1,SPY,XLB,XLE,XLF,XLI,XLK,XLP,XLRE,XLU,XLV,XLY,XTL
Dt,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
2018-03-22,-0.024997,-0.029881,-0.020414,-0.036908,-0.03301,-0.025815,-0.00828,-0.003588,0.004441,-0.028769,-0.021262,-0.01885
2018-03-23,-0.003187,-0.001575,0.009973,-0.003977,0.002032,-0.00106,0.000388,-0.002946,-0.002412,-0.00086,-0.00411,-0.005833
2018-03-26,-0.003043,-0.00596,-0.007826,-0.003993,-0.004191,-0.003941,-0.004464,-0.006566,-0.005238,-0.012302,0.000197,-0.010955
2018-03-27,-0.005457,-0.002116,0.000163,-0.010569,-0.002579,-0.017197,0.005264,0.008923,0.018226,0.000872,-0.006582,-0.003109
2018-03-28,-0.005909,-0.014579,-0.02005,-0.000368,-0.006942,-0.006658,0.013964,0.018343,-0.00179,0.005226,-0.013746,-0.011034


In [14]:
# Memorialized run: save the returns frame for later comparison.
if Memorialize:
    gd.save_data( pct_df, "verify_sectors_pct_df.pkl")

## Alternate way of creating Returns: drop attribute and re-add

In [15]:
# Returns pipeline, "drop and re-add" variant: select 'Adj Close' with dropSingle=True,
# apply pctTrans, then attach the attribute 'Pct' again.
# Steps left disabled because the input frame does not need them:
#   RestrictToCalendarColTransformer( "SPY" ), DatetimeIndexTransformer("Dt"),
#   RestrictToNonNullTransformer("all")
alt_steps = [
    GenSelectAttrsTransformer(['Adj Close'], dropSingle=True),
    pctTrans,
    AddAttrTransformer('Pct'),
]
pipe_pct = make_pipeline(*alt_steps)
# This overwrites the pct_df produced by the previous pipeline.
pct_df = pipe_pct.fit_transform(price_df)
pct_df.shape
pct_df.tail()

(4588, 12)

Unnamed: 0_level_0,Pct,Pct,Pct,Pct,Pct,Pct,Pct,Pct,Pct,Pct,Pct,Pct
Unnamed: 0_level_1,SPY,XLB,XLE,XLF,XLI,XLK,XLP,XLRE,XLU,XLV,XLY,XTL
Dt,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
2018-03-22,-0.024997,-0.029881,-0.020414,-0.036908,-0.03301,-0.025815,-0.00828,-0.003588,0.004441,-0.028769,-0.021262,-0.01885
2018-03-23,-0.003187,-0.001575,0.009973,-0.003977,0.002032,-0.00106,0.000388,-0.002946,-0.002412,-0.00086,-0.00411,-0.005833
2018-03-26,-0.003043,-0.00596,-0.007826,-0.003993,-0.004191,-0.003941,-0.004464,-0.006566,-0.005238,-0.012302,0.000197,-0.010955
2018-03-27,-0.005457,-0.002116,0.000163,-0.010569,-0.002579,-0.017197,0.005264,0.008923,0.018226,0.000872,-0.006582,-0.003109
2018-03-28,-0.005909,-0.014579,-0.02005,-0.000368,-0.006942,-0.006658,0.013964,0.018343,-0.00179,0.005226,-0.013746,-0.011034


In [16]:
import dateutil.parser as dup
import dateutil.relativedelta as rd

# Rolling-regression schedule: a six-month window, advanced in four-week steps.
regWindow = rd.relativedelta(months=6)
regStep = rd.relativedelta(weeks=4)

# First and last dates covered by the regressions.
regStart, regEnd = dup.parse("01/01/2000"), dup.parse("12/29/2017")
# Alternative end date:
# regEnd   = dup.parse("02/28/2018")

In [17]:
# Bundle the regression schedule so a memorialized run can save it with the results.
regParams = dict(start=regStart, end=regEnd, window=regWindow, step=regStep)
if Memorialize:
    gd.save_data(regParams, "verify_regParams.pkl")

## Compute the model: 
$Return_{sector\ ticker} = \beta_0 + \beta_1 \cdot Return_{SPY} + \epsilon$

In [18]:
# Rolling regression over the returns frame.
rp = RegPipe( pct_df )
# SPY's return is the only independent variable; per the log output, each remaining
# "Pct" column is regressed against it in turn.
rp.indCols( [ idx["Pct", "SPY"] ] )
# Arguments: first date, last date, window length, step between windows.
rp.regress( regStart, regEnd, regWindow, regStep)

IndCols: [('Pct', 'SPY')], depCol ('Pct', 'XLB'), cols [('Pct', 'SPY'), ('Pct', 'XLB')]
IndCols: [('Pct', 'SPY')], depCol ('Pct', 'XLE'), cols [('Pct', 'SPY'), ('Pct', 'XLE')]
IndCols: [('Pct', 'SPY')], depCol ('Pct', 'XLF'), cols [('Pct', 'SPY'), ('Pct', 'XLF')]
IndCols: [('Pct', 'SPY')], depCol ('Pct', 'XLI'), cols [('Pct', 'SPY'), ('Pct', 'XLI')]
IndCols: [('Pct', 'SPY')], depCol ('Pct', 'XLK'), cols [('Pct', 'SPY'), ('Pct', 'XLK')]
IndCols: [('Pct', 'SPY')], depCol ('Pct', 'XLP'), cols [('Pct', 'SPY'), ('Pct', 'XLP')]
IndCols: [('Pct', 'SPY')], depCol ('Pct', 'XLRE'), cols [('Pct', 'SPY'), ('Pct', 'XLRE')]
Fit: the following columns have naN:  ['XLRE']
Fit: the following columns have naN:  ['XLRE']
Fit: the following columns have naN:  ['XLRE']
Fit: the following columns have naN:  ['XLRE']
Fit: the following columns have naN:  ['XLRE']
Fit: the following columns have naN:  ['XLRE']
Fit: the following columns have naN:  ['XLRE']
Fit: the following columns have naN:  ['XLRE']
Fit: t

In [19]:
# Inspect the fitted coefficients: the size of the frame and its last few rows.
rp.beta_df.shape
rp.beta_df.tail()

(229, 22)

Unnamed: 0_level_0,Beta 0,Beta 0,Beta 0,Beta 0,Beta 0,Beta 0,Beta 0,Beta 0,Beta 0,Beta 0,...,Beta 1,Beta 1,Beta 1,Beta 1,Beta 1,Beta 1,Beta 1,Beta 1,Beta 1,Beta 1
Unnamed: 0_level_1,XLB,XLE,XLF,XLI,XLK,XLP,XLRE,XLU,XLV,XLY,...,XLE,XLF,XLI,XLK,XLP,XLRE,XLU,XLV,XLY,XTL
Dt,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2017-09-08,4e-05,-0.000762,-0.000633,-6.1e-05,0.000298,-3.1e-05,0.000423,0.000777,0.000454,-0.000135,...,0.707524,1.297794,1.096055,1.280018,0.478708,0.508654,0.131469,0.834555,1.003449,1.326025
2017-10-06,4.6e-05,-0.000526,5.6e-05,0.00016,0.000109,-0.000334,-3.1e-05,0.000314,0.000363,-0.000267,...,0.615224,1.294375,1.033866,1.340575,0.497813,0.461484,0.136299,0.826531,1.009214,1.366022
2017-11-03,0.000196,-6.3e-05,0.000188,-2.3e-05,0.000206,-0.000434,0.000118,0.000597,9e-05,-0.00051,...,0.54199,1.133993,0.99506,1.439256,0.473676,0.460484,0.15849,0.827925,1.04799,1.318488
2017-12-01,6.7e-05,0.00023,0.000597,-5.5e-05,-9.6e-05,-0.000412,0.000124,0.000347,8e-05,-0.000298,...,0.542585,0.993585,1.043711,1.461687,0.556895,0.520425,0.135866,0.8405,1.025294,1.149932
2017-12-29,0.000156,0.000496,6.7e-05,7.9e-05,9.9e-05,-1.5e-05,-0.000165,9.3e-05,-0.00041,3.7e-05,...,0.643274,1.131471,1.03434,1.369211,0.483971,0.56898,0.175113,0.921692,0.945197,1.158524


## Compute residuals:
 - For residual, don't roll beta: the date of the beta is the last date of the regression window
 - Fill the beta backwards, so the in-sample beta is applied

In [20]:
# rollAmount = 0: the betas are not rolled; each stays dated at the last day of its window.
rollAmount = 0
# "bfill": betas are filled backwards, so dates inside a window use that window's beta.
fillMethod = "bfill"

# Prepare the attribution from the returns and the rolling betas.
rp.attrib_setup(pct_df, rp.beta_df, rollAmount, fillMethod)

In [21]:
# Run the attribution prepared by attrib_setup; the result is read from rp.retAttr_df.
rp.attrib()

rp.retAttr_df.shape
# Last rows up to 2017-12-29, the regression end date.
rp.retAttr_df.loc[:"2017-12-29",:].tail()

(4596, 44)

Unnamed: 0_level_0,Contrib from 1,Contrib from 1,Contrib from 1,Contrib from 1,Contrib from 1,Contrib from 1,Contrib from 1,Contrib from 1,Contrib from 1,Contrib from 1,...,Error,Error,Error,Error,Error,Error,Error,Error,Error,Error
Unnamed: 0_level_1,XLB,XLE,XLF,XLI,XLK,XLP,XLRE,XLU,XLV,XLY,...,XLE,XLF,XLI,XLK,XLP,XLRE,XLU,XLV,XLY,XTL
Dt,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2017-12-22,0.000156,0.000496,6.7e-05,7.9e-05,9.9e-05,-1.5e-05,-0.000165,9.3e-05,-0.00041,3.7e-05,...,0.001761,-0.002615,0.000457,-0.00036,0.001727,0.006834,0.00148,-0.002594,-0.001706,-0.00487
2017-12-26,0.000156,0.000496,6.7e-05,7.9e-05,9.9e-05,-1.5e-05,-0.000165,9.3e-05,-0.00041,3.7e-05,...,0.009027,-0.001565,0.002219,-0.005123,0.00288,0.00609,-0.005794,0.001754,0.003316,-0.001946
2017-12-27,0.000156,0.000496,6.7e-05,7.9e-05,9.9e-05,-1.5e-05,-0.000165,9.3e-05,-0.00041,3.7e-05,...,-0.004115,0.000455,0.001803,0.000327,-0.000395,0.003878,0.004041,0.00165,-0.002513,0.002188
2017-12-28,0.000156,0.000496,6.7e-05,7.9e-05,9.9e-05,-1.5e-05,-0.000165,9.3e-05,-0.00041,3.7e-05,...,-0.000299,0.001534,-0.000225,-0.001202,-0.00256,0.004802,0.005085,-4.1e-05,0.000745,-6.9e-05
2017-12-29,0.000156,0.000496,6.7e-05,7.9e-05,9.9e-05,-1.5e-05,-0.000165,9.3e-05,-0.00041,3.7e-05,...,-0.000968,-0.002916,0.001974,-0.000224,0.001664,0.003222,0.001137,-0.002245,-0.002316,-0.00419


## Demonstrate a non-rolling (single-window) regression

In [22]:
# Start of the single window that ends at regEnd: one window length back, plus one day.
regStarts = regEnd - regWindow + timedelta(days=1)

# Returns restricted to that window (label slicing with .loc includes both endpoints).
pct_dfs = pct_df.loc[ regStarts:regEnd,:]

# Same setup as the rolling case, but one regression over the whole slice.
rps = RegPipe( pct_dfs )
rps.indCols( [ idx["Pct", "SPY"] ] )
rps.regressSingle()

rps.beta_df.shape
rps.beta_df.tail()

IndCols: [('Pct', 'SPY')], depCol ('Pct', 'XLB'), cols [('Pct', 'SPY'), ('Pct', 'XLB')]
IndCols: [('Pct', 'SPY')], depCol ('Pct', 'XLE'), cols [('Pct', 'SPY'), ('Pct', 'XLE')]
IndCols: [('Pct', 'SPY')], depCol ('Pct', 'XLF'), cols [('Pct', 'SPY'), ('Pct', 'XLF')]
IndCols: [('Pct', 'SPY')], depCol ('Pct', 'XLI'), cols [('Pct', 'SPY'), ('Pct', 'XLI')]
IndCols: [('Pct', 'SPY')], depCol ('Pct', 'XLK'), cols [('Pct', 'SPY'), ('Pct', 'XLK')]
IndCols: [('Pct', 'SPY')], depCol ('Pct', 'XLP'), cols [('Pct', 'SPY'), ('Pct', 'XLP')]
IndCols: [('Pct', 'SPY')], depCol ('Pct', 'XLRE'), cols [('Pct', 'SPY'), ('Pct', 'XLRE')]
IndCols: [('Pct', 'SPY')], depCol ('Pct', 'XLU'), cols [('Pct', 'SPY'), ('Pct', 'XLU')]
IndCols: [('Pct', 'SPY')], depCol ('Pct', 'XLV'), cols [('Pct', 'SPY'), ('Pct', 'XLV')]
IndCols: [('Pct', 'SPY')], depCol ('Pct', 'XLY'), cols [('Pct', 'SPY'), ('Pct', 'XLY')]
IndCols: [('Pct', 'SPY')], depCol ('Pct', 'XTL'), cols [('Pct', 'SPY'), ('Pct', 'XTL')]


(1, 22)

Unnamed: 0_level_0,Beta 0,Beta 0,Beta 0,Beta 0,Beta 0,Beta 0,Beta 0,Beta 0,Beta 0,Beta 0,...,Beta 1,Beta 1,Beta 1,Beta 1,Beta 1,Beta 1,Beta 1,Beta 1,Beta 1,Beta 1
Unnamed: 0_level_1,XLB,XLE,XLF,XLI,XLK,XLP,XLRE,XLU,XLV,XLY,...,XLE,XLF,XLI,XLK,XLP,XLRE,XLU,XLV,XLY,XTL
Dt,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2017-12-29,0.000156,0.000496,6.7e-05,7.9e-05,9.9e-05,-1.5e-05,-0.000165,9.3e-05,-0.00041,3.7e-05,...,0.643274,1.131471,1.03434,1.369211,0.483971,0.56898,0.175113,0.921692,0.945197,1.158524


In [23]:
# Memorialized run: save the single-window betas for later comparison.
if Memorialize:
    gd.save_data( rps.beta_df, "verify_beta_df.pkl")

In [24]:
# Same attribution settings as the rolling case: betas not rolled, filled backwards.
rollAmount = 0
fillMethod = "bfill"

# Attribution for the single-window regression, using only the windowed returns.
rps.attrib_setup(pct_dfs, rps.beta_df, rollAmount, fillMethod)
rps.attrib()

rps.retAttr_df.shape
rps.retAttr_df.loc[:"2017-12-29",:].tail()

(127, 44)

Unnamed: 0_level_0,Contrib from 1,Contrib from 1,Contrib from 1,Contrib from 1,Contrib from 1,Contrib from 1,Contrib from 1,Contrib from 1,Contrib from 1,Contrib from 1,...,Error,Error,Error,Error,Error,Error,Error,Error,Error,Error
Unnamed: 0_level_1,XLB,XLE,XLF,XLI,XLK,XLP,XLRE,XLU,XLV,XLY,...,XLE,XLF,XLI,XLK,XLP,XLRE,XLU,XLV,XLY,XTL
Dt,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2017-12-22,0.000156,0.000496,6.7e-05,7.9e-05,9.9e-05,-1.5e-05,-0.000165,9.3e-05,-0.00041,3.7e-05,...,0.001761,-0.002615,0.000457,-0.00036,0.001727,0.006834,0.00148,-0.002594,-0.001706,-0.00487
2017-12-26,0.000156,0.000496,6.7e-05,7.9e-05,9.9e-05,-1.5e-05,-0.000165,9.3e-05,-0.00041,3.7e-05,...,0.009027,-0.001565,0.002219,-0.005123,0.00288,0.00609,-0.005794,0.001754,0.003316,-0.001946
2017-12-27,0.000156,0.000496,6.7e-05,7.9e-05,9.9e-05,-1.5e-05,-0.000165,9.3e-05,-0.00041,3.7e-05,...,-0.004115,0.000455,0.001803,0.000327,-0.000395,0.003878,0.004041,0.00165,-0.002513,0.002188
2017-12-28,0.000156,0.000496,6.7e-05,7.9e-05,9.9e-05,-1.5e-05,-0.000165,9.3e-05,-0.00041,3.7e-05,...,-0.000299,0.001534,-0.000225,-0.001202,-0.00256,0.004802,0.005085,-4.1e-05,0.000745,-6.9e-05
2017-12-29,0.000156,0.000496,6.7e-05,7.9e-05,9.9e-05,-1.5e-05,-0.000165,9.3e-05,-0.00041,3.7e-05,...,-0.000968,-0.002916,0.001974,-0.000224,0.001664,0.003222,0.001137,-0.002245,-0.002316,-0.00419


In [25]:
# Keep only the "Error" attribute of the single-window attribution, for every ticker.
sector_residuals = rps.retAttr_df.loc[:, idx["Error",:]]
sector_residuals.tail()

Unnamed: 0_level_0,Error,Error,Error,Error,Error,Error,Error,Error,Error,Error,Error
Unnamed: 0_level_1,XLB,XLE,XLF,XLI,XLK,XLP,XLRE,XLU,XLV,XLY,XTL
Dt,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2
2017-12-22,0.003942,0.001761,-0.002615,0.000457,-0.00036,0.001727,0.006834,0.00148,-0.002594,-0.001706,-0.00487
2017-12-26,0.000257,0.009027,-0.001565,0.002219,-0.005123,0.00288,0.00609,-0.005794,0.001754,0.003316,-0.001946
2017-12-27,0.000666,-0.004115,0.000455,0.001803,0.000327,-0.000395,0.003878,0.004041,0.00165,-0.002513,0.002188
2017-12-28,0.002182,-0.000299,0.001534,-0.000225,-0.001202,-0.00256,0.004802,0.005085,-4.1e-05,0.000745,-6.9e-05
2017-12-29,0.001944,-0.000968,-0.002916,0.001974,-0.000224,0.001664,0.003222,0.001137,-0.002245,-0.002316,-0.00419


In [26]:
# Memorialized run: save the residuals.
# NOTE(review): unlike the other memorialized files, this name has no "verify_" prefix —
# confirm whether that is intentional (e.g. read by another notebook).
if Memorialize:
    gd.save_data(sector_residuals, "sector_residuals.pkl")

In [27]:
# Start date passed to the stack builders below as the beginning of the residual period.
resStart = dup.parse("01/01/2016")

## OBSOLETE, replaced by trans.stacked.residual

In [28]:
from trans.stack import Stack
%aimport trans.stack

s = Stack(pct_df)
stack = s.repeated(resStart, regEnd, regWindow, regStep)

IndCols: [('Pct', 'SPY')], depCol ('Pct', 'XLB'), cols [('Pct', 'SPY'), ('Pct', 'XLB')]
IndCols: [('Pct', 'SPY')], depCol ('Pct', 'XLE'), cols [('Pct', 'SPY'), ('Pct', 'XLE')]
IndCols: [('Pct', 'SPY')], depCol ('Pct', 'XLF'), cols [('Pct', 'SPY'), ('Pct', 'XLF')]
IndCols: [('Pct', 'SPY')], depCol ('Pct', 'XLI'), cols [('Pct', 'SPY'), ('Pct', 'XLI')]
IndCols: [('Pct', 'SPY')], depCol ('Pct', 'XLK'), cols [('Pct', 'SPY'), ('Pct', 'XLK')]
IndCols: [('Pct', 'SPY')], depCol ('Pct', 'XLP'), cols [('Pct', 'SPY'), ('Pct', 'XLP')]
IndCols: [('Pct', 'SPY')], depCol ('Pct', 'XLRE'), cols [('Pct', 'SPY'), ('Pct', 'XLRE')]
IndCols: [('Pct', 'SPY')], depCol ('Pct', 'XLU'), cols [('Pct', 'SPY'), ('Pct', 'XLU')]
IndCols: [('Pct', 'SPY')], depCol ('Pct', 'XLV'), cols [('Pct', 'SPY'), ('Pct', 'XLV')]
IndCols: [('Pct', 'SPY')], depCol ('Pct', 'XLY'), cols [('Pct', 'SPY'), ('Pct', 'XLY')]
IndCols: [('Pct', 'SPY')], depCol ('Pct', 'XTL'), cols [('Pct', 'SPY'), ('Pct', 'XTL')]
IndCols: [('Pct', 'SPY')], dep

In [29]:
# Walk the legacy stack; each entry is indexed as (label, frame).
# On a memorialized run every frame is saved under a name carrying its label as YYYYMMDD.
# NOTE(review): these file names have no "verify_" prefix — confirm that is intentional.
for stk in stack:
    suffix, data = stk[0].strftime("%Y%m%d"), stk[1]
    if Memorialize:
        gd.save_data(data, "sector_residuals_{}.pkl".format(suffix))
    
             

## Residual stack

In [30]:
%aimport trans.stacked.residual
from trans.stacked.residual import Residual

# Build the residual stack with the newer API: construct, init with the inputs,
# collect the result of repeated(), then call done().
# debug=True presumably enables the nextChunk/repeated log lines — TODO confirm.
rstack = Residual(debug=True)
rstack.init(df=pct_df, start=resStart, end=regEnd, window=regWindow, step=regStep)
# Later cells iterate resid_stack and index each entry as (label, frame).
resid_stack = rstack.repeated()
rstack.done()

nextChunk for period 2017-06-30 00:00:00 to 2017-12-29 00:00:00 shape: (127, 12)
repeated: chunk label 2017-12-29 00:00:00 with shape (127, 12)
IndCols: [('Pct', 'SPY')], depCol ('Pct', 'XLB'), cols [('Pct', 'SPY'), ('Pct', 'XLB')]
IndCols: [('Pct', 'SPY')], depCol ('Pct', 'XLE'), cols [('Pct', 'SPY'), ('Pct', 'XLE')]
IndCols: [('Pct', 'SPY')], depCol ('Pct', 'XLF'), cols [('Pct', 'SPY'), ('Pct', 'XLF')]
IndCols: [('Pct', 'SPY')], depCol ('Pct', 'XLI'), cols [('Pct', 'SPY'), ('Pct', 'XLI')]
IndCols: [('Pct', 'SPY')], depCol ('Pct', 'XLK'), cols [('Pct', 'SPY'), ('Pct', 'XLK')]
IndCols: [('Pct', 'SPY')], depCol ('Pct', 'XLP'), cols [('Pct', 'SPY'), ('Pct', 'XLP')]
IndCols: [('Pct', 'SPY')], depCol ('Pct', 'XLRE'), cols [('Pct', 'SPY'), ('Pct', 'XLRE')]
IndCols: [('Pct', 'SPY')], depCol ('Pct', 'XLU'), cols [('Pct', 'SPY'), ('Pct', 'XLU')]
IndCols: [('Pct', 'SPY')], depCol ('Pct', 'XLV'), cols [('Pct', 'SPY'), ('Pct', 'XLV')]
IndCols: [('Pct', 'SPY')], depCol ('Pct', 'XLY'), cols [('Pct'

In [31]:
# Memorialized run: save the whole residual stack for later comparison.
if Memorialize:
    gd.save_data( resid_stack, "verify_resid_stack.pkl")

In [32]:
# Summarize the residual stack: one line per entry with its label and frame shape.
# Each entry is indexed as (label, frame). The unused suffix/data assignments and the
# commented-out save call were dropped; this cell only reports.
for stk in resid_stack:
    print("Stack {} shape: {}".format(stk[0], stk[1].shape))

Stack 2017-12-29 00:00:00 shape: (127, 11)
Stack 2017-12-01 00:00:00 shape: (128, 11)
Stack 2017-11-03 00:00:00 shape: (129, 11)
Stack 2017-10-06 00:00:00 shape: (127, 11)
Stack 2017-09-08 00:00:00 shape: (128, 11)
Stack 2017-08-11 00:00:00 shape: (126, 11)
Stack 2017-07-14 00:00:00 shape: (125, 11)
Stack 2017-06-16 00:00:00 shape: (124, 11)
Stack 2017-05-19 00:00:00 shape: (124, 11)
Stack 2017-04-21 00:00:00 shape: (124, 11)
Stack 2017-03-24 00:00:00 shape: (125, 11)
Stack 2017-02-24 00:00:00 shape: (126, 11)
Stack 2017-01-27 00:00:00 shape: (127, 11)
Stack 2016-12-30 00:00:00 shape: (127, 11)
Stack 2016-12-02 00:00:00 shape: (128, 11)
Stack 2016-11-04 00:00:00 shape: (129, 11)
Stack 2016-10-07 00:00:00 shape: (128, 11)
Stack 2016-09-09 00:00:00 shape: (128, 11)
Stack 2016-08-12 00:00:00 shape: (126, 11)
Stack 2016-07-15 00:00:00 shape: (125, 11)


## PCA stack

In [33]:
%aimport trans.stacked.pca

from trans.stacked.pca import PCA_stack

# Run PCA_stack over the residual stack, following the same protocol as the residual
# builder: construct, init, repeated(), done().
pstack = PCA_stack(debug=True)
# The input here is an already-built stack rather than a raw frame.
pstack.init(stack=resid_stack)
pca_stack = pstack.repeated()
pstack.done()

nextChunk label 2017-12-29 00:00:00 with shape: (127, 11)
repeated: chunk label 2017-12-29 00:00:00 with shape (127, 11)
nextChunk label 2017-12-01 00:00:00 with shape: (128, 11)
repeated: chunk label 2017-12-01 00:00:00 with shape (128, 11)
nextChunk label 2017-11-03 00:00:00 with shape: (129, 11)
repeated: chunk label 2017-11-03 00:00:00 with shape (129, 11)
nextChunk label 2017-10-06 00:00:00 with shape: (127, 11)
repeated: chunk label 2017-10-06 00:00:00 with shape (127, 11)
nextChunk label 2017-09-08 00:00:00 with shape: (128, 11)
repeated: chunk label 2017-09-08 00:00:00 with shape (128, 11)
nextChunk label 2017-08-11 00:00:00 with shape: (126, 11)
repeated: chunk label 2017-08-11 00:00:00 with shape (126, 11)
nextChunk label 2017-07-14 00:00:00 with shape: (125, 11)
repeated: chunk label 2017-07-14 00:00:00 with shape (125, 11)
nextChunk label 2017-06-16 00:00:00 with shape: (124, 11)
repeated: chunk label 2017-06-16 00:00:00 with shape (124, 11)
nextChunk label 2017-05-19 00:00

In [34]:
# Summarize the PCA stack: one line per entry with its label and frame shape.
# Each entry is indexed as (label, frame). The unused suffix/data assignments were
# dropped, along with a commented-out save that targeted the residual file names.
for stk in pca_stack:
    print("Stack {} shape: {}".format(stk[0], stk[1].shape))
    

Stack 2017-12-29 00:00:00 shape: (1, 26)
Stack 2017-12-01 00:00:00 shape: (1, 26)
Stack 2017-11-03 00:00:00 shape: (1, 26)
Stack 2017-10-06 00:00:00 shape: (1, 26)
Stack 2017-09-08 00:00:00 shape: (1, 26)
Stack 2017-08-11 00:00:00 shape: (1, 26)
Stack 2017-07-14 00:00:00 shape: (1, 26)
Stack 2017-06-16 00:00:00 shape: (1, 26)
Stack 2017-05-19 00:00:00 shape: (1, 26)
Stack 2017-04-21 00:00:00 shape: (1, 26)
Stack 2017-03-24 00:00:00 shape: (1, 26)
Stack 2017-02-24 00:00:00 shape: (1, 26)
Stack 2017-01-27 00:00:00 shape: (1, 26)
Stack 2016-12-30 00:00:00 shape: (1, 26)
Stack 2016-12-02 00:00:00 shape: (1, 26)
Stack 2016-11-04 00:00:00 shape: (1, 26)
Stack 2016-10-07 00:00:00 shape: (1, 26)
Stack 2016-09-09 00:00:00 shape: (1, 26)
Stack 2016-08-12 00:00:00 shape: (1, 26)
Stack 2016-07-15 00:00:00 shape: (1, 26)


## Composed (residual, PCA) stack

In [35]:
%aimport trans.stacked.pipeline

from trans.stacked.pipeline import Pipeline_stack

# Fresh stage objects (no debug flag) to be chained: residuals first, then PCA.
resid_obj = Residual()
pca_obj   = PCA_stack()

plstack = Pipeline_stack([ resid_obj, pca_obj ], debug=True)

## Inelegant: manually init one member of the pipeline
# Only the residual stage is initialised here; the PCA stage gets no explicit init() —
# presumably Pipeline_stack feeds it the residual output. TODO confirm.
resid_obj.init(df=pct_df, start=resStart, end=regEnd, window=regWindow, step=regStep)
#plstack.init(stack=resid_stack)
pl_stack = plstack.repeated()
plstack.done()

repeated: chunk label 2017-12-29 00:00:00 with shape (127, 12)
IndCols: [('Pct', 'SPY')], depCol ('Pct', 'XLB'), cols [('Pct', 'SPY'), ('Pct', 'XLB')]
IndCols: [('Pct', 'SPY')], depCol ('Pct', 'XLE'), cols [('Pct', 'SPY'), ('Pct', 'XLE')]
IndCols: [('Pct', 'SPY')], depCol ('Pct', 'XLF'), cols [('Pct', 'SPY'), ('Pct', 'XLF')]
IndCols: [('Pct', 'SPY')], depCol ('Pct', 'XLI'), cols [('Pct', 'SPY'), ('Pct', 'XLI')]
IndCols: [('Pct', 'SPY')], depCol ('Pct', 'XLK'), cols [('Pct', 'SPY'), ('Pct', 'XLK')]
IndCols: [('Pct', 'SPY')], depCol ('Pct', 'XLP'), cols [('Pct', 'SPY'), ('Pct', 'XLP')]
IndCols: [('Pct', 'SPY')], depCol ('Pct', 'XLRE'), cols [('Pct', 'SPY'), ('Pct', 'XLRE')]
IndCols: [('Pct', 'SPY')], depCol ('Pct', 'XLU'), cols [('Pct', 'SPY'), ('Pct', 'XLU')]
IndCols: [('Pct', 'SPY')], depCol ('Pct', 'XLV'), cols [('Pct', 'SPY'), ('Pct', 'XLV')]
IndCols: [('Pct', 'SPY')], depCol ('Pct', 'XLY'), cols [('Pct', 'SPY'), ('Pct', 'XLY')]
IndCols: [('Pct', 'SPY')], depCol ('Pct', 'XTL'), cols 

In [36]:
# Summarize the composed (residual, PCA) stack: one line per entry with its label and
# frame shape. Each entry is indexed as (label, frame). The unused suffix/data
# assignments were dropped, along with a commented-out save that targeted the residual
# file names.
for stk in pl_stack:
    print("Stack {} shape: {}".format(stk[0], stk[1].shape))
    

Stack 2017-12-29 00:00:00 shape: (1, 26)
Stack 2017-12-01 00:00:00 shape: (1, 26)
Stack 2017-11-03 00:00:00 shape: (1, 26)
Stack 2017-10-06 00:00:00 shape: (1, 26)
Stack 2017-09-08 00:00:00 shape: (1, 26)
Stack 2017-08-11 00:00:00 shape: (1, 26)
Stack 2017-07-14 00:00:00 shape: (1, 26)
Stack 2017-06-16 00:00:00 shape: (1, 26)
Stack 2017-05-19 00:00:00 shape: (1, 26)
Stack 2017-04-21 00:00:00 shape: (1, 26)
Stack 2017-03-24 00:00:00 shape: (1, 26)
Stack 2017-02-24 00:00:00 shape: (1, 26)
Stack 2017-01-27 00:00:00 shape: (1, 26)
Stack 2016-12-30 00:00:00 shape: (1, 26)
Stack 2016-12-02 00:00:00 shape: (1, 26)
Stack 2016-11-04 00:00:00 shape: (1, 26)
Stack 2016-10-07 00:00:00 shape: (1, 26)
Stack 2016-09-09 00:00:00 shape: (1, 26)
Stack 2016-08-12 00:00:00 shape: (1, 26)
Stack 2016-07-15 00:00:00 shape: (1, 26)
