In [11]:
# Display the value of every top-level expression in a cell, not only the last
# one; several cells below rely on this to show more than one result.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

# Reload all modules imported with %aimport
%load_ext autoreload
# Mode 1: only modules registered through %aimport are reloaded before a cell runs.
%autoreload 1

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [12]:
import pandas as pd
# Shorthand used below to build MultiIndex column keys, e.g. idx["Pct", "SPY"].
idx = pd.IndexSlice

from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.linear_model import LinearRegression

from datetime import timedelta

# Register the project modules with autoreload so that edits to them are picked up.
%aimport trans.data
%aimport trans.gtrans
%aimport trans.reg

from trans.data import GetData
# Shared data helper; later cells call gd.combine_data(...) and gd.save_data(...).
gd = GetData()
# NOTE(review): the star import hides the origin of the transformer classes used
# below (GenSelectAttrsTransformer, DataFrameConcat, ...) — presumably they come
# from trans.gtrans; consider importing them by name.
from trans.gtrans import *
from trans.reg import Reg


# Reusable transformer instances shared by the pipelines defined further down.

# Applies pandas' pct_change() to whatever object the transformer hands to `func`.
pctTrans = DataFrameFunctionTransformer(
    func=lambda frame: frame.pct_change(),
)

# Applies rank(method="first"), i.e. ties are ranked in order of appearance.
# The transformer itself is constructed with axis=1.
rankTrans = DataFrameFunctionTransformer(
    func=lambda frame: frame.rank(method="first"),
    axis=1,
)

# Selects the 'Pct' attribute.
# NOTE(review): dropSingle=False presumably keeps the top column level — confirm.
pctOnlyTrans = GenSelectAttrsTransformer(['Pct'], dropSingle=False)

## Get the raw data

In [13]:
# Tickers to load. SPY is included because the later cells use ("Pct", "SPY")
# as the independent variable of the regressions.
TICKERS = ['FB', 'AAPL', 'AMZN', 'NFLX', 'GOOG', 'SPY']

# trans.data is already registered with %aimport in the import cell, so the
# magic is not repeated here.
raw_df = gd.combine_data(TICKERS)
raw_df.head()

Unnamed: 0_level_0,Open,Open,Open,Open,Open,Open,High,High,High,High,...,Adj Close,Adj Close,Adj Close,Adj Close,Volume,Volume,Volume,Volume,Volume,Volume
Unnamed: 0_level_1,FB,AAPL,AMZN,NFLX,GOOG,SPY,FB,AAPL,AMZN,NFLX,...,AMZN,NFLX,GOOG,SPY,FB,AAPL,AMZN,NFLX,GOOG,SPY
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2000-01-03,,3.745536,81.5,,,148.25,,4.017857,89.5625,,...,89.375,,,103.807343,,133949200,16117600.0,,,8164300.0
2000-01-04,,3.866071,85.375,,,143.531204,,3.950893,91.5,,...,81.9375,,,99.747833,,128094400,17487400.0,,,8089800.0
2000-01-05,,3.705357,70.5,,,139.9375,,3.948661,75.125,,...,69.75,,,99.926254,,194580400,38457400.0,,,12177900.0
2000-01-06,,3.790179,71.3125,,,139.625,,3.821429,72.6875,,...,65.5625,,,98.320305,,191993200,18752000.0,,,6227200.0
2000-01-07,,3.446429,67.0,,,140.3125,,3.607143,70.5,,...,69.5625,,,104.030365,,115183600,10505400.0,,,8066500.0


## Define featUn transformer: compute Pct and append to Adj Close

In [14]:
# Branch 1: the 'Adj Close' selection on its own.
pipe_close = make_pipeline(
    GenSelectAttrsTransformer(['Adj Close'], dropSingle=True),
)

# Branch 2: the same selection followed by the percent-change transformer.
pipe_pct = make_pipeline(
    GenSelectAttrsTransformer(['Adj Close'], dropSingle=True),
    pctTrans,
)

# Union of both branches, labelled "Adj Close" and "Pct" respectively.
featUn = GenDataFrameFeatureUnion([
    ("Adj Close", pipe_close),
    ("Pct", pipe_pct),
])

In [15]:
# Single-step pipeline that selects only the 'Pct' attribute.
# NOTE(review): no other cell visible here references pipe_pct_only; pipe_nn
# uses the pctOnlyTrans instance instead — confirm whether this is still needed.
pipe_pct_only = make_pipeline(
    GenSelectAttrsTransformer(['Pct'], dropSingle=False),
)

## Create pipeline to prepare data for regression

In [16]:
# Data-preparation pipeline; steps run in the order listed.
pipe_nn = make_pipeline(
    # Adds the "Pct" columns next to "Adj Close".
    featUn,
    # The resulting frame's index is labelled "Dt" (see the output below).
    DatetimeIndexTransformer("Dt"),
    # Keep only the "Pct" attribute.
    pctOnlyTrans,
    # NOTE(review): presumably restricts rows to dates on which SPY has data — confirm.
    RestrictToCalendarColTransformer(("Pct", "SPY")),
    # NOTE(review): presumably drops rows in which all values are null — confirm.
    RestrictToNonNullTransformer("all"),
    # Disabled step, kept for reference: FillNullTransformer(method="bfill")
)

pct_df = pipe_nn.fit_transform(raw_df)
pct_df.head()

Unnamed: 0_level_0,Pct,Pct,Pct,Pct,Pct,Pct
Unnamed: 0_level_1,AAPL,AMZN,FB,GOOG,NFLX,SPY
Dt,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
2000-01-04,-0.08431,-0.083217,,,,-0.039106
2000-01-05,0.014634,-0.148741,,,,0.001789
2000-01-06,-0.086538,-0.060036,,,,-0.016071
2000-01-07,0.047368,0.06101,,,,0.058076
2000-01-10,-0.017588,-0.005391,,,,0.003431


## Do a rolling regression on the dataframe with prepared data

In [17]:
# Regression helper built on the prepared percent-return frame.
ra = Reg(pct_df)

# modelCols returns a pair (independent columns, dependent columns): SPY is the
# independent variable and every other "Pct" column becomes a dependent one.
ma = ra.modelCols([idx["Pct", "SPY"]])
ma

# The dates are parsed with an explicit month/day/year format instead of
# infer_datetime_format=True, which is deprecated in pandas 2.x and leaves the
# day/month order to inference.
# NOTE(review): the three trailing arguments look like (start, end, step) of the
# rolling regression; the result has one row every 4 weeks — confirm against
# Reg.rollingModelAll.
beta_df = ra.rollingModelAll(
    *ma,
    pd.to_datetime("01/01/2000", format="%m/%d/%Y"),
    pd.to_datetime("12/29/2017", format="%m/%d/%Y"),
    timedelta(weeks=4),
)
beta_df.tail()

([('Pct', 'SPY')],
 [('Pct', 'AAPL'),
  ('Pct', 'AMZN'),
  ('Pct', 'FB'),
  ('Pct', 'GOOG'),
  ('Pct', 'NFLX')])

IndCols: [('Pct', 'SPY')], depCol ('Pct', 'AAPL'), cols [('Pct', 'SPY'), ('Pct', 'AAPL')]
IndCols: [('Pct', 'SPY')], depCol ('Pct', 'AMZN'), cols [('Pct', 'SPY'), ('Pct', 'AMZN')]
IndCols: [('Pct', 'SPY')], depCol ('Pct', 'FB'), cols [('Pct', 'SPY'), ('Pct', 'FB')]
Fit: the following columns have naN:  ['FB']
Fit: the following columns have naN:  ['FB']
Fit: the following columns have naN:  ['FB']
Fit: the following columns have naN:  ['FB']
Fit: the following columns have naN:  ['FB']
Fit: the following columns have naN:  ['FB']
Fit: the following columns have naN:  ['FB']
Fit: the following columns have naN:  ['FB']
Fit: the following columns have naN:  ['FB']
Fit: the following columns have naN:  ['FB']
Fit: the following columns have naN:  ['FB']
Fit: the following columns have naN:  ['FB']
Fit: the following columns have naN:  ['FB']
Fit: the following columns have naN:  ['FB']
Fit: the following columns have naN:  ['FB']
Fit: the following columns have naN:  ['FB']
Fit: the follo

Unnamed: 0_level_0,Beta 0,Beta 0,Beta 0,Beta 0,Beta 0,Beta 1,Beta 1,Beta 1,Beta 1,Beta 1
Unnamed: 0_level_1,AAPL,AMZN,FB,GOOG,NFLX,AAPL,AMZN,FB,GOOG,NFLX
Dt,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
2017-09-08,-0.000272,-0.000858,0.000231,5.6e-05,0.00091,1.285798,1.464521,1.288074,1.239199,1.292776
2017-10-06,-0.005136,-0.001802,-0.004168,0.001119,1.3e-05,2.281003,1.691252,2.56647,0.923909,3.312801
2017-11-03,0.003963,0.001401,-1.9e-05,0.000439,-0.00082,1.733427,6.054137,2.509251,2.880951,1.734489
2017-12-01,-0.001837,0.001098,-0.002488,-0.002125,-0.006099,1.339091,1.08263,1.164516,0.837844,2.177337
2017-12-29,-0.001793,-0.000217,-0.000183,0.001072,0.000821,1.699265,0.767625,0.859952,1.081924,0.908602


## Append the rolling betas to the prepared data

In [20]:
# The frames to join are given to the transformer's constructor; fit_transform
# is called with an empty DataFrame.
# NOTE(review): the empty frame looks like a placeholder input — confirm
# against DataFrameConcat.
concatTrans = DataFrameConcat([pct_df, beta_df])
ret_and_beta_df = concatTrans.fit_transform(pd.DataFrame())

# Last rows up to the final regression date, followed by the frame's dimensions.
ret_and_beta_df.loc[:"2017-12-29", :].tail()
ret_and_beta_df.shape

Unnamed: 0_level_0,Beta 0,Beta 0,Beta 0,Beta 0,Beta 0,Beta 1,Beta 1,Beta 1,Beta 1,Beta 1,Pct,Pct,Pct,Pct,Pct,Pct
Unnamed: 0_level_1,AAPL,AMZN,FB,GOOG,NFLX,AAPL,AMZN,FB,GOOG,NFLX,AAPL,AMZN,FB,GOOG,NFLX,SPY
Dt,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2
2017-12-22,,,,,,,,,,,0.0,-0.005448,-0.001409,-0.0033,0.006998,-0.000262
2017-12-26,,,,,,,,,,,-0.02537,0.00719,-0.006828,-0.003188,-0.011477,-0.001196
2017-12-27,,,,,,,,,,,0.000176,0.004674,0.009262,-0.006974,-0.008095,0.000487
2017-12-28,,,,,,,,,,,0.002814,0.003248,0.001689,-0.001172,0.03474,0.002057
2017-12-29,-0.001793,-0.000217,-0.000183,0.001072,0.000821,1.699265,0.767625,0.859952,1.081924,0.908602,-0.010814,-0.014021,-0.008206,-0.00166,-0.003892,-0.003771


(4569, 16)

In [12]:
# Persist the returns, the rolling betas and their concatenation through the
# project's GetData helper, one file per frame.
# NOTE(review): this cell's execution count (12) is out of sequence with the
# surrounding cells; re-run the notebook top to bottom before trusting the files.
gd.save_data(pct_df, "ret_df.pkl")
gd.save_data(beta_df, "beta_df.pkl")
gd.save_data(ret_and_beta_df, "ret_and_beta_df.pkl")

## Prepare for Return Attribution

### Find the attributes with the sensitivities

In [23]:
# NOTE(review): RegAttr is not imported by name in the cells visible here —
# presumably it arrives through `from trans.gtrans import *`; confirm.
rab = RegAttr(ret_and_beta_df)
rab.setSens(beta_df)

# Raw string: "\d" is not a valid escape in a normal string literal, so it
# triggers a DeprecationWarning (a SyntaxWarning on Python 3.12+). The pattern
# handed to sensAttrs is unchanged.
betaAttrs = rab.sensAttrs(r'^Beta \d+$')
betaAttrs

['Beta 0', 'Beta 1']

## Roll the betas forward

In [46]:
# Pipeline that carries the sparse betas forward onto every date.
beta_r_pl = make_pipeline(
    # Keep only the sensitivity attributes found above.
    GenSelectAttrsTransformer(betaAttrs),
    # NOTE(review): presumably shifts the rows by one period — confirm.
    ShiftTransformer(1),
    # Forward-fill the gaps between regression dates.
    FillNullTransformer(method="ffill"),
    # Tag the level-0 attribute names so they do not clash with the originals.
    GenRenameAttrsTransformer(lambda attr: attr + ' rolled fwd', level=0),
)

beta_rolled_df = beta_r_pl.fit_transform(ret_and_beta_df)
beta_rolled_df.tail()

transform: pandas version <= 0.20.


Unnamed: 0_level_0,Beta 0 rolled fwd,Beta 0 rolled fwd,Beta 0 rolled fwd,Beta 0 rolled fwd,Beta 0 rolled fwd,Beta 1 rolled fwd,Beta 1 rolled fwd,Beta 1 rolled fwd,Beta 1 rolled fwd,Beta 1 rolled fwd
Unnamed: 0_level_1,AAPL,AMZN,FB,GOOG,NFLX,AAPL,AMZN,FB,GOOG,NFLX
Dt,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
2018-02-13,-0.001793,-0.000217,-0.000183,0.001072,0.000821,1.699265,0.767625,0.859952,1.081924,0.908602
2018-02-14,-0.001793,-0.000217,-0.000183,0.001072,0.000821,1.699265,0.767625,0.859952,1.081924,0.908602
2018-02-15,-0.001793,-0.000217,-0.000183,0.001072,0.000821,1.699265,0.767625,0.859952,1.081924,0.908602
2018-02-16,-0.001793,-0.000217,-0.000183,0.001072,0.000821,1.699265,0.767625,0.859952,1.081924,0.908602
2018-02-20,-0.001793,-0.000217,-0.000183,0.001072,0.000821,1.699265,0.767625,0.859952,1.081924,0.908602


### Append the rolled betas to the regression results

In [48]:
# Join the returns-plus-betas frame with the rolled-forward betas. As in the
# earlier concat cell, fit_transform is called with an empty DataFrame.
ret_and_rolled_beta_pl = DataFrameConcat([ret_and_beta_df, beta_rolled_df])
ret_and_rolled_beta_df = ret_and_rolled_beta_pl.fit_transform(pd.DataFrame())

ret_and_rolled_beta_df.tail()

Unnamed: 0_level_0,Beta 0,Beta 0,Beta 0,Beta 0,Beta 0,Beta 0 rolled fwd,Beta 0 rolled fwd,Beta 0 rolled fwd,Beta 0 rolled fwd,Beta 0 rolled fwd,...,Beta 1 rolled fwd,Beta 1 rolled fwd,Beta 1 rolled fwd,Beta 1 rolled fwd,Pct,Pct,Pct,Pct,Pct,Pct
Unnamed: 0_level_1,AAPL,AMZN,FB,GOOG,NFLX,AAPL,AMZN,FB,GOOG,NFLX,...,AMZN,FB,GOOG,NFLX,AAPL,AMZN,FB,GOOG,NFLX,SPY
Dt,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2018-02-13,,,,,,-0.001793,-0.000217,-0.000183,0.001072,0.000821,...,0.767625,0.859952,1.081924,0.908602,0.010018,0.020401,-0.01848,0.000152,0.00124,0.002487
2018-02-14,,,,,,-0.001793,-0.000217,-0.000183,0.001072,0.000821,...,0.767625,0.859952,1.081924,0.908602,0.018437,0.025832,0.036789,0.016728,0.02993,0.013496
2018-02-15,,,,,,-0.001793,-0.000217,-0.000183,0.001072,0.000821,...,0.767625,0.859952,1.081924,0.908602,0.033578,0.007381,0.002451,0.018529,0.053647,0.01276
2018-02-16,,,,,,-0.001793,-0.000217,-0.000183,0.001072,0.000821,...,0.767625,0.859952,1.081924,0.908602,-0.003237,-0.008941,-0.014448,0.004846,-0.006244,0.000293
2018-02-20,,,,,,-0.001793,-0.000217,-0.000183,0.001072,0.000821,...,0.767625,0.859952,1.081924,0.908602,-0.003364,0.013571,-0.007612,0.006997,0.000108,-0.006115


In [29]:
# Regression helper over the combined frame.
# NOTE(review): `reg` is not referenced by any later cell visible here, and this
# cell's execution count (29) is out of sequence — confirm whether it is still needed.
reg = Reg(ret_and_rolled_beta_df)

### Find the columns for: 
#### independent variables
#### dependent variables
#### sensitivities

In [49]:
# Independent variables: the constant column ("Pct", "1"), which is added by the
# addConst cell below, and the SPY return.
indCols = [("Pct", "1"), ("Pct", "SPY")]
indCols

rab2 = RegAttr(ret_and_rolled_beta_df)
rab2.setSens(beta_rolled_df)

# Raw string: "\d" is not a valid escape in a normal string literal, so it
# triggers a DeprecationWarning (a SyntaxWarning on Python 3.12+). The pattern
# handed to sensAttrs is unchanged.
sensAttrs = rab2.sensAttrs(r'^Beta \d+ rolled fwd$')
sensAttrs

# Dependent variables: one ("Pct", ticker) column for each ticker that has
# rolled-forward sensitivities.
depTickers = rab2.depTickersFromSensAttrs(sensAttrs)
depTickers
depCols = [("Pct", ticker) for ticker in depTickers]
depCols



[('Pct', '1'), ('Pct', 'SPY')]

['Beta 0 rolled fwd', 'Beta 1 rolled fwd']

['AAPL', 'AMZN', 'FB', 'GOOG', 'NFLX']

[('Pct', 'AAPL'),
 ('Pct', 'AMZN'),
 ('Pct', 'FB'),
 ('Pct', 'GOOG'),
 ('Pct', 'NFLX')]

## Add constant (for intercept return) column

In [50]:
# Add the constant column under the key ("Pct", "1") with value 1; this is the
# first entry of indCols, so it pairs with the intercept sensitivity.
# NOTE(review): presumably mutates the frame held by rab2 in place — confirm
# whether re-running this cell is harmless.
rab2.addConst(("Pct", "1"), 1)

addConst


### Perform the return attribution

In [51]:
# Return attribution from the independent columns, the dependent columns and
# the rolled-forward sensitivities. The result holds per-factor contributions
# plus "Predicted" and "Error" column groups (see the output below).
retAttr_df = rab2.retAttrib(indCols, depCols, sensAttrs)


In [52]:
# Last rows of the attribution up to and including 2018-02-07.
retAttr_df.loc[:"2018-02-07", :].tail()

Unnamed: 0_level_0,Contrib from 1,Contrib from 1,Contrib from 1,Contrib from 1,Contrib from 1,Contrib from SPY,Contrib from SPY,Contrib from SPY,Contrib from SPY,Contrib from SPY,Predicted,Predicted,Predicted,Predicted,Predicted,Error,Error,Error,Error,Error
Unnamed: 0_level_1,AAPL,AMZN,FB,GOOG,NFLX,AAPL,AMZN,FB,GOOG,NFLX,AAPL,AMZN,FB,GOOG,NFLX,AAPL,AMZN,FB,GOOG,NFLX
Dt,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2
2018-02-01,-0.001793,-0.000217,-0.000183,0.001072,0.000821,-0.001929,-0.000871,-0.000976,-0.001228,-0.001031,-0.003722,-0.001088,-0.00116,-0.000156,-0.00021,0.005812,-0.040879,0.034334,-0.001759,-0.019139
2018-02-02,-0.001793,-0.000217,-0.000183,0.001072,0.000821,-0.036993,-0.016711,-0.018721,-0.023553,-0.01978,-0.038785,-0.016928,-0.018904,-0.022481,-0.018959,-0.004605,0.045669,0.004352,-0.025305,0.027862
2018-02-05,-0.001793,-0.000217,-0.000183,0.001072,0.000821,-0.071068,-0.032104,-0.035965,-0.045249,-0.038,-0.07286,-0.032321,-0.036149,-0.044177,-0.037179,0.047876,0.004383,-0.011255,-0.006278,-0.012068
2018-02-06,-0.001793,-0.000217,-0.000183,0.001072,0.000821,0.033479,0.015124,0.016943,0.021316,0.017901,0.031687,0.014907,0.01676,0.022389,0.018723,0.010105,0.023108,0.005584,0.001101,0.026349
2018-02-07,-0.001793,-0.000217,-0.000183,0.001072,0.000821,-0.009218,-0.004164,-0.004665,-0.005869,-0.004929,-0.011011,-0.004381,-0.004848,-0.004797,-0.004108,-0.010396,-0.01368,-0.022835,-0.024835,-0.000258


In [37]:
# Persist the return-attribution frame through the project's GetData helper.
# NOTE(review): this cell's execution count (37) is lower than that of the cell
# that builds retAttr_df (51); re-run it after the attribution to refresh the file.
gd.save_data(retAttr_df, "retattr_df.pkl")