# Exponentially Weighted Covariances

In [1]:
import pandas as pd
import numpy as np
import datetime
import functools

import quandl

%load_ext rpy2.ipython

  from pandas.core.index import Index as PandasIndex


$$
\hat{\beta} = \frac{\mathrm{Cov}(x,y)}{\mathrm{Var}(x)}
$$

In [2]:
@functools.lru_cache(maxsize=1600)
def fetch_quandl(my_data_items, start_date=None, returns="pandas"):
    """Download one or more Quandl datasets, memoising the result.

    Parameters
    ----------
    my_data_items : iterable of str
        Quandl dataset codes. Because the function is wrapped in
        ``functools.lru_cache``, this must be hashable (pass a tuple, not a
        list); it is converted to a list before being handed to ``quandl.get``.
    start_date : optional
        Forwarded to ``quandl.get`` as ``trim_start``. Must also be hashable.
    returns : str, default "pandas"
        Forwarded unchanged to ``quandl.get``.

    Returns
    -------
    Whatever ``quandl.get`` returns for the given arguments.

    Notes
    -----
    NOTE(review): ``BrianBoonstraPrivateKeys`` is not defined in the visible
    cells; it must already exist in the kernel namespace as a mapping with a
    ``'Quandl'`` entry, otherwise this call raises ``NameError`` -- confirm
    where it is loaded.
    """
    dataset_codes = list(my_data_items)
    quandl_key = BrianBoonstraPrivateKeys['Quandl']
    return quandl.get(
        dataset_codes,
        returns=returns,
        trim_start=start_date,
        api_key=quandl_key,
    )

In [3]:
# Adjusted closing prices for SUN and SPY, starting 14 calendar days before today.
lookback_start = datetime.date.today() - datetime.timedelta(days=14)
raw_prices = fetch_quandl(('EOD/SUN', 'EOD/SPY'), start_date=lookback_start)

# Keep only the adjusted-close columns and give them short ticker names.
data_df = raw_prices.loc[:, ['EOD/SUN - Adj_Close', 'EOD/SPY - Adj_Close']]
data_df.columns = ['SUN', 'SPY']
data_df

Unnamed: 0_level_0,SUN,SPY
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2020-04-27,22.797801,287.05
2020-04-28,22.971683,285.73
2020-04-29,23.947351,293.21
2020-04-30,23.976332,290.48
2020-05-01,23.445027,282.79
2020-05-04,23.609248,283.57
2020-05-05,23.343113,286.19
2020-05-06,23.46,284.25
2020-05-07,22.71,287.68
2020-05-08,23.84,292.44


In [6]:
# Simple returns, aligned forward: the row for date t holds the return from
# t to the next row. The last row has no next price, so it is dropped.
price_changes = data_df.diff()
prior_prices = data_df.shift(1)
ret_df = (price_changes / prior_prices).shift(-1).iloc[:-1]
ret_df

Unnamed: 0_level_0,SUN,SPY
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2020-04-27,0.007627,-0.004599
2020-04-28,0.042473,0.026179
2020-04-29,0.00121,-0.009311
2020-04-30,-0.02216,-0.026473
2020-05-01,0.007005,0.002758
2020-05-04,-0.011273,0.009239
2020-05-05,0.005007,-0.006779
2020-05-06,-0.031969,0.012067
2020-05-07,0.049758,0.016546
2020-05-08,0.034815,0.000205


In [7]:
%%R 

exp_wtd_cov_unbiased = function(x1, x2, lamda) {
    # Expanding-window, exponentially weighted, bias-corrected covariance of
    # x1 and x2.
    #
    # Arguments:
    #   x1, x2  numeric vectors of equal length, oldest observation first.
    #   lamda   smoothing factor; within each window an observation k steps
    #           before the newest one gets weight (1 - lamda)^k.
    #
    # Returns a numeric vector the same length as x1 whose i-th element is the
    # covariance estimate from observations 1..i. The first element is NaN
    # (one observation: the bias correction is 1/0 and the biased covariance
    # is 0). Empty input returns numeric(0).
    #
    # Slow, not using update formula, but avoiding R loops so better at small
    # to medium size.
    stopifnot(length(x1) == length(x2))

    subgroup_cov = function(i, x1, x2, lamda) {
        y1 = x1[seq_len(i)]
        y2 = x2[seq_len(i)]
        # Newest observation (position i) gets exponent 0, i.e. weight 1.
        weights = (1 - lamda)^((i - 1):0)
        total_weight = sum(weights)
        ewma1 = sum(weights * y1) / total_weight
        ewma2 = sum(weights * y2) / total_weight
        biased_cov = sum(weights * (y1 - ewma1) * (y2 - ewma2)) / total_weight
        # Reliability-weights correction: (sum w)^2 / ((sum w)^2 - sum w^2).
        bias_correction = total_weight^2 / (total_weight^2 - sum(weights^2))
        bias_correction * biased_cov
    }

    # seq_along (not 1:length(x1)) so empty input yields no iterations instead
    # of iterating over c(1, 0); vapply guarantees a numeric vector result.
    vapply(seq_along(x1), subgroup_cov, numeric(1),
           x1 = x1, x2 = x2, lamda = lamda)
}

In [8]:
# All-NaN Series with the same index as ret_df.SUN, bound to the name that the
# R cell's output (-o r_ew_covs) later replaces.
# np.nan rather than np.NaN: the capitalised alias was removed in NumPy 2.0.
r_ew_covs = np.nan * ret_df.SUN


In [9]:
%%R -i ret_df -o r_ew_covs

# Expanding exponentially weighted covariance of SUN vs SPY returns, using
# the same 0.8 smoothing factor as the pandas ewm(alpha=0.8) call.
r_ew_covs = exp_wtd_cov_unbiased(x1 = ret_df$SUN, x2 = ret_df$SPY, lamda = 0.8)


In [10]:
# Display the covariance values exported from the R cell (-o r_ew_covs).
r_ew_covs

0,1,2,3,4,5,6,7,8
,0.000536,0.00061,0.000453,...,-9e-05,-0.000283,0.00022,4.4e-05


In [11]:
# Pairwise exponentially weighted covariances from pandas (alpha=0.8,
# adjust=True). Taking the 'SUN' column and then the 'SPY' entries at index
# level 1 leaves one SUN-vs-SPY covariance per date.
ewm_cov_pairs = ret_df.ewm(alpha=0.8, adjust=True).cov()
pd_ew_covs = ewm_cov_pairs['SUN'].xs('SPY', level=1)
pd_ew_covs

Date
2020-04-27         NaN
2020-04-28    0.000536
2020-04-29    0.000610
2020-04-30    0.000453
2020-05-01    0.000364
2020-05-04   -0.000002
2020-05-05   -0.000090
2020-05-06   -0.000283
2020-05-07    0.000220
2020-05-08    0.000044
Name: SUN, dtype: float64

In [12]:
# Side-by-side table of the pandas result and the R result.
pd.DataFrame(dict(Pandas=pd_ew_covs, R=r_ew_covs))

Unnamed: 0_level_0,Pandas,R
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2020-04-27,,
2020-04-28,0.000536,0.000536
2020-04-29,0.00061,0.00061
2020-04-30,0.000453,0.000453
2020-05-01,0.000364,0.000364
2020-05-04,-2e-06,-2e-06
2020-05-05,-9e-05,-9e-05
2020-05-06,-0.000283,-0.000283
2020-05-07,0.00022,0.00022
2020-05-08,4.4e-05,4.4e-05
