In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from pykalman import KalmanFilter
import yfinance as yf
import coin
from sklearn.linear_model import LinearRegression
import statsmodels.tsa.stattools as ts
from statsmodels.tsa.stattools import adfuller
from datetime import timedelta
plt.rcParams['figure.figsize'] = [12, 8]

#from basicbacktester import BasicBacktester

In [None]:
def download_dados(tickers, interval, period):
    """Download adjusted close prices and pass them to ``coin.calc_ret_ln``.

    Parameters
    ----------
    tickers : str or list of str
        Ticker symbol(s) forwarded to ``yfinance.download``.
    interval : str
        Bar size forwarded to ``yfinance.download`` (e.g. ``'1d'``).
    period : str
        Look-back window forwarded to ``yfinance.download`` (e.g. ``'10y'``).

    Returns
    -------
    object
        Whatever ``coin.calc_ret_ln`` returns for the 'Adj Close' data.
        NOTE(review): presumably a ``(returns, log_prices)`` pair -- confirm
        against the ``coin`` module.
    """
    # Recent yfinance releases default to auto_adjust=True, which drops the
    # 'Adj Close' column. Asking for unadjusted data explicitly keeps the
    # column lookup below working on both old and new versions.
    df = yf.download(
        tickers, interval=interval, period=period, auto_adjust=False
    )['Adj Close']

    return coin.calc_ret_ln(df)

In [None]:
# Comma-separated ticker symbols of the pair to analyse.
tickers = 'EWZ, PBR'

In [None]:
# Ten years of daily data; only the second element returned by download_dados
# is kept (the first is discarded).
_, prices = download_dados(tickers, interval='1d', period='10y')

In [None]:
# Display the frame obtained from download_dados.
prices

In [None]:
# Column vectors (n, 1) for the first and second asset.
ativo_1_array = np.array(prices[prices.columns[0]]).reshape(-1, 1)
ativo_2_array = np.array(prices[prices.columns[1]]).reshape(-1, 1)

# The spread in this notebook is built as `asset_1 - ratio * asset_2`, so the
# ratio must be the slope of asset_1 regressed on asset_2: X is the second
# asset, y is the first. (Fitting the other way round yields the slope of
# asset_2 on asset_1, which is not the coefficient that spread needs.)
reg = LinearRegression().fit(ativo_2_array, ativo_1_array)
static_hedge_ratio = reg.coef_[0][0]

print(f'The static hedge ratio is {round(static_hedge_ratio, 2)}')

In [None]:
# Spread of the pair: first column minus the static hedge ratio times the second.
spread = prices[prices.columns[0]].sub(
    prices[prices.columns[1]].mul(static_hedge_ratio)
)

In [None]:
# Augmented Dickey-Fuller test on the static spread; the first two entries of
# the result are the test statistic and the p-value.
adf_results = adfuller(spread.to_numpy())
print('ADF Statistic: %f' % (adf_results[0],))
print('p-value: %f' % (adf_results[1],))

In [None]:
# Plot the static-hedge-ratio spread against its index.
plt.plot(spread.index, spread.values, label='spread from static hedge ratio')
plt.legend()
plt.show()

In [None]:
def draw_date_coloured_scatterplot(etfs, prices):
    """
    Create a scatterplot of the two ETF prices, coloured by the date of each
    observation to show how the relationship between the two series changes
    over time.

    Parameters
    ----------
    etfs : sequence of two column labels
        ``etfs[0]`` is plotted on the x axis and ``etfs[1]`` on the y axis.
    prices : pandas.DataFrame
        Price frame containing both columns. Its index entries must provide
        a ``.date()`` method (e.g. a ``DatetimeIndex``), since the colour bar
        is labelled with dates.

    Raises
    ------
    ValueError
        If ``prices`` has no rows.
    """
    plen = len(prices)
    if plen == 0:
        raise ValueError("prices must contain at least one row")

    # Yellow-to-red colourmap: yellow marks early dates, red marks later ones.
    # `plt.get_cmap` is used because `plt.cm.get_cmap` no longer exists in
    # current matplotlib releases.
    colour_map = plt.get_cmap('YlOrRd')
    colours = np.linspace(0.1, 1, plen)

    # Create the scatterplot object
    scatterplot = plt.scatter(
        prices[etfs[0]], prices[etfs[1]],
        s=30, c=colours, cmap=colour_map,
        edgecolor='k', alpha=0.8
    )

    # Pick up to ten evenly spaced rows and pin the colour-bar ticks to the
    # colour value of exactly those rows, so each date label sits at the
    # colour it describes. This also works for frames shorter than nine rows.
    tick_rows = np.unique(
        np.linspace(0, plen - 1, min(plen, 10)).round().astype(int)
    )
    colourbar = plt.colorbar(scatterplot)
    colourbar.set_ticks(colours[tick_rows])
    colourbar.set_ticklabels(
        [str(prices.index[row].date()) for row in tick_rows]
    )

    # Label the axes with the columns that were actually plotted.
    plt.xlabel(etfs[0])
    plt.ylabel(etfs[1])
    plt.show()

In [None]:
# Scatter the first column against the second, coloured by date.
draw_date_coloured_scatterplot([prices.columns[0], prices.columns[1]], prices)

In [None]:
class KalmanFilterPairs():
    """Holds the inputs of a Kalman-filter regression of ``y`` on ``f``.

    Parameters
    ----------
    y : array-like
        Observed variable.
    f : array-like, one-dimensional
        Variable that forms the first column of the observation matrix.
    delta : float
        Parameter that adjusts the sensitivity of the state update.
    Ve : float
        Noise variance. NOTE(review): originally commented as "state noise
        variance"; in the usual formulation of this filter ``Ve`` is the
        measurement-noise variance -- confirm before relying on either.

    Attributes
    ----------
    F : numpy.ndarray of shape (n, 2)
        Observation matrix whose rows are ``[f_t, 1.0]``.
    """

    def __init__(self, y, f, delta, Ve):
        self.y = y  # observed variable
        self.f = f  # variable that is part of the observation matrix

        # Build the [f, 1] design matrix directly. The constant column is
        # always appended, even when `f` itself happens to be constant, so
        # F is guaranteed to have two columns.
        f_values = np.asarray(f, dtype=float)
        self.F = np.column_stack([f_values, np.ones(f_values.shape[0])])

        self.delta = delta  # sensitivity of the state update
        self.Ve = Ve  # noise variance (see class docstring)

In [None]:
# Set up the pairs filter inputs: the first column is the observed series,
# the second column feeds the observation matrix.
kfp = KalmanFilterPairs(
    y=prices[prices.columns[0]],
    f=prices[prices.columns[1]],
    delta=1e-4,
    Ve=0.1,
)

In [None]:
def calc_slope_intercept_kalman(etfs, prices, delta=1e-5):
    """
    Use the Kalman Filter from the pyKalman package to estimate a
    time-varying slope and intercept between two price series.

    The observation at each step is ``prices[etfs[1]]`` and the observation
    matrix row is ``[prices[etfs[0]], 1]``, so the two-dimensional state is
    the (slope, intercept) of ``etfs[1]`` regressed on ``etfs[0]``.

    Parameters
    ----------
    etfs : sequence of two column labels
        ``etfs[0]`` is the regressor, ``etfs[1]`` the observed series.
    prices : pandas.DataFrame
        Frame containing both columns.
    delta : float, optional
        Controls the transition covariance ``delta / (1 - delta) * I``;
        larger values let the state move faster. Must satisfy
        ``0 <= delta < 1``. Defaults to ``1e-5``.

    Returns
    -------
    state_means, state_covs
        The filtered state means and covariances returned by
        ``KalmanFilter.filter``.

    Raises
    ------
    ValueError
        If ``delta`` is outside ``[0, 1)``.
    """
    if not 0 <= delta < 1:
        raise ValueError("delta must satisfy 0 <= delta < 1")

    trans_cov = delta / (1 - delta) * np.eye(2)

    # One (1, 2) observation matrix per time step: [regressor price, 1].
    obs_mat = np.vstack(
        [prices[etfs[0]], np.ones(prices[etfs[0]].shape)]
    ).T[:, np.newaxis]

    kf = KalmanFilter(
        n_dim_obs=1,
        n_dim_state=2,
        initial_state_mean=np.zeros(2),
        initial_state_covariance=np.ones((2, 2)),
        transition_matrices=np.eye(2),
        observation_matrices=obs_mat,
        observation_covariance=1.0,
        transition_covariance=trans_cov
    )

    state_means, state_covs = kf.filter(prices[etfs[1]].values)
    return state_means, state_covs

In [None]:
def draw_slope_intercept_changes(prices, state_means):
    """
    Plot how the Kalman Filter's slope and intercept estimates evolve.

    Column 0 of ``state_means`` is drawn as the slope and column 1 as the
    intercept, each in its own subplot, indexed by ``prices.index``.
    """
    estimates = pd.DataFrame(
        {
            "slope": state_means[:, 0],
            "intercept": state_means[:, 1],
        },
        index=prices.index,
    )
    estimates.plot(subplots=True)
    plt.show()

In [None]:
# Filter the first two columns (cast to float) and plot the resulting
# slope and intercept paths.
state_means, state_covs = calc_slope_intercept_kalman([prices.columns[0], prices.columns[1]], prices.astype(float))
draw_slope_intercept_changes(prices, state_means)

In [None]:
# Spread under the static hedge ratio: first column minus ratio times second.
static_spread = prices[prices.columns[0]].sub(
    prices[prices.columns[1]].mul(static_hedge_ratio)
)

In [None]:
# The Kalman-filter overlays stay disabled: they read `kfp.spread` and
# `kfp.mean`, which the KalmanFilterPairs class in this notebook never sets.
# plt.plot(prices.index, kfp.spread, label='Kalman Filter spread')
# plt.plot(prices.index, kfp.mean, label='Kalman Filter mean', linewidth=2, alpha=0.8)

plt.plot(
    prices.index,
    static_spread,
    alpha=0.4,
    label='Static hedge ratio spread',
)
plt.legend()
plt.show()

In [3]:
pip install pandas.io.data

Note: you may need to restart the kernel to use updated packages.


ERROR: Could not find a version that satisfies the requirement pandas.io.data
ERROR: No matching distribution found for pandas.io.data
