In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
import pymc3 as pm
import numpy as np
import pandas as pd
import arviz
from sklearn.preprocessing import scale
from yahoo_fin.stock_info import get_data
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Fetch daily price history for every ticker, keyed by ticker symbol.
# `index_as_date=False` keeps the date as a regular column (used for the merge below).
ticker_list = ["dbx", "box"]
historical_data = {
    symbol: get_data(symbol, start_date="6/28/2019", index_as_date=False, interval="1d")
    for symbol in ticker_list
}

In [None]:
# Build one frame of closing prices, one column per company, indexed by date.
# The merge on "date" keeps only the dates present in both downloads.
dropbox_close = historical_data['dbx'][["close", "date"]].rename(columns={"close": "Dropbox"})
box_close = historical_data['box'][["close", "date"]].rename(columns={"close": "Box"})

data = (
    dropbox_close
    .merge(box_close, on="date")
    .set_index("date")
    # Forward-fill any missing closes; `.ffill()` replaces the deprecated
    # `fillna(method='ffill')` spelling and gives the same result.
    .ffill()
)
data.head()

In [None]:
# Day-over-day percentage change of each closing price.
# Rows containing missing values (e.g. the first row, which has no previous
# price to compare against) are dropped.
daily_change = data.pct_change()
returns = pd.DataFrame(daily_change.dropna())
returns.head()

In [None]:
# Standardise each price column independently with sklearn's `scale`,
# so both series are on a comparable scale for the regression below.
price = data.apply(scale, axis=0)
price.head()

In [None]:
# Summary statistics for the raw prices and for the daily returns.
price_summary = data.describe()
returns_summary = returns.describe()
print("Price Data\n", price_summary, '\n\nReturns Data\n', returns_summary)

In [None]:
# One panel per company showing its closing price over time.
# The date range in the titles is read from the data itself: the download
# specifies only a start date, so a hard-coded end date would be wrong
# whenever the notebook is re-run.
first_day = data.index.min().strftime('%d-%m-%Y')
last_day = data.index.max().strftime('%d-%m-%Y')

fig, axes = plt.subplots(nrows=2, ncols=1, figsize=[8, 6])
for ax, company in zip(axes, ['Dropbox', 'Box']):
    ax.set_title(f'Historical stock prices of {company} [{first_day} to {last_day}]\n',
                 fontsize=12, color='black')
    ax.plot(data[company], label='Closing Price', color='black')
    ax.legend(loc=2)

fig.tight_layout();

In [None]:
fig, axes = plt.subplots(figsize=(12, 5), ncols=2)

# Left panel: both closing-price series, Dropbox on a secondary y-axis.
data.plot(secondary_y='Dropbox', ax=axes[0])
axes[0].set_title('Closing Price Series')

# Right panel: Box vs. Dropbox, each point coloured by its position in time
# (0.1 = first row of `data`, 1 = last row).
time_color = np.linspace(0.1, 1, len(data))
points = axes[1].scatter(data.Box,
                         data.Dropbox,
                         c=time_color,
                         s=15,
                         cmap='spring')
axes[1].set_title('Price Correlation over Time')

# Label the colorbar with the year of evenly spaced rows. The tick positions
# are set explicitly to the colour value of each labelled row, so every label
# sits at the colour it describes and the number of labels always matches the
# number of ticks.
cbar = plt.colorbar(points, ax=axes[1])
n_ticks = min(10, len(data))
tick_rows = np.linspace(0, len(data) - 1, n_ticks).astype(int)
cbar.set_ticks(time_color[tick_rows])
cbar.set_ticklabels([str(data.index[row].year) for row in tick_rows])

# Overlay a single static regression line (no scatter) for reference.
sns.regplot(x='Box', y='Dropbox',
            data=data,
            scatter=False,
            color='k',
            line_kws={'lw': 2,
                      'ls': '--'},
            ax=axes[1])
sns.despine()
fig.tight_layout();

In [None]:
# Priors: the intercept (alpha) and slope (beta) are each allowed to drift
# over time as a Gaussian random walk with one value per row of `data`.
# The step size of each walk gets its own Exponential prior.
n_steps = len(data)
with pm.Model() as model:
    s_alpha = pm.Exponential('sigma alpha', lam=50.)
    alpha = pm.GaussianRandomWalk('alpha', sd=s_alpha, shape=n_steps)

    s_beta = pm.Exponential('sigma beta', lam=50.)
    beta = pm.GaussianRandomWalk('beta', sd=s_beta, shape=n_steps)

In [None]:
with model:
    # Time-varying linear predictor on the standardised prices:
    # Box is regressed on Dropbox with a per-day intercept and slope.
    dropbox_scaled = price.Dropbox
    box_scaled = price.Box
    reg = alpha + beta * dropbox_scaled
    # Observation noise around the regression line.
    sd = pm.HalfNormal('SD', sigma=0.1)
    likelihood = pm.Normal('y', mu=reg, sigma=sd, observed=box_scaled)

In [None]:
# Display the model's variables and their dependencies as a graph.
model_graph = pm.model_to_graphviz(model)
model_graph

In [None]:
# Sampler configuration, collected in one place so it is easy to tune.
# A fixed random seed keeps the run reproducible.
sampler_settings = dict(
    tune=2000,
    draws=200,
    cores=None,
    target_accept=.9,
    random_seed=42,
)
with model:
    trace_rw = pm.sample(**sampler_settings)

In [None]:
# Graph shown in the PyMC3 docs
# Every posterior sample of the intercept and slope paths is drawn as a faint
# line, so dense regions show where the coefficients most plausibly lie.
fig, axes = plt.subplots(figsize=(15, 5), ncols=2, sharex=True)

# The x-axis of each panel is the row position in `price`. Tick positions are
# fixed to the rows whose dates are used as labels; assigning labels to
# matplotlib's automatic ticks would attach dates to the wrong positions.
# `max(..., 1)` keeps the step valid when there are fewer than 9 rows.
tick_rows = np.arange(0, len(price), max(len(price) // 9, 1))
tick_dates = [str(price.index[row].date()) for row in tick_rows]

panels = [
    ('alpha', 'r', 'Alpha', 'Intercept'),
    ('beta', 'b', 'Beta', 'Slope'),
]
for ax, (var_name, line_color, y_label, title) in zip(axes, panels):
    ax.plot(trace_rw[var_name].T, line_color, alpha=.05)
    ax.set_xlabel('Time')
    ax.set_ylabel(y_label)
    ax.set_title(title)
    ax.set_xticks(tick_rows)
    # Rotate the full-date labels so neighbouring ticks do not overlap.
    ax.set_xticklabels(tick_dates, rotation=30, ha='right')

fig.suptitle('Coefficient change over time', fontsize=14)
sns.despine()
fig.tight_layout()
fig.subplots_adjust(top=.9);

In [None]:
# Graph shown in the PyMC3 docs
# The model regresses Box on Dropbox (mu = alpha + beta * Dropbox), so the
# regression lines must be drawn with Dropbox on the x-axis and Box on the
# y-axis; plotting them the other way round shows lines that do not
# correspond to the fitted model.
x = np.linspace(price.Dropbox.min(),
                price.Dropbox.max())

# One colour value per time step (0.1 = first row of `price`, 1 = last row).
# The random walks have one entry per row, so the same values colour both the
# regression lines and the scatter points.
colors = np.linspace(0.1, 1, len(price))

cmap = plt.get_cmap('spring')

fig, ax = plt.subplots(figsize=(14, 8))

# Thin the trace, then iterate over time steps (rows of the transposed arrays).
# The loop variables are deliberately not called `alpha`/`beta`, so the model's
# random variables of those names are not overwritten in the notebook namespace.
thinned = trace_rw[::25]
for i, (alpha_t, beta_t) in enumerate(zip(thinned['alpha'].T,
                                          thinned['beta'].T)):
    # Draw a further-thinned subset of the posterior samples for this time step.
    for a, b in zip(alpha_t[::25], beta_t[::25]):
        ax.plot(x,
                a + b*x,
                alpha=.03,
                lw=.9,
                c=cmap(colors[i]))

points = ax.scatter(price.Dropbox,
                    price.Box,
                    c=colors,
                    s=5,
                    cmap=cmap)

# Label the colorbar with the year of evenly spaced rows, pinning each tick to
# the colour value of the row it labels so labels and colours stay aligned.
# `max(..., 1)` keeps the step valid when there are fewer than 9 rows.
cbar = plt.colorbar(points, ax=ax)
tick_rows = np.arange(0, len(price), max(len(price) // 9, 1))
dates = [str(price.index[row].year) for row in tick_rows]
cbar.set_ticks(colors[tick_rows])
cbar.set_ticklabels(dates)

ax.set_xlabel('Dropbox')
ax.set_ylabel('Box')
ax.set_title('Posterior predictive regression lines')
sns.despine()
fig.tight_layout();