In [66]:
import numpy as np
import pandas as pd
import pandas_datareader as pdr
import plotly.graph_objects as go
import statsmodels.api as ols
from sklearn.linear_model import LinearRegression

In [3]:
# Adjusted closing prices from Yahoo Finance for every symbol below,
# starting 2020-01-01 (no end date is given to the reader).
tickers = [
    "AAPL",
    "TWTR",
    "IBM",
    "MSFT",
    "^GSPC",
]
stocks_price = pdr.get_data_yahoo(tickers, start="2020-01-01")["Adj Close"]
stocks_price.head()

Symbols,AAPL,TWTR,IBM,MSFT,^GSPC
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-01-02,74.096458,32.299999,124.142921,157.903473,3257.850098
2020-01-03,73.376091,31.52,123.152855,155.937286,3234.850098
2020-01-06,73.96077,31.639999,122.932838,156.340378,3246.280029
2020-01-07,73.61293,32.540001,123.01535,154.914886,3237.179932
2020-01-08,74.797081,33.049999,124.042084,157.382446,3253.050049


In [5]:
# Log return per row: ln(P_t / P_{t-1}). The first row has no predecessor,
# so it comes out as NaN for every column.
previous_price = stocks_price.shift(1)
log_returns = stocks_price.div(previous_price).pipe(np.log)
log_returns

Symbols,AAPL,TWTR,IBM,MSFT,^GSPC
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-01-02,,,,,
2020-01-03,-0.009770,-0.024445,-0.008007,-0.012530,-0.007085
2020-01-06,0.007937,0.003800,-0.001788,0.002582,0.003527
2020-01-07,-0.004714,0.028048,0.000671,-0.009160,-0.002807
2020-01-08,0.015958,0.015551,0.008312,0.015803,0.004890
...,...,...,...,...,...
2021-10-25,-0.000336,-0.002091,-0.001879,-0.003337,0.004737
2021-10-26,0.004564,-0.011009,-0.004004,0.006405,0.001818
2021-10-27,-0.003153,-0.114026,-0.015537,0.041252,-0.005064
2021-10-28,0.024684,-0.009533,0.005338,0.003645,0.009781


In [17]:
# Build the regression inputs from AAPL (feature) and ^GSPC (target) log returns.
# Rows are dropped jointly: calling .dropna() on each column separately would
# misalign the two arrays (or give them different lengths) whenever one series
# has a missing value on a date where the other does not.
returns_pair = log_returns[["AAPL", "^GSPC"]].dropna()
X = returns_pair["AAPL"].values.reshape(-1, 1)  # 2-D column, as sklearn expects
y = returns_pair["^GSPC"].values

In [18]:
# Ordinary least-squares fit of y on X; fit() returns the estimator itself,
# so the chained call binds the fitted model. The bare name below keeps the
# estimator repr as the cell's displayed output.
ling_model = LinearRegression().fit(X, y)
ling_model

LinearRegression()

In [19]:
# Show the fitted intercept and slope (labelled alpha and beta in the plot title).
ling_model.intercept_, ling_model.coef_

(-0.00011805304419945813, array([0.56905972]))

In [20]:
# In-sample predictions: the fitted line evaluated at the same X used for training.
y_hat = ling_model.predict(X)

In [65]:
# Scatter of the (X, y) log-return pairs with the fitted regression line on top.
fig = go.Figure()

# Observed points: AAPL log return on x, ^GSPC log return on y.
fig.add_trace(go.Scatter(x=X.flatten(),
                         y=y,
                         name="Log of Returns",
                         mode="markers",
                         marker=dict(color="Blue",
                                     opacity=0.5,
                                     line=dict(width=2,
                                               color="Black"))))
# Model predictions at the same x values; all points lie on one straight line,
# so the unsorted x order does not affect the drawn segment.
fig.add_trace(go.Scatter(x=X.flatten(),
                         y=y_hat,
                         name="Linear Model",
                         mode="lines"))

# LinearRegression.score returns the coefficient of determination (R^2),
# not the correlation coefficient r, so the title labels it as R^2.
fig.update_layout(height=1000,
                  width=1000,
                  title="SPC500 vs AAPL<br>" +
                        f"alpha = {ling_model.intercept_}<br>" +
                        f"beta = {ling_model.coef_[0]}<br>" +
                        f"R^2 = {ling_model.score(X, y)}",
                  xaxis_title="AAPL",
                  yaxis_title="SPC500",
                  font=dict(family="Courier New, monospace",
                            size=15,))

fig.show()

In [32]:
# In-sample goodness of fit; for LinearRegression, score() is the coefficient
# of determination R^2 (evaluated here on the training data itself).
ling_model.score(X, y)

0.651483879708859