# Correlation Calculations

**Formula**
$$r = \frac{\sum(x-\bar{x})(y-\bar{y})}{\sqrt{\sum(x-\bar{x})^2\,\sum(y-\bar{y})^2}}$$
* $r$: the correlation coefficient
* $\bar{x}$: mean of $x$
* $\bar{y}$: mean of $y$

In [1]:
import datetime as dt
import os

import numpy as np
import pandas as pd
import pandas_datareader as pdr
import yfinance as yf


tickers = ['AAPL', 'MSFT', 'AMZN', 'IBM']
# auto_adjust=False keeps the 'Adj Close' column available: recent yfinance
# releases default to auto_adjust=True, which drops that column and makes the
# lookup below raise KeyError.
data = yf.download(tickers, period='12mo', interval='1d', auto_adjust=False)
data = data['Adj Close']
# Daily log return ln(P_t / P_{t-1}); the first row is NaN because it has no
# previous price to compare against.
log_return = np.log(data/data.shift())

[*********************100%***********************]  4 of 4 completed


In [2]:
# Show the daily log returns per ticker (first row is NaN: no prior price).
log_return

Unnamed: 0_level_0,AAPL,AMZN,IBM,MSFT
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2022-03-31,,,,
2022-04-01,-0.001720,0.003445,0.000999,0.003594
2022-04-04,0.023417,0.028844,0.000922,0.017778
2022-04-05,-0.019124,-0.025823,-0.010650,-0.013070
2022-04-06,-0.018623,-0.032833,0.003872,-0.037293
...,...,...,...,...
2023-03-24,0.008271,-0.005893,0.015443,0.010426
2023-03-27,-0.012369,-0.000918,0.031582,-0.015047
2023-03-28,-0.003988,-0.008193,0.000232,-0.004170
2023-03-29,0.019597,0.030485,0.002857,0.019002


In [3]:
# Pairwise Pearson correlation between the tickers' daily log returns.
log_return.corr(method='pearson')

Unnamed: 0,AAPL,AMZN,IBM,MSFT
AAPL,1.0,0.704091,0.486511,0.793052
AMZN,0.704091,1.0,0.349186,0.755842
IBM,0.486511,0.349186,1.0,0.42208
MSFT,0.793052,0.755842,0.42208,1.0


In [4]:
# Same window as the stock download. auto_adjust=False keeps the 'Adj Close'
# column, which recent yfinance releases drop by default.
sp500 = yf.download("^GSPC", period='12mo', interval='1d', auto_adjust=False)

[*********************100%***********************]  1 of 1 completed


In [6]:
# Narrow the downloaded frame to its adjusted-close column. The guard makes
# the cell safe to re-run: once `sp500` has been narrowed to a Series, indexing
# it with 'Adj Close' again would raise KeyError.
if isinstance(sp500, pd.DataFrame):
    sp500 = sp500['Adj Close']
sp500.head()

Date
2022-03-31    4530.410156
2022-04-01    4545.859863
2022-04-04    4582.640137
2022-04-05    4525.120117
2022-04-06    4481.149902
Name: Adj Close, dtype: float64

In [7]:
# Add the index's daily log return as an extra column so it appears in the
# correlation matrix next to the individual stocks.
log_return['sp500'] = np.log(sp500.div(sp500.shift(periods=1)))
log_return.head(n=5)

Unnamed: 0_level_0,AAPL,AMZN,IBM,MSFT,sp500
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2022-03-31,,,,,
2022-04-01,-0.00172,0.003445,0.000999,0.003594,0.003404
2022-04-04,0.023417,0.028844,0.000922,0.017778,0.008058
2022-04-05,-0.019124,-0.025823,-0.01065,-0.01307,-0.012631
2022-04-06,-0.018623,-0.032833,0.003872,-0.037293,-0.009764


In [8]:
# Pearson correlation matrix of every column currently in log_return.
log_return.corr(method='pearson')

Unnamed: 0,AAPL,AMZN,IBM,MSFT,sp500
AAPL,1.0,0.704091,0.486511,0.793052,0.876861
AMZN,0.704091,1.0,0.349186,0.755842,0.803862
IBM,0.486511,0.349186,1.0,0.42208,0.561662
MSFT,0.793052,0.755842,0.42208,1.0,0.848678
sp500,0.876861,0.803862,0.561662,0.848678,1.0


In [9]:
import matplotlib.pyplot as plt
%matplotlib notebook

def visualize(ticker1, ticker2):
    """Plot two tickers' adjusted closes on a common scale and save the chart.

    Downloads the last 12 months of daily prices for both tickers, divides
    each series by its first available value so both lines start at 1.0, plots
    them on one axes and writes the figure to
    ``./images/<ticker1>_<ticker2>_corr.png``.

    Parameters
    ----------
    ticker1, ticker2 : str
        Symbols passed to ``yf.download`` (e.g. ``'AAPL'``, ``'^GSPC'``).

    Returns
    -------
    None
    """
    # auto_adjust=False keeps the 'Adj Close' column; recent yfinance releases
    # default to auto_adjust=True and drop it.
    df = yf.download([ticker1, ticker2], period='12mo', interval='1d',
                     auto_adjust=False)
    df = df['Adj Close']
    # Rebase on each column's first non-NaN price. Dividing by df.iloc[0]
    # directly would turn a whole series into NaN whenever that ticker has no
    # quote on the first date of the window.
    df = df/df.bfill().iloc[0]
    fig, ax = plt.subplots()
    df.plot(ax=ax)
    ax.set_title('{t1} vs {t2}: adjusted close (rebased to 1.0)'.format(
        t1=ticker1, t2=ticker2))
    ax.set_xlabel('Date')
    ax.set_ylabel('Price relative to first day')
    path = './images/{t1}_{t2}_corr.png'.format(t1=ticker1, t2=ticker2)
    # savefig does not create missing directories.
    os.makedirs(os.path.dirname(path), exist_ok=True)
    # Save through the figure object rather than pyplot's "current figure".
    fig.savefig(path)

In [10]:
# Chart AAPL against the ^GSPC index (downloaded as `sp500` earlier), both rebased to their first price.
visualize('AAPL', "^GSPC")

[*********************100%***********************]  2 of 2 completed


<IPython.core.display.Javascript object>

In [11]:
# Chart AAPL against IBM, both rebased to their first price.
visualize('AAPL', 'IBM')

[*********************100%***********************]  2 of 2 completed


<IPython.core.display.Javascript object>

# Linear Regression Calculations

In [12]:
from sklearn.linear_model import LinearRegression
import pandas_datareader as pdr
import datetime as dt
import matplotlib.pyplot as plt
%matplotlib notebook

# Two independent standard-normal samples: a reference picture of what
# uncorrelated data looks like. A seeded generator makes the scatter (and the
# saved image) identical on every run.
rng = np.random.default_rng(42)
X = rng.standard_normal(5000)
Y = rng.standard_normal(5000)
fig, ax = plt.subplots()
ax.scatter(X, Y, alpha=.2)
ax.set_title('Two independent standard-normal samples')
ax.set_xlabel('X')
ax.set_ylabel('Y')
# savefig does not create missing directories.
os.makedirs('./images', exist_ok=True)
fig.savefig('./images/randomscater.png')

<IPython.core.display.Javascript object>

In [13]:
tickers = ['AAPL', 'MSFT', 'AMZN', 'IBM']
# auto_adjust=False keeps the 'Adj Close' column available: recent yfinance
# releases default to auto_adjust=True, which drops that column and makes the
# lookup below raise KeyError.
data = yf.download(tickers, period='12mo', interval='1d', auto_adjust=False)
data = data['Adj Close']
# Daily log return ln(P_t / P_{t-1}); the first row is NaN because it has no
# previous price to compare against.
log_return = np.log(data/data.shift())

[*********************100%***********************]  4 of 4 completed


In [14]:
# Column vectors of shape (n, 1): skip the first row, then reshape each series.
X = log_return['AAPL'].to_numpy()[1:].reshape(-1, 1)
Y = log_return['IBM'].to_numpy()[1:].reshape(-1, 1)

In [19]:
def linear_regression(ticker1, ticker2, returns=None):
    """Fit ``ticker2`` returns on ``ticker1`` returns and save a scatter plot.

    Runs an ordinary least-squares fit of ``returns[ticker2]`` (y) on
    ``returns[ticker1]`` (x), plots the observations with the fitted line,
    puts the intercept (alpha) and slope (beta) in the title, and writes the
    figure to ``./images/<ticker1>_<ticker2>_ln.png``.

    Parameters
    ----------
    ticker1 : str
        Column used as the explanatory variable (x-axis).
    ticker2 : str
        Column used as the response variable (y-axis).
    returns : pandas.DataFrame, optional
        Frame of returns with one column per ticker. Defaults to the
        notebook-level ``log_return`` frame, which is what the function
        always used before this parameter existed.

    Returns
    -------
    None
    """
    if returns is None:
        returns = log_return
    x = returns[ticker1]
    y = returns[ticker2]
    # Keep only dates where both series have a value. This drops the leading
    # NaN produced by shift() and also any later gaps, which would otherwise
    # make LinearRegression.fit raise.
    valid = x.notna() & y.notna()
    X = x[valid].to_numpy().reshape(-1, 1)
    Y = y[valid].to_numpy().reshape(-1, 1)
    lr = LinearRegression()
    lr.fit(X, Y)
    Y_pred = lr.predict(X)
    # Y is 2-D, so intercept_ has shape (1,) and coef_ has shape (1, 1).
    alpha = lr.intercept_[0]
    beta = lr.coef_[0, 0]
    fig, ax = plt.subplots()
    ax.set_title("Alpha: " + str(round(alpha, 5)) + ", Beta: " + str(round(beta, 3)))
    ax.set_xlabel(str(ticker1) + " log return")
    ax.set_ylabel(str(ticker2) + " log return")
    ax.scatter(X, Y)
    ax.plot(X, Y_pred, c='r')
    path = './images/{t1}_{t2}_ln.png'.format(t1=ticker1, t2=ticker2)
    # savefig does not create missing directories.
    os.makedirs(os.path.dirname(path), exist_ok=True)
    fig.savefig(path)

In [20]:
# Regress IBM log returns (y) on AAPL log returns (x) and plot the fit.
linear_regression('AAPL', 'IBM')

<IPython.core.display.Javascript object>

In [21]:
# Regress MSFT log returns (y) on AAPL log returns (x) and plot the fit.
linear_regression('AAPL', 'MSFT')

<IPython.core.display.Javascript object>