# Correlation
- Correlation measures association only; it does not show whether x causes y, y causes x, or neither.
- Correlation is a statistic that measures the degree to which two variables move in relation to each other.
- In finance, the correlation can measure the movement of a stock with that of a benchmark index, such as the S&P 500.

In [1]:
!pip install pandas
!pip install pandas_datareader
!pip install datetime
!pip install numpy



In [2]:
import pandas as pd
import pandas_datareader as pdr
import datetime as dt
import numpy as np

In [3]:
# Stocks to compare, and the first date requested from Yahoo Finance.
tickers = ['AAPL', 'TWTR', 'IBM', 'MSFT']
start = dt.datetime(year=2020, month=1, day=1)

# One download for all symbols; `start` is reused by later cells and helpers.
data = pdr.get_data_yahoo(tickers, start=start)

In [4]:
# Preview the first rows; the columns have two levels (price attribute, ticker symbol).
data.head()

Attributes,Adj Close,Adj Close,Adj Close,Adj Close,Close,Close,Close,Close,High,High,...,Low,Low,Open,Open,Open,Open,Volume,Volume,Volume,Volume
Symbols,AAPL,TWTR,IBM,MSFT,AAPL,TWTR,IBM,MSFT,AAPL,TWTR,...,IBM,MSFT,AAPL,TWTR,IBM,MSFT,AAPL,TWTR,IBM,MSFT
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2019-12-31,289.866974,32.049999,121.259727,154.749741,293.649994,32.049999,134.039993,157.699997,293.679993,32.32,...,132.399994,156.449997,289.929993,31.860001,132.529999,156.770004,25247625.0,11055160.0,3778012.0,18393383.0
2020-01-02,296.480652,32.299999,122.508156,157.615128,300.350006,32.299999,135.419998,160.619995,300.600006,32.5,...,134.770096,158.330002,296.23999,32.310001,135.0,158.779999,33911864.0,10694420.0,3148833.0,22634546.0
2020-01-03,293.598267,31.52,121.53112,155.652512,297.429993,31.52,134.339996,158.619995,300.579987,32.099998,...,133.559998,158.059998,297.149994,31.709999,133.570007,158.320007,36633878.0,14440378.0,2373676.0,21121681.0
2020-01-06,295.937683,31.639999,121.314018,156.05484,299.799988,31.639999,134.100006,159.029999,299.959991,31.709999,...,133.199997,156.509995,293.790009,31.23,133.419998,157.080002,29644644.0,12585831.0,2426305.0,20826702.0
2020-01-07,294.545959,32.540001,121.395432,154.631989,298.390015,32.540001,134.190002,157.580002,300.899994,32.695,...,133.399994,157.320007,299.839996,31.799999,133.690002,159.320007,26207813.0,13484461.0,3004389.0,20903403.0


In [5]:
# Keep only the adjusted closing prices (one column per ticker).
# The guard makes this cell safe to re-run: after the first run `data` no longer
# has two-level (attribute, symbol) columns, and selecting 'Adj Close' a second
# time would raise a KeyError.
if isinstance(data.columns, pd.MultiIndex):
    data = data['Adj Close']

In [6]:
# Preview again: the columns are now just the ticker symbols.
data.head()

Symbols,AAPL,TWTR,IBM,MSFT
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2019-12-31,289.866974,32.049999,121.259727,154.749741
2020-01-02,296.480652,32.299999,122.508156,157.615128
2020-01-03,293.598267,31.52,121.53112,155.652512
2020-01-06,295.937683,31.639999,121.314018,156.05484
2020-01-07,294.545959,32.540001,121.395432,154.631989


In [7]:
# Daily log return: natural log of each price divided by the previous row's price.
log_returns = np.log(data.div(data.shift(periods=1)))

In [8]:
# Display the log returns; the first row is NaN because it has no previous row to compare with.
log_returns

Symbols,AAPL,TWTR,IBM,MSFT
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2019-12-31,,,,
2020-01-02,0.022560,0.007770,0.010243,0.018347
2020-01-03,-0.009770,-0.024445,-0.008007,-0.012530
2020-01-06,0.007937,0.003800,-0.001788,0.002581
2020-01-07,-0.004714,0.028048,0.000671,-0.009159
...,...,...,...,...
2021-12-06,0.021275,0.055480,0.008963,0.009797
2021-12-07,0.034833,-0.000225,0.013831,0.026412
2021-12-08,0.022527,0.027946,0.011774,0.000149
2021-12-09,-0.002975,0.016056,0.004461,-0.005598


In [9]:
# Pairwise correlation matrix of the daily log returns (symmetric, 1.0 on the diagonal).
# NOTE(review): AAPL's correlations in the output below look low for a large-cap
# tech stock, and the ~290 AAPL price shown for 2019-12-31 suggests the series may
# not be adjusted for AAPL's 2020 stock split — TODO confirm the price data.
log_returns.corr()

Symbols,AAPL,TWTR,IBM,MSFT
Symbols,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
AAPL,1.0,0.139424,0.155248,0.267408
TWTR,0.139424,1.0,0.319246,0.515272
IBM,0.155248,0.319246,1.0,0.520327
MSFT,0.267408,0.515272,0.520327,1.0


In [10]:
# Download the S&P 500 index (Yahoo symbol ^GSPC) from the same start date as the stocks.
sp500 = pdr.get_data_yahoo("^GSPC", start=start)

In [11]:
# Preview the index data; a single symbol yields single-level columns (High, Low, Open, Close, Volume, Adj Close).
sp500.head()

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2019-12-31,3231.719971,3212.030029,3215.179932,3230.780029,2893810000,3230.780029
2020-01-02,3258.139893,3235.530029,3244.669922,3257.850098,3458250000,3257.850098
2020-01-03,3246.149902,3222.340088,3226.360107,3234.850098,3461290000,3234.850098
2020-01-06,3246.840088,3214.639893,3217.550049,3246.280029,3674070000,3246.280029
2020-01-07,3244.909912,3232.429932,3241.860107,3237.179932,3420380000,3237.179932


In [12]:
# Add the benchmark's daily log return as an extra column, aligned on the date index.
log_returns['SP500'] = np.log(
    sp500['Adj Close'].div(sp500['Adj Close'].shift(periods=1))
)

In [13]:
# Correlation matrix again, now including the SP500 benchmark column.
log_returns.corr()

Symbols,AAPL,TWTR,IBM,MSFT,SP500
Symbols,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
AAPL,1.0,0.139424,0.155248,0.267408,0.289579
TWTR,0.139424,1.0,0.319246,0.515272,0.576116
IBM,0.155248,0.319246,1.0,0.520327,0.723265
MSFT,0.267408,0.515272,0.520327,1.0,0.853597
SP500,0.289579,0.576116,0.723265,0.853597,1.0


In [21]:
def test_correlation(ticker):
    """Return the correlation matrix of the notebook's log returns plus one more ticker.

    Downloads ``ticker`` from Yahoo Finance starting at the notebook-level
    ``start`` date, computes its daily log return from the adjusted close, and
    adds it as a new column to a copy of the notebook-level ``log_returns``
    frame, which is left unmodified.

    Parameters
    ----------
    ticker : str
        Yahoo Finance symbol; also used as the name of the added column.

    Returns
    -------
    pandas.DataFrame
        Pairwise correlations of all columns, including ``ticker``.
    """
    adj_close = pdr.get_data_yahoo(ticker, start)['Adj Close']
    ticker_returns = np.log(adj_close / adj_close.shift())
    # Work on a copy so repeated calls with different tickers do not accumulate columns.
    combined = log_returns.copy()
    combined[ticker] = ticker_returns
    return combined.corr()

In [25]:
# Add OYY.SI to the comparison; in the output below its correlations with the
# other columns are all close to zero (at most about 0.11).
test_correlation('OYY.SI')

Symbols,AAPL,TWTR,IBM,MSFT,SP500,OYY.SI
Symbols,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
AAPL,1.0,0.139424,0.155248,0.267408,0.289579,0.014754
TWTR,0.139424,1.0,0.319246,0.515272,0.576116,-0.003077
IBM,0.155248,0.319246,1.0,0.520327,0.723265,0.103299
MSFT,0.267408,0.515272,0.520327,1.0,0.853597,0.043945
SP500,0.289579,0.576116,0.723265,0.853597,1.0,0.114105
OYY.SI,0.014754,-0.003077,0.103299,0.043945,0.114105,1.0


In [23]:
# Add TLT to the comparison; in the output below it is negatively correlated
# with every other column.
test_correlation('TLT')

Symbols,AAPL,TWTR,IBM,MSFT,SP500,TLT
Symbols,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
AAPL,1.0,0.139424,0.155248,0.267408,0.289579,-0.120366
TWTR,0.139424,1.0,0.319246,0.515272,0.576116,-0.140185
IBM,0.155248,0.319246,1.0,0.520327,0.723265,-0.379993
MSFT,0.267408,0.515272,0.520327,1.0,0.853597,-0.263081
SP500,0.289579,0.576116,0.723265,0.853597,1.0,-0.405621
TLT,-0.120366,-0.140185,-0.379993,-0.263081,-0.405621,1.0


In [17]:
!pip install matplotlib



In [18]:
import matplotlib.pyplot as plt
%matplotlib notebook

In [19]:
def visualize_correlation(ticker1,ticker2):
    """Plot the adjusted close of two tickers on a common, rebased scale.

    Both series are downloaded from Yahoo Finance starting at the notebook-level
    ``start`` date, and each is divided by its first available price so that the
    lines start at 1.0 and show relative growth. The function draws a line
    chart of prices; it does not compute a correlation coefficient.

    Parameters
    ----------
    ticker1, ticker2 : str
        Yahoo Finance symbols to compare.

    Returns
    -------
    None
        The figure is created as a side effect.
    """
    prices = pdr.get_data_yahoo([ticker1, ticker2], start)['Adj Close']
    # Rebase on each column's first non-missing price. Dividing by the very first
    # row alone would turn a whole column into NaN whenever that ticker has no
    # quote on the first date (e.g. the two symbols trade on different calendars).
    rebased = prices / prices.bfill().iloc[0]
    fig, ax = plt.subplots()
    rebased.plot(ax=ax)
    # A figure should stand on its own when the notebook is skimmed.
    ax.set(
        title=f"{ticker1} vs {ticker2}: adjusted close rebased to 1.0",
        xlabel="Date",
        ylabel="Price relative to first observation",
    )

In [20]:
# Plot AAPL against TLT, each series rebased to its starting price.
visualize_correlation('AAPL','TLT')

<IPython.core.display.Javascript object>

In [24]:
# Plot the S&P 500 index against TLT, each series rebased to its starting price.
visualize_correlation("^GSPC", "TLT")

<IPython.core.display.Javascript object>

In [30]:
# Plot OYY.SI against Z25.SI, each series rebased to its starting price.
visualize_correlation("OYY.SI", "Z25.SI")

<IPython.core.display.Javascript object>