In [4]:
# Portfolio correlation, risk and coherence

# Correlation measures association: the degree to which two variables move in relation to each other.
# The correlation coefficient r ranges from -1 (perfect negative correlation) through 0 (no linear
# correlation) to +1 (perfect positive correlation).
# r = cov(X, Y) / (std(X) * std(Y)), i.e. the covariance of X and Y divided by the product of their
# standard deviations.


import pandas_datareader.data as pdr
import pandas as pd
import numpy as np
import datetime as dt
import yfinance as yfin
yfin.pdr_override()

# The yfin.pdr_override() call above routes pandas_datareader's Yahoo requests through yfinance,
# to get around a breaking change in the Yahoo Finance API.

# Chip portfolio: download the daily price history of each ticker from the start of 2019 onwards.
tickers = ['NVDA', 'INTC', 'AMD', 'TSM']
start = dt.datetime(year=2019, month=1, day=1)
data = pdr.get_data_yahoo(tickers, start=start)

[*********************100%***********************]  4 of 4 completed


In [5]:
data.head()

Unnamed: 0_level_0,Adj Close,Adj Close,Adj Close,Adj Close,Close,Close,Close,Close,High,High,...,Low,Low,Open,Open,Open,Open,Volume,Volume,Volume,Volume
Unnamed: 0_level_1,AMD,INTC,NVDA,TSM,AMD,INTC,NVDA,TSM,AMD,INTC,...,NVDA,TSM,AMD,INTC,NVDA,TSM,AMD,INTC,NVDA,TSM
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2019-01-02,18.83,41.513226,33.799728,32.560844,18.83,47.080002,34.055,36.52,19.0,47.470001,...,32.512501,36.0,18.01,45.959999,32.66,36.200001,87148700,18774600,50875200,5273100
2019-01-03,17.049999,39.229469,31.757652,30.635014,17.049999,44.490002,31.997499,34.360001,18.68,46.279999,...,31.922501,34.27,18.42,46.150002,33.447498,35.34,117277600,32267300,70555200,15998000
2019-01-04,19.0,41.636673,33.79229,31.178881,19.0,47.220001,34.047501,34.970001,19.07,47.57,...,32.424999,34.209999,17.549999,45.84,32.735001,34.459999,111878600,35447300,58562000,14178200
2019-01-07,20.57,41.830662,35.581264,31.410692,20.57,47.439999,35.849998,35.23,20.68,48.0,...,34.107498,34.970001,19.440001,47.099998,34.625,35.009998,107157000,22736800,70916000,6850800
2019-01-08,20.75,42.095192,34.695465,31.15213,20.75,47.740002,34.9575,34.939999,21.200001,48.029999,...,34.224998,34.639999,21.190001,47.799999,36.672501,35.110001,121271000,22749200,78601600,11462600


In [8]:
# Keep only the adjusted closing prices (one column per ticker).
# The membership guard makes this cell safe to re-run: once the selection has been
# applied, `data` no longer contains 'Adj Close' and a second `data['Adj Close']`
# would raise KeyError.
if 'Adj Close' in data.columns:
    data = data['Adj Close']

In [9]:
data.head()

Unnamed: 0_level_0,AMD,INTC,NVDA,TSM
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2019-01-02,18.83,41.513226,33.799728,32.560844
2019-01-03,17.049999,39.229469,31.757652,30.635014
2019-01-04,19.0,41.636673,33.79229,31.178881
2019-01-07,20.57,41.830662,35.581264,31.410692
2019-01-08,20.75,42.095192,34.695465,31.15213


In [10]:
# Convert the prices to daily log returns, ln(P_t / P_{t-1}). The stocks trade at different
# price levels, so returns (not raw prices) put them on a comparable basis.
# The first row is NaN because it has no previous day to compare against.

previous_close = data.shift(1)
log_returns = np.log(data / previous_close)

In [11]:
log_returns

Unnamed: 0_level_0,AMD,INTC,NVDA,TSM
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2019-01-02,,,,
2019-01-03,-0.099301,-0.056584,-0.062319,-0.060967
2019-01-04,0.108289,0.059553,0.062099,0.017597
2019-01-07,0.079395,0.004648,0.051587,0.007407
2019-01-08,0.008713,0.006304,-0.025210,-0.008266
...,...,...,...,...
2023-04-24,-0.009773,-0.021348,-0.002843,-0.008588
2023-04-25,-0.044005,-0.026650,-0.030068,-0.028157
2023-04-26,0.025216,0.005869,0.026883,-0.000486
2023-04-27,0.017303,0.027501,0.009967,0.017117


In [12]:
log_returns.corr()
# The diagonal is 1 because every series is perfectly correlated with itself (e.g. AMD vs AMD).


Unnamed: 0,AMD,INTC,NVDA,TSM
AMD,1.0,0.516657,0.780423,0.615678
INTC,0.516657,1.0,0.610178,0.54797
NVDA,0.780423,0.610178,1.0,0.666675
TSM,0.615678,0.54797,0.666675,1.0


In [13]:
# Download the S&P 500 index (^GSPC) from the same start date, so the chip portfolio's
# correlation with the index can be compared.
sp500 = pdr.get_data_yahoo("^GSPC", start=start)

[*********************100%***********************]  1 of 1 completed


In [15]:
sp500.head()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2019-01-02,2476.959961,2519.48999,2467.469971,2510.030029,2510.030029,3733160000
2019-01-03,2491.919922,2493.139893,2443.959961,2447.889893,2447.889893,3858830000
2019-01-04,2474.330078,2538.070068,2474.330078,2531.939941,2531.939941,4234140000
2019-01-07,2535.610107,2566.159912,2524.560059,2549.689941,2549.689941,4133120000
2019-01-08,2568.110107,2579.820068,2547.560059,2574.409912,2574.409912,4120060000


In [17]:
# Add the index's daily log return as an extra column next to the per-stock returns.
sp500_adj_close = sp500['Adj Close']
log_returns['SP500'] = np.log(sp500_adj_close / sp500_adj_close.shift(1))

In [18]:
log_returns

# A stock that makes up a large share of the S&P 500's market capitalisation tends to be highly
# correlated with the index. The stocks in this chip portfolio have smaller market capitalisations.

Unnamed: 0_level_0,AMD,INTC,NVDA,TSM,SP500
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2019-01-02,,,,,
2019-01-03,-0.099301,-0.056584,-0.062319,-0.060967,-0.025068
2019-01-04,0.108289,0.059553,0.062099,0.017597,0.033759
2019-01-07,0.079395,0.004648,0.051587,0.007407,0.006986
2019-01-08,0.008713,0.006304,-0.025210,-0.008266,0.009649
...,...,...,...,...,...
2023-04-24,-0.009773,-0.021348,-0.002843,-0.008588,0.000851
2023-04-25,-0.044005,-0.026650,-0.030068,-0.028157,-0.015937
2023-04-26,0.025216,0.005869,0.026883,-0.000486,-0.003849
2023-04-27,0.017303,0.027501,0.009967,0.017117,0.019377


In [19]:
# Finding a ticker with a negative correlation

def test_correlation(ticker):
    """Return the correlation matrix of the portfolio's log returns plus one extra ticker.

    Downloads the price history of ``ticker`` from the notebook-level ``start`` date,
    converts its adjusted close to daily log returns, and adds them as a column named
    after the ticker to a copy of the notebook-level ``log_returns`` frame. The shared
    ``log_returns`` frame itself is not modified.

    Args:
        ticker: Symbol to download and compare, e.g. "TLT".

    Returns:
        The pairwise correlation matrix (``DataFrame.corr()``) of all columns,
        including the new ticker.
    """
    adj_close = pdr.get_data_yahoo(ticker, start)['Adj Close']
    ticker_returns = np.log(adj_close / adj_close.shift())
    # Work on a copy so repeated calls with different tickers do not accumulate columns.
    combined = log_returns.copy()
    combined[ticker] = ticker_returns
    return combined.corr()

In [20]:
test_correlation("LQD")

[*********************100%***********************]  1 of 1 completed


Unnamed: 0,AMD,INTC,NVDA,TSM,SP500,LQD
AMD,1.0,0.516657,0.780423,0.615678,0.649274,0.255748
INTC,0.516657,1.0,0.610178,0.54797,0.695087,0.257894
NVDA,0.780423,0.610178,1.0,0.666675,0.736256,0.281189
TSM,0.615678,0.54797,0.666675,1.0,0.64166,0.227445
SP500,0.649274,0.695087,0.736256,0.64166,1.0,0.332855
LQD,0.255748,0.257894,0.281189,0.227445,0.332855,1.0


In [22]:
test_correlation("TLT")
# Testing the correlation of TLT, a twenty-year Treasury bond ETF.
# Its log returns are negatively correlated with every stock in the portfolio and with the S&P 500.

[*********************100%***********************]  1 of 1 completed


Unnamed: 0,AMD,INTC,NVDA,TSM,SP500,TLT
AMD,1.0,0.516657,0.780423,0.615678,0.649274,-0.070977
INTC,0.516657,1.0,0.610178,0.54797,0.695087,-0.183461
NVDA,0.780423,0.610178,1.0,0.666675,0.736256,-0.082551
TSM,0.615678,0.54797,0.666675,1.0,0.64166,-0.13613
SP500,0.649274,0.695087,0.736256,0.64166,1.0,-0.240693
TLT,-0.070977,-0.183461,-0.082551,-0.13613,-0.240693,1.0


In [23]:
import matplotlib.pyplot as plt
%matplotlib notebook

In [30]:
# Visualising the negative correlation

def visualise_correlation(ticker1, ticker2):
    """Plot the adjusted close of two tickers on one chart, each rebased to 1.0.

    Downloads both tickers from the notebook-level ``start`` date, divides each
    series by its own first available price so the two share a common scale,
    and draws them on a new figure.

    Args:
        ticker1: Symbol of the first instrument, e.g. "AMD".
        ticker2: Symbol of the second instrument, e.g. "TLT".
    """
    prices = pdr.get_data_yahoo([ticker1, ticker2], start)['Adj Close']
    # Rebase on each column's first non-missing price. Dividing by the raw first row
    # would turn a whole column into NaN if that ticker has no price on the first date.
    first_valid = prices.bfill().iloc[0]
    rebased = prices / first_valid
    # Creating a subplot
    fig, ax = plt.subplots()
    rebased.plot(ax=ax)
    # Label the figure so it can be read on its own.
    ax.set(
        title=f"{ticker1} vs {ticker2}: adjusted close, rebased",
        xlabel="Date",
        ylabel="Adjusted close (first price = 1.0)",
    )

In [31]:
visualise_correlation("AMD", "TLT")

[*********************100%***********************]  2 of 2 completed


<IPython.core.display.Javascript object>

In [32]:
visualise_correlation("NVDA", "TLT")

[*********************100%***********************]  2 of 2 completed


<IPython.core.display.Javascript object>

In [35]:
visualise_correlation("^GSPC", "TLT")

# This subplot visualises the correlation between the S&P500 and Twenty Year Treasury Bonds

[*********************100%***********************]  2 of 2 completed


<IPython.core.display.Javascript object>

In [36]:
# If all the stocks in the portfolio are highly correlated, they may be exposed to the same risk.
# Diversifying into assets with low or negative correlation may reduce that risk exposure.