In [1]:
# Import libraries
import numpy as np
import pandas as pd

# Plot settings
import cufflinks as cf
cf.set_config_file(offline=True)

# scikit
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

pd.set_option('display.max_rows', 5000)
pd.set_option('display.max_columns', 100)
pd.set_option('display.width', 1000)

In [2]:
# Read data
data = pd.read_csv('data/BLG daily 2002 to 2007 hjm-lecture.txt', index_col=0, sep='\t')
data.head()

Unnamed: 0,0.08,0.5,1.0,1.5,2.0,2.5,3.0,3.5,4.0,4.5,5.0,5.5,6.0,6.5,7.0,7.5,8.0,8.5,9.0,9.5,10.0,10.5,11.0,11.5,12.0,12.5,13.0,13.5,14.0,14.5,15.0,15.5,16.0,16.5,17.0,17.5,18.0,18.5,19.0,19.5,20.0,20.5,21.0,21.5,22.0,22.5,23.0,23.5,24.0,24.5,25.0
1,5.77,6.44,6.71,6.65,6.5,6.33,6.15,5.99,5.84,5.71,5.57,5.44,5.3,5.16,5.01,4.86,4.71,4.55,4.39,4.24,4.09,3.94,3.81,3.68,3.57,3.46,3.37,3.29,3.23,3.18,3.15,3.13,3.12,3.12,3.13,3.16,3.19,3.22,3.27,3.31,3.36,3.42,3.48,3.54,3.6,3.66,3.73,3.79,3.86,3.92,3.99
2,5.77,6.45,6.75,6.68,6.54,6.39,6.23,6.08,5.95,5.82,5.69,5.56,5.43,5.28,5.13,4.97,4.8,4.63,4.46,4.29,4.13,3.97,3.82,3.68,3.55,3.44,3.33,3.25,3.18,3.12,3.08,3.06,3.05,3.05,3.06,3.09,3.12,3.16,3.21,3.26,3.32,3.38,3.44,3.51,3.58,3.65,3.72,3.8,3.87,3.95,4.02
3,5.78,6.44,6.74,6.68,6.56,6.41,6.26,6.12,5.98,5.84,5.71,5.57,5.43,5.28,5.12,4.96,4.79,4.62,4.45,4.28,4.11,3.95,3.8,3.66,3.53,3.41,3.31,3.22,3.14,3.08,3.04,3.01,2.99,2.99,3.0,3.02,3.04,3.08,3.12,3.16,3.22,3.27,3.33,3.39,3.45,3.52,3.59,3.65,3.72,3.79,3.86
4,5.74,6.41,6.69,6.62,6.49,6.35,6.2,6.06,5.93,5.79,5.66,5.52,5.38,5.23,5.07,4.91,4.74,4.57,4.4,4.23,4.06,3.91,3.75,3.61,3.48,3.36,3.25,3.15,3.07,3.01,2.96,2.92,2.9,2.89,2.89,2.9,2.92,2.95,2.99,3.03,3.08,3.13,3.19,3.25,3.31,3.37,3.44,3.51,3.58,3.65,3.72
5,5.74,6.4,6.64,6.55,6.42,6.27,6.13,5.98,5.85,5.72,5.58,5.44,5.3,5.15,5.0,4.83,4.67,4.5,4.33,4.17,4.0,3.85,3.7,3.56,3.42,3.3,3.19,3.09,3.01,2.94,2.89,2.85,2.82,2.8,2.8,2.8,2.82,2.84,2.88,2.92,2.96,3.01,3.07,3.12,3.19,3.25,3.32,3.38,3.45,3.53,3.6


In [3]:
data.shape

(1264, 51)

In [4]:
# Plot curve
data.iloc[0].iplot(title='Representation of a Yield Curve')

In [5]:
# Plot all curves
data.T.iplot(title='Daily Yield Curves')

In [7]:
diff_ = data.diff(-1)
diff_.dropna(inplace=True)

diff_.tail()

Unnamed: 0,0.08,0.5,1.0,1.5,2.0,2.5,3.0,3.5,4.0,4.5,5.0,5.5,6.0,6.5,7.0,7.5,8.0,8.5,9.0,9.5,10.0,10.5,11.0,11.5,12.0,12.5,13.0,13.5,14.0,14.5,15.0,15.5,16.0,16.5,17.0,17.5,18.0,18.5,19.0,19.5,20.0,20.5,21.0,21.5,22.0,22.5,23.0,23.5,24.0,24.5,25.0
1259,0.0,0.03,0.04,0.03,0.02,0.02,0.01,0.01,0.0,0.0,0.0,0.0,-0.01,0.0,-0.01,0.0,0.0,0.0,0.0,0.0,0.0,-0.01,0.0,0.0,-0.01,0.0,0.0,0.0,-0.01,-0.01,-0.01,-0.01,-0.01,-0.01,-0.01,-0.01,-0.02,-0.01,-0.01,-0.01,-0.01,-0.01,-0.01,0.0,-0.01,-0.01,-0.01,-0.01,-0.01,-0.01,-0.01
1260,0.02,0.01,0.0,0.0,0.0,-0.01,-0.01,-0.01,0.0,-0.01,-0.01,-0.01,0.0,0.0,0.0,-0.01,-0.01,-0.01,-0.01,0.0,-0.01,0.0,0.0,0.0,0.0,-0.01,-0.01,-0.01,0.0,0.0,-0.01,0.0,-0.01,0.0,-0.01,0.0,0.0,-0.01,-0.01,-0.01,-0.01,0.0,0.0,-0.01,-0.01,0.0,0.0,0.0,0.0,0.0,0.0
1261,-0.01,-0.03,-0.08,-0.12,-0.13,-0.13,-0.13,-0.13,-0.14,-0.13,-0.14,-0.14,-0.14,-0.14,-0.14,-0.14,-0.13,-0.14,-0.13,-0.14,-0.13,-0.13,-0.13,-0.13,-0.12,-0.11,-0.11,-0.11,-0.11,-0.11,-0.1,-0.1,-0.1,-0.1,-0.1,-0.1,-0.1,-0.1,-0.09,-0.09,-0.09,-0.1,-0.1,-0.1,-0.09,-0.1,-0.1,-0.1,-0.1,-0.1,-0.1
1262,0.0,0.0,0.01,0.02,0.01,0.01,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.02,0.01,0.01,0.02,0.01,0.02,0.01,0.02,0.02,0.01,0.01,0.01,0.02,0.01,0.02,0.01,0.01,0.01,0.01,0.01,0.01,0.0
1263,0.02,0.0,0.03,0.03,0.04,0.04,0.05,0.06,0.06,0.06,0.07,0.07,0.06,0.05,0.05,0.05,0.04,0.04,0.03,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.03,0.02,0.02,0.03,0.03,0.03,0.04,0.04,0.04,0.04,0.04,0.04,0.05,0.05,0.04,0.05,0.04,0.05,0.05,0.05,0.05,0.05,0.05,0.06


In [8]:
diff_.shape

(1263, 51)

In [9]:
vol = np.std(diff_, axis=0) * 10000
vol[:21].iplot(title='Volatility of daily UK government yields', xTitle='Tenor', color='cornflowerblue')

In [10]:
cov_ = pd.DataFrame(np.cov(diff_, rowvar=False)*252/10000, columns=diff_.columns)
cov_.style.format('{:.4%}')

Unnamed: 0,0.08,0.5,1.0,1.5,2.0,2.5,3.0,3.5,4.0,4.5,5.0,5.5,6.0,6.5,7.0,7.5,8.0,8.5,9.0,9.5,10.0,10.5,11.0,11.5,12.0,12.5,13.0,13.5,14.0,14.5,15.0,15.5,16.0,16.5,17.0,17.5,18.0,18.5,19.0,19.5,20.0,20.5,21.0,21.5,22.0,22.5,23.0,23.5,24.0,24.5,25.0
0,0.0040%,0.0009%,0.0002%,-0.0001%,-0.0001%,-0.0000%,0.0001%,0.0001%,0.0002%,0.0002%,0.0002%,0.0002%,0.0002%,0.0002%,0.0002%,0.0002%,0.0002%,0.0002%,0.0002%,0.0002%,0.0001%,0.0001%,0.0001%,0.0001%,0.0001%,0.0001%,0.0001%,0.0001%,0.0001%,0.0001%,0.0001%,0.0001%,0.0001%,0.0001%,0.0001%,0.0001%,0.0001%,0.0001%,0.0001%,0.0001%,0.0000%,0.0001%,0.0001%,0.0001%,0.0001%,0.0001%,0.0001%,0.0001%,0.0001%,0.0001%,0.0001%
1,0.0009%,0.0063%,0.0055%,0.0041%,0.0035%,0.0033%,0.0031%,0.0029%,0.0028%,0.0027%,0.0026%,0.0025%,0.0024%,0.0022%,0.0021%,0.0020%,0.0019%,0.0018%,0.0017%,0.0016%,0.0015%,0.0014%,0.0013%,0.0012%,0.0011%,0.0011%,0.0010%,0.0009%,0.0009%,0.0008%,0.0008%,0.0008%,0.0007%,0.0008%,0.0008%,0.0008%,0.0008%,0.0008%,0.0008%,0.0009%,0.0009%,0.0009%,0.0010%,0.0010%,0.0010%,0.0011%,0.0012%,0.0012%,0.0012%,0.0013%,0.0013%
2,0.0002%,0.0055%,0.0082%,0.0077%,0.0068%,0.0061%,0.0056%,0.0052%,0.0048%,0.0045%,0.0042%,0.0040%,0.0038%,0.0036%,0.0035%,0.0033%,0.0032%,0.0031%,0.0029%,0.0028%,0.0027%,0.0026%,0.0025%,0.0023%,0.0022%,0.0021%,0.0020%,0.0020%,0.0019%,0.0018%,0.0018%,0.0017%,0.0017%,0.0017%,0.0017%,0.0017%,0.0017%,0.0017%,0.0017%,0.0018%,0.0018%,0.0018%,0.0019%,0.0019%,0.0020%,0.0021%,0.0021%,0.0022%,0.0022%,0.0023%,0.0024%
3,-0.0001%,0.0041%,0.0077%,0.0082%,0.0075%,0.0069%,0.0063%,0.0058%,0.0055%,0.0051%,0.0049%,0.0046%,0.0044%,0.0042%,0.0041%,0.0039%,0.0038%,0.0036%,0.0035%,0.0034%,0.0032%,0.0031%,0.0029%,0.0028%,0.0027%,0.0026%,0.0025%,0.0024%,0.0023%,0.0022%,0.0022%,0.0021%,0.0021%,0.0021%,0.0021%,0.0021%,0.0021%,0.0021%,0.0021%,0.0022%,0.0022%,0.0022%,0.0023%,0.0023%,0.0025%,0.0025%,0.0026%,0.0026%,0.0027%,0.0027%,0.0028%
4,-0.0001%,0.0035%,0.0068%,0.0075%,0.0072%,0.0067%,0.0063%,0.0059%,0.0056%,0.0054%,0.0051%,0.0049%,0.0047%,0.0046%,0.0044%,0.0043%,0.0041%,0.0039%,0.0038%,0.0036%,0.0035%,0.0033%,0.0031%,0.0030%,0.0029%,0.0027%,0.0026%,0.0025%,0.0024%,0.0023%,0.0023%,0.0022%,0.0022%,0.0022%,0.0022%,0.0022%,0.0022%,0.0022%,0.0022%,0.0023%,0.0023%,0.0024%,0.0025%,0.0025%,0.0026%,0.0026%,0.0027%,0.0028%,0.0028%,0.0029%,0.0030%
5,-0.0000%,0.0033%,0.0061%,0.0069%,0.0067%,0.0065%,0.0062%,0.0060%,0.0058%,0.0055%,0.0054%,0.0052%,0.0051%,0.0049%,0.0048%,0.0046%,0.0044%,0.0042%,0.0041%,0.0039%,0.0037%,0.0035%,0.0034%,0.0032%,0.0030%,0.0029%,0.0028%,0.0027%,0.0026%,0.0025%,0.0024%,0.0023%,0.0023%,0.0023%,0.0023%,0.0023%,0.0023%,0.0023%,0.0024%,0.0024%,0.0024%,0.0025%,0.0026%,0.0026%,0.0027%,0.0028%,0.0028%,0.0029%,0.0030%,0.0031%,0.0032%
6,0.0001%,0.0031%,0.0056%,0.0063%,0.0063%,0.0062%,0.0061%,0.0060%,0.0058%,0.0057%,0.0056%,0.0054%,0.0053%,0.0052%,0.0051%,0.0049%,0.0047%,0.0045%,0.0043%,0.0042%,0.0040%,0.0037%,0.0036%,0.0033%,0.0032%,0.0030%,0.0029%,0.0028%,0.0026%,0.0025%,0.0025%,0.0024%,0.0024%,0.0024%,0.0023%,0.0024%,0.0024%,0.0024%,0.0024%,0.0025%,0.0025%,0.0026%,0.0027%,0.0027%,0.0028%,0.0029%,0.0029%,0.0030%,0.0031%,0.0032%,0.0033%
7,0.0001%,0.0029%,0.0052%,0.0058%,0.0059%,0.0060%,0.0060%,0.0060%,0.0059%,0.0058%,0.0058%,0.0057%,0.0056%,0.0055%,0.0054%,0.0052%,0.0050%,0.0048%,0.0046%,0.0044%,0.0042%,0.0040%,0.0038%,0.0035%,0.0033%,0.0032%,0.0030%,0.0029%,0.0028%,0.0026%,0.0026%,0.0025%,0.0024%,0.0025%,0.0024%,0.0025%,0.0024%,0.0025%,0.0025%,0.0026%,0.0026%,0.0027%,0.0028%,0.0028%,0.0029%,0.0030%,0.0031%,0.0032%,0.0032%,0.0033%,0.0034%
8,0.0002%,0.0028%,0.0048%,0.0055%,0.0056%,0.0058%,0.0058%,0.0059%,0.0059%,0.0059%,0.0059%,0.0058%,0.0058%,0.0057%,0.0056%,0.0055%,0.0053%,0.0051%,0.0049%,0.0047%,0.0044%,0.0042%,0.0040%,0.0037%,0.0035%,0.0033%,0.0031%,0.0030%,0.0029%,0.0027%,0.0027%,0.0026%,0.0025%,0.0025%,0.0025%,0.0025%,0.0025%,0.0026%,0.0026%,0.0026%,0.0027%,0.0028%,0.0029%,0.0029%,0.0030%,0.0031%,0.0032%,0.0033%,0.0033%,0.0034%,0.0035%
9,0.0002%,0.0027%,0.0045%,0.0051%,0.0054%,0.0055%,0.0057%,0.0058%,0.0059%,0.0060%,0.0060%,0.0060%,0.0060%,0.0059%,0.0058%,0.0057%,0.0055%,0.0053%,0.0051%,0.0049%,0.0046%,0.0044%,0.0042%,0.0039%,0.0037%,0.0035%,0.0033%,0.0031%,0.0030%,0.0029%,0.0028%,0.0027%,0.0026%,0.0026%,0.0026%,0.0026%,0.0026%,0.0027%,0.0027%,0.0027%,0.0028%,0.0028%,0.0030%,0.0030%,0.0031%,0.0032%,0.0033%,0.0034%,0.0035%,0.0036%,0.0037%


In [12]:
# Perform eigen decomposition
eigenvalues, eigenvectors = np.linalg.eig(cov_)

# Sort values (good practice)
idx = eigenvalues.argsort()[::-1]
eigenvalues = eigenvalues[idx]
eigenvectors = eigenvectors[:,idx]

# Format into a DataFrame
df_eigval = pd.DataFrame({'Eigenvalues': eigenvalues})

eigenvalues

array([2.02898049e-03, 4.63398406e-04, 1.63446845e-04, 8.51547101e-05,
       5.10538526e-05, 3.32765289e-05, 1.58231855e-05, 4.49832087e-06,
       1.94407432e-06, 8.99455051e-07, 6.04790270e-07, 5.90792253e-07,
       5.89198637e-07, 5.57023543e-07, 5.55577838e-07, 5.37017622e-07,
       5.25225242e-07, 5.09484922e-07, 5.02130032e-07, 4.95037888e-07,
       4.85536393e-07, 4.74757652e-07, 4.66830631e-07, 4.56358980e-07,
       4.53910470e-07, 4.45678829e-07, 4.35704316e-07, 4.34084479e-07,
       4.26484963e-07, 4.13347804e-07, 4.01916308e-07, 3.97702101e-07,
       3.90292851e-07, 3.86498129e-07, 3.76760528e-07, 3.73179456e-07,
       3.63351112e-07, 3.57997757e-07, 3.48773694e-07, 3.42142905e-07,
       3.35540502e-07, 3.27434287e-07, 3.20549997e-07, 3.13802097e-07,
       3.06870950e-07, 3.04664148e-07, 2.99586146e-07, 2.88553566e-07,
       2.83944056e-07, 2.67537628e-07, 2.48780504e-07])

In [15]:
# Work out explained proportion
df_eigval['Explained proportion'] = df_eigval['Eigenvalues'] / np.sum(df_eigval['Eigenvalues'])
df_eigval = df_eigval[:10]

# Format as percentage
df_eigval.style.format({'Explained proportion': '{:.2%}'})

Unnamed: 0,Eigenvalues,Explained proportion
0,0.002029,71.23%
1,0.000463,16.27%
2,0.000163,5.74%
3,8.5e-05,2.99%
4,5.1e-05,1.79%
5,3.3e-05,1.17%
6,1.6e-05,0.56%
7,4e-06,0.16%
8,2e-06,0.07%
9,1e-06,0.03%


In [16]:
(df_eigval['Explained proportion'][:10]*100).iplot(kind='bar', title='Percentage of overal variance', color='cornflowerblue')

In [None]:
#