In [47]:
import os
import numpy as np
import pandas as pd
from sklearn.decomposition import KernelPCA
from sklearn.decomposition import PCA
from sklearn.preprocessing import normalize

# Data Preparation

In [2]:
# Directory that holds the raw input files; the loaders below build their paths from it.
DATA_DIR = 'data'
# List the directory to confirm the expected input files are present.
os.listdir(DATA_DIR)

['SupportData.csv', '.SupportData.csv.swp', 'YieldCurve.txt']

In [3]:
# Load the yield-curve table: a tab-separated text file with a 'Date' column
# (formatted like '01/02/90') and one column per tenor ('1 Mo' ... '30 Yr').
yc_path = os.path.join(DATA_DIR, 'YieldCurve.txt')
yc_df = pd.read_csv(yc_path, sep='\t')
# Preview the first rows.
yc_df.head()

Unnamed: 0,Date,1 Mo,3 Mo,6 Mo,1 Yr,2 Yr,3 Yr,5 Yr,7 Yr,10 Yr,20 Yr,30 Yr
0,01/02/90,,7.83,7.89,7.81,7.87,7.9,7.87,7.98,7.94,,8.0
1,01/03/90,,7.89,7.94,7.85,7.94,7.96,7.92,8.04,7.99,,8.04
2,01/04/90,,7.84,7.9,7.82,7.92,7.93,7.91,8.02,7.98,,8.04
3,01/05/90,,7.79,7.85,7.79,7.9,7.94,7.92,8.03,7.99,,8.06
4,01/08/90,,7.79,7.88,7.81,7.9,7.95,7.92,8.05,8.02,,8.09


In [4]:
# Load the supporting macro data (comma-separated): Year/Month/Day columns plus
# the federal funds, GDP, unemployment and inflation series.
sd_path = os.path.join(DATA_DIR, 'SupportData.csv')
sd_df = pd.read_csv(sd_path)
# Preview the first rows.
sd_df.head()

Unnamed: 0,Year,Month,Day,Federal Funds Target Rate,Federal Funds Upper Target,Federal Funds Lower Target,Effective Federal Funds Rate,Real GDP (Percent Change),Unemployment Rate,Inflation Rate
0,1954,7,1,,,,0.8,4.6,5.8,
1,1954,8,1,,,,1.22,,6.0,
2,1954,9,1,,,,1.06,,6.1,
3,1954,10,1,,,,0.85,8.0,5.7,
4,1954,11,1,,,,0.83,,5.3,


In [5]:
# Add an empty string column 'Date'; it is filled in a later cell with a key
# derived from Year/Month/Day.
sd_df['Date'] = ''
# Confirm the new column is present (still empty at this point).
sd_df.head()

Unnamed: 0,Year,Month,Day,Federal Funds Target Rate,Federal Funds Upper Target,Federal Funds Lower Target,Effective Federal Funds Rate,Real GDP (Percent Change),Unemployment Rate,Inflation Rate,Date
0,1954,7,1,,,,0.8,4.6,5.8,,
1,1954,8,1,,,,1.22,,6.0,,
2,1954,9,1,,,,1.06,,6.1,,
3,1954,10,1,,,,0.85,8.0,5.7,,
4,1954,11,1,,,,0.83,,5.3,,


In [6]:
# Build an 'MM/DD/YY' key for every row so sd_df can be joined to yc_df on 'Date'
# (yc_df stores its dates in that shape, e.g. '01/02/90').
#
# Why this is not a row-by-row loop any more:
#   * The two-digit year is Year % 100. Deriving it with
#     str(year).replace('19', '').replace('20', '') strips those substrings
#     wherever they occur, so 1919, 1920, 2019 and 2020 all become ''.
#   * One whole-column assignment replaces the chained write
#     sd_df['Date'][i] = ..., which triggers SettingWithCopyWarning and is not
#     guaranteed to modify sd_df itself.
two_digit_year = (sd_df['Year'].astype(int) % 100).astype(str).str.zfill(2)
month = sd_df['Month'].astype(int).astype(str).str.zfill(2)
day = sd_df['Day'].astype(int).astype(str).str.zfill(2)
sd_df['Date'] = month + '/' + day + '/' + two_digit_year

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """


In [7]:
# Check that 'Date' now holds MM/DD/YY strings.
sd_df.head()

Unnamed: 0,Year,Month,Day,Federal Funds Target Rate,Federal Funds Upper Target,Federal Funds Lower Target,Effective Federal Funds Rate,Real GDP (Percent Change),Unemployment Rate,Inflation Rate,Date
0,1954,7,1,,,,0.8,4.6,5.8,,07/01/54
1,1954,8,1,,,,1.22,,6.0,,08/01/54
2,1954,9,1,,,,1.06,,6.1,,09/01/54
3,1954,10,1,,,,0.85,8.0,5.7,,10/01/54
4,1954,11,1,,,,0.83,,5.3,,11/01/54


In [8]:
# Inner-join the support data with the yield curve on the shared 'Date' key;
# only dates that appear in both tables are kept.
data_df = sd_df.merge(yc_df, on='Date', how='inner')
data_df.head()

Unnamed: 0,Year,Month,Day,Federal Funds Target Rate,Federal Funds Upper Target,Federal Funds Lower Target,Effective Federal Funds Rate,Real GDP (Percent Change),Unemployment Rate,Inflation Rate,...,3 Mo,6 Mo,1 Yr,2 Yr,3 Yr,5 Yr,7 Yr,10 Yr,20 Yr,30 Yr
0,1990,2,1,8.25,,,8.24,,5.3,4.6,...,8.02,8.13,8.09,8.28,8.35,8.35,8.38,8.42,,8.44
1,1990,3,1,8.25,,,8.28,,5.2,4.9,...,8.08,8.19,8.21,8.53,8.53,8.53,8.62,8.59,,8.61
2,1990,5,1,8.25,,,8.18,,5.4,4.8,...,8.19,8.49,8.56,9.02,9.09,9.08,9.09,9.08,,9.04
3,1990,6,1,8.25,,,8.29,,5.2,4.9,...,7.94,8.01,8.06,8.33,8.36,8.38,8.47,8.44,,8.43
4,1990,7,13,8.0,,,,,,,...,7.85,7.91,7.89,8.13,8.25,8.32,8.43,8.45,,8.46


In [9]:
# Fill gaps from both directions: first pull the next later observation backward,
# then carry the last observation forward for anything still missing at the end.
# .bfill()/.ffill() replace fillna(method='backfill'/'pad'), which pandas has deprecated.
# NOTE(review): backfilling copies later values into earlier rows (the 1990 rows end up
# with 'Federal Funds Upper Target' = 0.25) - confirm this look-ahead is acceptable.
data_df = data_df.bfill().ffill()

In [10]:
# Preview the merged frame after gap filling.
data_df.head()

Unnamed: 0,Year,Month,Day,Federal Funds Target Rate,Federal Funds Upper Target,Federal Funds Lower Target,Effective Federal Funds Rate,Real GDP (Percent Change),Unemployment Rate,Inflation Rate,...,3 Mo,6 Mo,1 Yr,2 Yr,3 Yr,5 Yr,7 Yr,10 Yr,20 Yr,30 Yr
0,1990,2,1,8.25,0.25,0.0,8.24,-3.4,5.3,4.6,...,8.02,8.13,8.09,8.28,8.35,8.35,8.38,8.42,6.12,8.44
1,1990,3,1,8.25,0.25,0.0,8.28,-3.4,5.2,4.9,...,8.08,8.19,8.21,8.53,8.53,8.53,8.62,8.59,6.12,8.61
2,1990,5,1,8.25,0.25,0.0,8.18,-3.4,5.4,4.8,...,8.19,8.49,8.56,9.02,9.09,9.08,9.09,9.08,6.12,9.04
3,1990,6,1,8.25,0.25,0.0,8.29,-3.4,5.2,4.9,...,7.94,8.01,8.06,8.33,8.36,8.38,8.47,8.44,6.12,8.43
4,1990,7,13,8.0,0.25,0.0,8.13,-3.4,5.7,5.5,...,7.85,7.91,7.89,8.13,8.25,8.32,8.43,8.45,6.12,8.46


In [11]:
# List the column names of the merged frame.
data_df.columns

Index(['Year', 'Month', 'Day', 'Federal Funds Target Rate',
       'Federal Funds Upper Target', 'Federal Funds Lower Target',
       'Effective Federal Funds Rate', 'Real GDP (Percent Change)',
       'Unemployment Rate', 'Inflation Rate', 'Date', '1 Mo', '3 Mo', '6 Mo',
       '1 Yr', '2 Yr', '3 Yr', '5 Yr', '7 Yr', '10 Yr', '20 Yr', '30 Yr'],
      dtype='object')

In [12]:
# Columns to discard: the date parts and every yield-curve tenor except '1 Yr'.
drop_list = ['Year', 'Month', 'Day', '1 Mo', '3 Mo', '6 Mo', '2 Yr', '3 Yr',
             '5 Yr', '7 Yr', '10 Yr', '20 Yr', '30 Yr']
# Drop them in a single call and name the axis through the `columns` keyword.
# The previous drop(col, 'columns') passed the axis positionally, which pandas 2.x
# no longer accepts, and rebuilt the frame once per column.
data_df = data_df.drop(columns=drop_list)

In [13]:
# Preview the frame after dropping the unused columns.
data_df.head()

Unnamed: 0,Federal Funds Target Rate,Federal Funds Upper Target,Federal Funds Lower Target,Effective Federal Funds Rate,Real GDP (Percent Change),Unemployment Rate,Inflation Rate,Date,1 Yr
0,8.25,0.25,0.0,8.24,-3.4,5.3,4.6,02/01/90,8.09
1,8.25,0.25,0.0,8.28,-3.4,5.2,4.9,03/01/90,8.21
2,8.25,0.25,0.0,8.18,-3.4,5.4,4.8,05/01/90,8.56
3,8.25,0.25,0.0,8.29,-3.4,5.2,4.9,06/01/90,8.06
4,8.0,0.25,0.0,8.13,-3.4,5.7,5.5,07/13/90,7.89


# Data Cleaning

In [48]:
# Feature matrix: every column except the trailing two ('Date' and '1 Yr'),
# converted to a float array.
feature_frame = data_df.iloc[:, :-2]
data_mat = feature_frame.values.astype(float)
# Show the matrix together with its shape.
data_mat, data_mat.shape

(array([[ 8.25,  0.25,  0.  , ..., -3.4 ,  5.3 ,  4.6 ],
        [ 8.25,  0.25,  0.  , ..., -3.4 ,  5.2 ,  4.9 ],
        [ 8.25,  0.25,  0.  , ..., -3.4 ,  5.4 ,  4.8 ],
        ...,
        [ 1.  ,  0.75,  0.5 , ...,  3.5 ,  4.7 ,  2.2 ],
        [ 1.  ,  0.75,  0.5 , ...,  3.5 ,  4.7 ,  2.2 ],
        [ 1.  ,  1.  ,  0.75, ...,  3.5 ,  4.7 ,  2.2 ]]), (287, 7))

# Normalization

In [49]:
# Rescale data_mat with the 'max' norm along axis=1, i.e. each row (sample) is
# divided by its own max norm.
# NOTE(review): row-wise scaling mixes features with different units; inputs to PCA
# are more commonly scaled per column - confirm axis=1 is what is intended here.
data_mat = normalize(data_mat, 'max', axis=1)

In [50]:
# Inspect the normalized matrix.
data_mat

array([[ 1.     ,  0.0303 ,  0.     , ..., -0.41212,  0.64242,  0.55758],
       [ 0.99638,  0.03019,  0.     , ..., -0.41063,  0.62802,  0.59179],
       [ 1.     ,  0.0303 ,  0.     , ..., -0.41212,  0.65455,  0.58182],
       ...,
       [ 0.21277,  0.15957,  0.10638, ...,  0.74468,  1.     ,  0.46809],
       [ 0.21277,  0.15957,  0.10638, ...,  0.74468,  1.     ,  0.46809],
       [ 0.21277,  0.21277,  0.15957, ...,  0.74468,  1.     ,  0.46809]])

# PCA: Singular Value Decomposition

In [51]:
# Keep all 7 components: data_mat has 7 columns, so nothing is discarded and the
# full variance breakdown can be inspected afterwards.
pca = PCA(n_components=7)
# Fit on the normalized feature matrix.
pca.fit(data_mat) 

PCA(copy=True, iterated_power='auto', n_components=7, random_state=None,
  svd_solver='auto', tol=0.0, whiten=False)

In [52]:
# Print arrays with 5 decimals and without scientific notation. This changes
# numpy's global print settings, so it also affects every later cell.
np.set_printoptions(precision=5, suppress=True)
# Fraction of total variance captured by each fitted component.
print('explained variance ratio:', pca.explained_variance_ratio_)  
# Singular values corresponding to each fitted component.
print('singular values:', pca.singular_values_) 

explained variance ratio: [0.65002 0.29853 0.03511 0.01203 0.00289 0.00138 0.00003]
singular values: [8.80571 5.96756 2.04651 1.19805 0.58756 0.40512 0.06449]


# Kernel PCA: Linear

In [44]:
# Kernel PCA with a linear kernel, keeping 6 components.
transformer = KernelPCA(n_components=6, kernel='linear')
# Fit on data_mat and project it onto those components in one step.
X_transformed = transformer.fit_transform(data_mat)

In [45]:
# One row per sample, one column per retained component.
X_transformed.shape

(287, 6)

In [46]:
# Inspect the projected coordinates.
X_transformed

array([[ 6.25034, -6.92235, -1.24112,  0.18541,  0.06181, -0.08175],
       [ 6.36169, -6.95343, -1.25607, -0.11005,  0.04127, -0.06459],
       [ 6.22087, -6.93219, -1.39639,  0.02031,  0.09593, -0.0715 ],
       ...,
       [-3.02839,  1.23331,  1.88822, -1.09541,  0.08199, -0.53517],
       [-3.02839,  1.23331,  1.88822, -1.09541,  0.08199, -0.53517],
       [-3.03085,  1.23449,  1.89685, -1.11924,  0.10974, -0.88671]])

# Check Data

In [14]:
# Number of rows in the support data before merging.
len(sd_df)

904

In [15]:
# Spot-check rows 500-502 of the support data (positional slice, end exclusive).
sd_df[500: 503]

Unnamed: 0,Year,Month,Day,Federal Funds Target Rate,Federal Funds Upper Target,Federal Funds Lower Target,Effective Federal Funds Rate,Real GDP (Percent Change),Unemployment Rate,Inflation Rate,Date
500,1990,2,1,8.25,,,8.24,,5.3,4.6,02/01/90
501,1990,3,1,8.25,,,8.28,,5.2,4.9,03/01/90
502,1990,4,1,8.25,,,8.26,1.6,5.4,4.8,04/01/90
