In [2]:
import os
from functools import partial

import numpy as np
import pandas as pd
from scipy.optimize import minimize

from Kalman_filter import *
from LW_BB_data import *

In [3]:
# In-sample window: rows whose 'Date' lies between 2010-01-01 and 2020-12-31 (both inclusive).
curve_dates = lw_yld_curves_all_months['Date']
in_sample_rows = (curve_dates >= '2010-01-01') & (curve_dates <= '2020-12-31')

# 'Date' plus the 16 yield columns kept for estimation
# (integer labels; presumably maturities in months - TODO confirm against LW_BB_data).
yield_columns = ['Date', 3, 6, 9, 12, 18, 24, 36, 48, 60, 84, 96, 108, 120, 180, 240, 360]  # 16 maturities

data = lw_yld_curves_all_months[in_sample_rows].reset_index(drop=True)
data = data[yield_columns]

# Scale every column after 'Date' by 1/100 (presumably percent -> decimal fractions).
data.iloc[:, 1:] = data.iloc[:, 1:] / 100

In [4]:

# Training data as a plain array: every column of `data` except 'Date',
# one row per observation date and one column per maturity.
observed_data = np.array(data.iloc[:, 1:])

# Read the dimensions off the array instead of hard-coding them, so they cannot
# drift from the data (the original constants were 132 observations and 16 maturities).
number_of_obs, number_of_mats = observed_data.shape

# One maturity per yield column, in column order. The values equal the column
# labels divided by 12 (e.g. 3 -> 0.25), i.e. presumably maturities in years.
maturities = np.array([0.25, 0.5, 0.75, 1, 1.5, 2, 3, 4, 5, 7, 8, 9, 10, 15, 20, 30])

# Fail early if the maturity grid and the selected yield columns get out of sync.
assert maturities.shape == (number_of_mats,), "maturities must have one entry per yield column"

In [5]:
# Starting point handed to the optimiser: 11 values that become the first
# positional argument of Kalman_filter.
# NOTE(review): the meaning and order of these parameters are defined inside
# Kalman_filter, which is not visible here - confirm before editing them.
ini_para = np.array([0.5, 1.0, 2.0, 0.03, 0.01, -0.02, 0.01, 0.005, 0.002, 0.5, 0.002])

# Bind the data-related arguments so the optimiser only sees a function of the
# parameter vector.
partial_kalman = partial(
    Kalman_filter,
    number_of_obs=number_of_obs,
    number_of_mats=number_of_mats,
    maturities=maturities,
    observed_data=observed_data,
)

# Derivative-free minimisation of the Kalman_filter objective
# (presumably a negative log-likelihood - TODO confirm).
opt1 = minimize(
    partial_kalman,
    ini_para,
    method='Nelder-Mead',
    options={'maxiter': 10000}
)

# opt1.x is used by every later cell, so make a non-converged run visible
# instead of silently carrying on with an unfinished estimate.
if not opt1.success:
    print(f"WARNING: Nelder-Mead did not converge: {opt1.message}")

8536.649252095034
8536.601057125612
8508.252070387442
8521.229048240462
8536.421983591628
8528.576482528222
8496.063754893537
8200.457248010407
8569.155553075014
8553.863827151115
8578.972029635355
8609.398336837587
8825.069189462767
9049.306071959376
8702.182801031497
8728.768195077075
8751.652250453746
8781.506988035377
8813.831387116687
8857.677744956976
8907.153052438398
8947.44485722013
8978.574787464317
9042.011965897496
9105.65000873976
9293.981894383976
9129.779656891142
9157.261281239082
9206.947420977172
9244.203841019555
9285.612225834795
9317.527740862957
9450.169959495644
9370.704651401767
9408.072350409988
9465.455747690621
9613.519829467938
9489.255573811639
9536.183104012838
9531.764398607984
9586.130305508657
9601.712103358554
9624.858780811395
9667.862454477889
9650.540642617203
9618.969403838162
9664.902134464695
9649.66587842989
9678.020533724126
9627.087930276753
9679.167546002458
9637.211642568991
9704.230540400455
9692.263100629974
9655.802122522746
9651.08672792

In [6]:
# Print the optimiser's solution vector, one value per line.
print("Optimal Parameters:")
for estimate in opt1.x:
    print(estimate)

Optimal Parameters:
0.0010887360630237246
0.030253058293331796
0.03332436920696784
-0.051365413393623785
0.00017348209296532568
0.042119184066977663
0.00411732540965116
0.00838473304363408
0.018408898524614664
0.493625242892749
0.0008183942016066376


In [None]:
# RMSE statistics for in-sample data
# Run Kalman_filter_optimized at the optimiser's solution on the training sample;
# it returns a pair, of which only the second element is used below.
rec_x, rec_y_insample = Kalman_filter_optimized(opt1.x, number_of_obs, number_of_mats, maturities, observed_data)

# Transposed so that column i lines up with column i of observed_data.
kalman_reconstructed_ylds = rec_y_insample.T

# Root-mean-square error per maturity column, scaled by 10000
# (basis points, if the yields are decimal fractions).
rmse_insample = [
    np.sqrt(np.mean(np.power(kalman_reconstructed_ylds[:, col] - observed_data[:, col], 2))) * 10000
    for col in range(number_of_mats)
]

for rmse in rmse_insample:
    print(f"RMSE: {rmse}")

print(f"Average:     {np.mean(rmse_insample)}")


RMSE: 9.69489704640413
RMSE: 3.2399557509668377
RMSE: 3.3977426536026054
RMSE: 6.240551388622519
RMSE: 8.216256301715415
RMSE: 7.803323065641523
RMSE: 5.849454200714489
RMSE: 4.703776987481999
RMSE: 5.389513270112976
RMSE: 6.829624826036696
RMSE: 5.952764248921551
RMSE: 6.020805533803934
RMSE: 7.4683565830815954
RMSE: 9.680512504101094
RMSE: 6.515072693605008
RMSE: 14.703138091577502
Average:     6.981609071649367


In [None]:
# RMSE statistics for out-of-sample data
# Out-of-sample window: rows whose 'Date' lies between 2021-01-01 and 2023-06-30 (both inclusive).
oos_data = lw_yld_curves_all_months[((lw_yld_curves_all_months['Date'] >= '2021-01-01') & (lw_yld_curves_all_months['Date'] <= '2023-06-30'))].reset_index(drop=True)
oos_data = oos_data[['Date', 3, 6, 9, 12, 18, 24, 36, 48, 60, 84, 96, 108, 120, 180, 240, 360]] # 16 maturities
# Same 1/100 scaling as applied to the in-sample data.
oos_data.iloc[:,1:] = oos_data.iloc[:,1:] / 100

observed_data_oos = np.array(oos_data.iloc[:, 1:])
# Take the number of observations from the data rather than hard-coding 30, so the
# filter is always run over exactly the rows that were selected.
number_of_obs_oos = observed_data_oos.shape[0]

# Re-run the filter with the in-sample parameter estimates (opt1.x) on the new sample.
rec_x, rec_y_oos = Kalman_filter_optimized(opt1.x, number_of_obs_oos, number_of_mats, maturities, observed_data_oos)

# Transposed so that column i lines up with column i of observed_data_oos.
kalman_reconstructed_ylds_oos = rec_y_oos.T

# Root-mean-square error per maturity column, scaled by 10000
# (basis points, if the yields are decimal fractions).
rmse_oos = []
for i in range(number_of_mats):
    residual = kalman_reconstructed_ylds_oos[:, i] - observed_data_oos[:, i]
    rmse = np.sqrt(np.mean(np.power(residual, 2)))*10000
    rmse_oos.append(rmse)

for rmse in rmse_oos:
    print(f"RMSE: {rmse}")

print(f"Average: {np.mean(rmse_oos)}")

RMSE: 21.659350091055433
RMSE: 4.044151071636958
RMSE: 9.242393996458361
RMSE: 14.663281678623639
RMSE: 14.726754295783126
RMSE: 9.702049143204642
RMSE: 6.334430055641851
RMSE: 5.418894148051698
RMSE: 6.602871217951878
RMSE: 9.046692200448549
RMSE: 11.363092448694097
RMSE: 10.21438744373073
RMSE: 9.130583782205854
RMSE: 5.457395249537254
RMSE: 31.161629024164068
RMSE: 11.178645600769448
Average:     11.24666259049735


In [None]:
# Saving output
# Column positions (into the 16 reconstructed-yield columns) kept for the export.
# With the maturity grid used above these are the 1, 2, 3, 5, 10, 15, 20 and 30 entries.
export_cols = [3, 5, 6, 8, 12, 13, 14, 15]

# In-sample reconstruction, with the matching observation dates attached.
kalman_reconstructed_ylds_df = pd.DataFrame(kalman_reconstructed_ylds[:, export_cols])
kalman_reconstructed_ylds_df['Date'] = data['Date'].reset_index(drop=True)

# Out-of-sample reconstruction, built the same way.
kalman_reconstructed_ylds_df_oos = pd.DataFrame(kalman_reconstructed_ylds_oos[:, export_cols])
kalman_reconstructed_ylds_df_oos['Date'] = oos_data['Date'].reset_index(drop=True)

# Stack in-sample on top of out-of-sample. ignore_index=True gives the merged frame
# a unique 0..n-1 row index instead of repeating the labels of both parts.
y_kalman_df = pd.concat(
    [kalman_reconstructed_ylds_df, kalman_reconstructed_ylds_df_oos],
    ignore_index=True,
)

# y_kalman_df.to_csv(data_path + "/y_kalman_merged.csv", sep = ';')