In [5]:
import numpy as np
import pandas as pd
from statsmodels.tsa.statespace.sarimax import SARIMAX
import os
from sklearn.metrics import mean_squared_error
import warnings
warnings.filterwarnings("ignore")

In [2]:
data = pd.read_csv('Data/training_data.csv')
print('Shape of Data: ', data.shape)
data.head()

Shape of Data:  (13642, 21)


Unnamed: 0,Date,Tenor,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,...,1,1.1,1.2,1.3,1.4,1.5,1.6,1.7,1.8,1.9
0,1/5/2017,2M,0.468214,0.419251,0.37626,0.338158,0.304372,0.274823,0.250066,0.231535,...,0.220933,0.228418,0.240437,0.254316,0.268613,0.282659,0.296172,0.309045,0.32126,0.332831
1,1/5/2017,3M,0.458471,0.41271,0.372717,0.337489,0.306511,0.279714,0.25754,0.241007,...,0.229423,0.233839,0.242411,0.2531,0.264611,0.276244,0.287647,0.298654,0.309196,0.319254
2,1/5/2017,6M,0.410305,0.374189,0.343045,0.31607,0.292842,0.273231,0.257346,0.24544,...,0.234116,0.234078,0.236765,0.241295,0.246945,0.2532,0.259726,0.266312,0.272831,0.279207
3,1/5/2017,9M,0.385267,0.35458,0.32836,0.305878,0.286712,0.270643,0.25759,0.247529,...,0.23603,0.234085,0.234121,0.235658,0.238264,0.241586,0.24536,0.249397,0.253564,0.257773
4,1/5/2017,1Y,0.358986,0.333608,0.312126,0.29387,0.278408,0.265457,0.254819,0.246339,...,0.235185,0.232109,0.230384,0.22976,0.230003,0.230908,0.232303,0.234052,0.236049,0.238213


In [3]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 13642 entries, 0 to 13641
Data columns (total 21 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Date    13642 non-null  object 
 1   Tenor   13642 non-null  object 
 2   0.1     13642 non-null  float64
 3   0.2     13642 non-null  float64
 4   0.3     13642 non-null  float64
 5   0.4     13642 non-null  float64
 6   0.5     13642 non-null  float64
 7   0.6     13642 non-null  float64
 8   0.7     13642 non-null  float64
 9   0.8     13642 non-null  float64
 10  0.9     13642 non-null  float64
 11  1       13642 non-null  float64
 12  1.1     13642 non-null  float64
 13  1.2     13642 non-null  float64
 14  1.3     13642 non-null  float64
 15  1.4     13642 non-null  float64
 16  1.5     13642 non-null  float64
 17  1.6     13642 non-null  float64
 18  1.7     13642 non-null  float64
 19  1.8     13642 non-null  float64
 20  1.9     13642 non-null  float64
dtypes: float64(19), object(2)
memory us

In [4]:
idx = pd.date_range('1/5/2017', '10/14/2019')

for g in data.groupby('Tenor').groups:
    grp = data.groupby('Tenor').get_group(g)
    grp = grp.set_index(['Date'])
    grp.index = pd.DatetimeIndex(grp.index)
    
    grp = grp.reindex(idx, fill_value = np.nan)
    grp = grp.backfill()
    
    train = grp.iloc[:-(60), :].drop(['Tenor'], axis = 1)
    validation = grp.iloc[-(60):, :].drop(['Tenor'], axis = 1)
    
    for i in range(19):
        train_1 = train.iloc[:, i]
        validation_1 = validation.iloc[:,i]
    
        model = SARIMAX(np.asarray(train_1), order = (1,1,1), seasonal_order=(1,1,1,12))
        
        model_fit = model.fit()
        
        preds = model_fit.predict(start = 0, end = len(validation)-1)
        print("RMSE of Model "+g+' '+str(i)+': ', mean_squared_error(validation_1.values, preds, squared = False))
        name = g + '_' + str(i) + '.pkl'
        
        if os.path.isdir('Models') == False:
            os.mkdir('Models')
        
        model_fit.save('Models/'+name)

RMSE of Model 10Y 0:  0.04836441827549115
RMSE of Model 10Y 1:  0.04556596787182114
RMSE of Model 10Y 2:  0.04384869604764117
RMSE of Model 10Y 3:  0.042879339611901865
RMSE of Model 10Y 4:  0.04241654063271475
RMSE of Model 10Y 5:  0.04227450703830954
RMSE of Model 10Y 6:  0.04230480440693243
RMSE of Model 10Y 7:  0.04236476362183114
RMSE of Model 10Y 8:  0.04241617096861248
RMSE of Model 10Y 9:  0.042306590697177224
RMSE of Model 10Y 10:  0.041993349695505694
RMSE of Model 10Y 11:  0.04143376691800002
RMSE of Model 10Y 12:  0.04060853963693271
RMSE of Model 10Y 13:  0.03954925771210537
RMSE of Model 10Y 14:  0.03826859417319216
RMSE of Model 10Y 15:  0.03684510674358978
RMSE of Model 10Y 16:  0.03535331527905646
RMSE of Model 10Y 17:  0.03384533618287751
RMSE of Model 10Y 18:  0.03239874661429332
RMSE of Model 15Y 0:  0.04652151352312884
RMSE of Model 15Y 1:  0.04551673552423882
RMSE of Model 15Y 2:  0.04532153991663466
RMSE of Model 15Y 3:  0.04568048665700524
RMSE of Model 15Y 4:  

RMSE of Model 40Y 5:  0.06978277226616823
RMSE of Model 40Y 6:  0.07023309801368323
RMSE of Model 40Y 7:  0.07063760630319141
RMSE of Model 40Y 8:  0.07054566903266057
RMSE of Model 40Y 9:  0.06989004711079788
RMSE of Model 40Y 10:  0.06864693696679879
RMSE of Model 40Y 11:  0.06684174500045839
RMSE of Model 40Y 12:  0.06454796502801174
RMSE of Model 40Y 13:  0.06187277829706898
RMSE of Model 40Y 14:  0.0589364214215393
RMSE of Model 40Y 15:  0.05585310483460866
RMSE of Model 40Y 16:  0.052724161763744365
RMSE of Model 40Y 17:  0.0496285957678515
RMSE of Model 40Y 18:  0.04662526282058903
RMSE of Model 4Y 0:  0.05260873939733818
RMSE of Model 4Y 1:  0.04984658974390072
RMSE of Model 4Y 2:  0.04779202384742003
RMSE of Model 4Y 3:  0.046195569259203095
RMSE of Model 4Y 4:  0.044897573888685025
RMSE of Model 4Y 5:  0.0437881087931883
RMSE of Model 4Y 6:  0.042787200921657165
RMSE of Model 4Y 7:  0.04183526115835605
RMSE of Model 4Y 8:  0.04088901749347424
RMSE of Model 4Y 9:  0.0399204769