<a target="_blank" href="https://colab.research.google.com/github/Techtonique/nnetsauce/blob/crossvalmts/nnetsauce/demo/vereszkipeter_2025-01-28-nnetsauce-mts-crossval-example.ipynb">
  <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
</a>

In [None]:
# Upgrade pip, then install nnetsauce from the `crossvalmts` branch of its GitHub repository
# (no pip cache, so the branch is fetched fresh).
!pip install --upgrade pip
!pip install git+https://github.com/Techtonique/nnetsauce.git@crossvalmts --upgrade --no-cache-dir

### This code is copied from nnetsauce's MTS `cross_val_score` example:

In [2]:
import os
import nnetsauce as ns
import numpy as np
import pandas as pd
import statsmodels.api as sm
from sklearn.datasets import load_diabetes
from sklearn.linear_model import ElasticNet, Ridge, LassoCV
from sklearn.model_selection import train_test_split
from statsmodels.tsa.base.datetools import dates_from_str
from nnetsauce.utils.model_selection import cross_val_score

#print(f"\n ----- Running: {os.path.basename(__file__)}... ----- \n")

# Example data: the `macrodata` dataset shipped with statsmodels
mdata = sm.datasets.macrodata.load_pandas().data
# Build the dates index from the `year` and `quarter` columns ("<year>Q<quarter>")
dates = mdata[['year', 'quarter']].astype(int).astype(str)
quarterly = dates["year"] + "Q" + dates["quarter"]
quarterly = dates_from_str(quarterly)
print(mdata.head())
# Keep the three series used in this example and index them by date
mdata = mdata[['realgovt', 'tbilrate', 'cpi']]
mdata.index = pd.DatetimeIndex(quarterly)
# Work on log-differences; diff() leaves a NaN first row, which dropna() removes
data = np.log(mdata).diff().dropna()

# Chronological split: first 90% of the rows for training, the rest for testing
n = data.shape[0]
# np.floor returns a float; cast to int so that the positional indexers passed
# to .iloc below are integer arrays rather than float arrays
max_idx_train = int(np.floor(n*0.9))
training_index = np.arange(0, max_idx_train)
testing_index = np.arange(max_idx_train, n)
df_train = data.iloc[training_index,:]
df_test = data.iloc[testing_index,:]

print(f"df_train.shape: {df_train.shape}")
# Multivariate time series model (nnetsauce MTS) wrapping a Ridge regressor
regr = Ridge()
obj_MTS = ns.MTS(regr, lags = 3,
                 n_hidden_features=7,
                 seed=24, verbose = 0,
                 show_progress=False)

     year  quarter  realgdp  realcons  realinv  realgovt  realdpi   cpi  \
0 1959.00     1.00  2710.35   1707.40   286.90    470.05  1886.90 28.98   
1 1959.00     2.00  2778.80   1733.70   310.86    481.30  1919.70 29.15   
2 1959.00     3.00  2775.49   1751.80   289.23    491.26  1916.40 29.35   
3 1959.00     4.00  2785.20   1753.70   299.36    484.05  1931.30 29.37   
4 1960.00     1.00  2847.70   1770.50   331.72    462.20  1955.50 29.54   

      m1  tbilrate  unemp    pop  infl  realint  
0 139.70      2.82   5.80 177.15  0.00     0.00  
1 141.70      3.08   5.10 177.83  2.34     0.74  
2 140.50      3.82   5.30 178.66  2.74     1.09  
3 140.00      4.33   5.60 179.39  0.27     4.06  
4 139.60      3.50   5.20 180.01  2.31     1.19  
df_train.shape: (181, 3)


In [3]:
# Tried to set the frequency of df_train, but an exception occurs:
#df_train = df_train.asfreq("QE")

# TM: Not sure what 'QE' does; stick to the examples in the documentation (otherwise, you could contribute this to the package).

In [4]:
# Fit the MTS model on the training split.
obj_MTS.fit(df_train)

In [5]:
# Display the `input_dates` attribute of the fitted model.
obj_MTS.input_dates
# Here we see the quarterly dates, as in the `quarterly` object

Unnamed: 0,date
0,1959-06-30
1,1959-09-30
2,1959-12-31
3,1960-03-31
4,1960-06-30
...,...
176,2003-06-30
177,2003-09-30
178,2003-12-31
179,2004-03-31


In [6]:
# Forecast 10 steps ahead with the fitted model and print the resulting table.
forecast_horizon = 10
forecast = obj_MTS.predict(h=forecast_horizon)
print(forecast)
# The output is right!

            realgovt  tbilrate  cpi
date                               
2004-09-30      0.00      0.04 0.01
2004-12-31      0.00     -0.05 0.01
2005-03-31      0.00      0.02 0.01
2005-06-30      0.00      0.03 0.01
2005-09-30      0.00      0.00 0.01
2005-12-31      0.00      0.01 0.01
2006-03-31      0.00      0.02 0.01
2006-06-30     -0.00      0.02 0.01
2006-09-30     -0.00      0.02 0.01
2006-12-31     -0.00      0.02 0.01


In [7]:
# Plotting is disabled here: if we want this to work, we need to ask for probabilistic forecasts at the beginning.
# obj_MTS.plot("realgovt")

In [8]:
# Display the `output_dates_` attribute set by the predict call.
obj_MTS.output_dates_
# The same dates as the index of the forecast table printed above

Unnamed: 0,date
0,2004-09-30
1,2004-12-31
2,2005-03-31
3,2005-06-30
4,2005-09-30
5,2005-12-31
6,2006-03-31
7,2006-06-30
8,2006-09-30
9,2006-12-31


In [9]:
# Settings for the time-series cross-validation of the MTS model.
# NOTE(review): fixed_window=False presumably means an expanding training
# window starting at `initial_window` rows — confirm in the nnetsauce docs.
cv_options = dict(
    n_jobs=None,
    verbose=0,
    initial_window=100,
    horizon=5,
    fixed_window=False,
    show_progress=True,
)
cv_result = obj_MTS.cross_val_score(df_train, **cv_options)
print(cv_result)

100%|██████████| 77/77 [00:09<00:00,  7.74it/s]

(array([0.08036295, 0.07163626, 0.07309572, 0.07415093, 0.07322812,
       0.05706345, 0.05104346, 0.04820787, 0.07040793, 0.0395777 ,
       0.04041368, 0.03268957, 0.04716435, 0.05186224, 0.05502821,
       0.05212436, 0.06668787, 0.05749807, 0.04604286, 0.03037117,
       0.01757671, 0.03041419, 0.03370354, 0.0549238 , 0.05338605,
       0.05037539, 0.07646393, 0.07442479, 0.07523952, 0.09286762,
       0.07996592, 0.05311544, 0.05272979, 0.03586323, 0.03704775,
       0.04814703, 0.06507738, 0.06522773, 0.09680396, 0.08245264,
       0.07338459, 0.050336  , 0.04369543, 0.03173418, 0.03372357,
       0.02569141, 0.02101952, 0.02829682, 0.02999359, 0.02071966,
       0.01246087, 0.01615702, 0.01954789, 0.03266004, 0.03220545,
       0.03246558, 0.0286146 , 0.0307969 , 0.03256391, 0.03820682,
       0.04347297, 0.04128335, 0.03584373, 0.08121428, 0.10060696,
       0.11155611, 0.16040294, 0.16049974, 0.1448402 , 0.13329526,
       0.126114  , 0.0844537 , 0.0940232 , 0.08621254, 0.0919




In [10]:
# Refit on the training split and display `input_dates` again
# (presumably to check them after the cross_val_score call above — confirm intent).
obj_MTS.fit(df_train)
obj_MTS.input_dates

Unnamed: 0,date
0,1959-06-30
1,1959-09-30
2,1959-12-31
3,1960-03-31
4,1960-06-30
...,...
176,2003-06-30
177,2003-09-30
178,2003-12-31
179,2004-03-31


In [11]:
# NOTE(review): this cell repeats the previous cell verbatim (refit, then display `input_dates`).
obj_MTS.fit(df_train)
obj_MTS.input_dates

Unnamed: 0,date
0,1959-06-30
1,1959-09-30
2,1959-12-31
3,1960-03-31
4,1960-06-30
...,...
176,2003-06-30
177,2003-09-30
178,2003-12-31
179,2004-03-31
