In [1]:
import datetime as dt
import pickle

from macrosim.SeriesAccessor import SeriesAccessor
from macrosim.SimEngine import SimEngine
from macrosim.EqSearch import EqSearch
from macrosim.GrowthDetector import GrowthDetector

Detected IPython. Loading juliacall extension. See https://juliapy.github.io/PythonCall.jl/stable/compat/#IPython


In [2]:
# Connect to FRED. The key is looked up by name ('FRED_KEY') in an env file that
# lives outside the notebook, so no credential is hard-coded here.
fred = SeriesAccessor(key_path='../fred_key.env', key_name='FRED_KEY')

# Sample window shared by every requested series.
start = dt.datetime(2002, 1, 1)
end = dt.datetime(2024, 1, 1)

# One row per series: (FRED id, column alias, fill method).
# An alias of None keeps the FRED id as the column name; a fill method of None
# requests no fill for that column.
# NOTE(review): 'divide' presumably spreads a lower-frequency (annual) value over
# the quarterly index -- confirm against SeriesAccessor.fill.
series_spec = [
    ('NETEXP', None, None),
    ('CIVPART', None, None),
    ('CORESTICKM159SFRBATL', 'CPI', None),
    ('LES1252881600Q', 'RWAGE', None),
    ('SPPOPGROWUSA', 'POPGROWTH', 'ffill'),
    ('A264RX1A020NBEA', 'I_C', 'divide'),
    ('GDPC1', 'RGDP', None),
]
# Unzip the table back into the three parallel lists the accessor expects.
series_ids, series_alias, fill_methods = (list(column) for column in zip(*series_spec))

# Fetch everything on a quarter-start ('QS') index, then apply the per-column fills.
df = fred.fill(
    data=fred.get_series(
        series_ids=series_ids,
        series_alias=series_alias,
        reindex_freq='QS',
        date_range=(start, end),
    ),
    methods=fill_methods,
)
df

Unnamed: 0,NETEXP,CIVPART,CPI,RWAGE,POPGROWTH,I_C,RGDP
2002-01-01,-386.884,66.5,3.557463,341.0,0.927797,80.2338,14372.785
2002-04-01,-428.259,66.7,3.465822,339.0,0.927797,80.2338,14460.848
2002-07-01,-447.548,66.5,3.263706,337.0,0.927797,80.2338,14519.633
2002-10-01,-496.289,66.6,2.937194,336.0,0.927797,80.2338,14537.580
2003-01-01,-525.262,66.4,2.778159,335.0,0.859482,82.2952,14614.141
...,...,...,...,...,...,...,...
2023-01-01,-813.648,62.4,6.488339,364.0,0.491925,126.6340,22403.435
2023-04-01,-803.479,62.6,6.267738,365.0,0.491925,126.6340,22539.418
2023-07-01,-781.091,62.6,5.402919,366.0,0.491925,126.6340,22780.933
2023-10-01,-791.152,62.7,4.882717,370.0,0.491925,126.6340,22960.600


In [3]:
# Search for a closed-form equation explaining real GDP from the other columns.
target_col = 'RGDP'
eqsr = EqSearch(
    X=df.drop(columns=target_col),
    y=df[target_col],
)

# NOTE(review): the distillation step appears to report a RandomForest score and
# the search step drives the Julia backend -- confirm against EqSearch.
eqsr.distil_split()
eqsr.search()

# get_model / get_eq are read as plain attributes (no call). The fitted model is
# kept for the simulation engine; the equation is the cell's displayed output.
main_estimator = eqsr.get_model
eqsr.get_eq

Compiling Julia backend...


RandomForest Score at Distillation: 0.988


[ Info: Started!



Expressions evaluated per second: 2.730e+05
Progress: 1151 / 9300 total iterations (12.376%)
════════════════════════════════════════════════════════════════════════════════════════════════════
───────────────────────────────────────────────────────────────────────────────────────────────────
Complexity  Loss       Score      Equation
1           5.498e+06  1.594e+01  y = 18210
3           3.375e+06  2.439e-01  y = RWAGE * 52.801
4           3.305e+06  2.094e-02  y = log(I_C) * 3932.9
5           2.006e+06  4.992e-01  y = (POPGROWTH * -8694.5) + 24569
6           1.129e+06  5.748e-01  y = log(I_C + -63.867) * 5012.5
8           1.009e+06  5.637e-02  y = log((I_C - 66.685) + CPI) * 5025
9           9.752e+05  3.394e-02  y = (exp(CPI) + 4973) * log(I_C - 63.727)
10          8.313e+05  1.597e-01  y = ((POPGROWTH * -0.65297) + log(I_C - 50.66)) * 5244.4
12          6.888e+05  9.399e-02  y = (log(I_C + (CPI * 10.541)) * 7052.3) - (CIVPART ^ 2.32...
                                      9)


[ Info: Final population:
[ Info: Results saved to:


2*CPI - 2*NETEXP + 10.40815*RWAGE*log((-CIVPART + I_C + 2.721357)/POPGROWTH) + 4500.881 - 10.40815*(I_C - 14.352026)/POPGROWTH - 1289.0635/CPI

Error in callback _flush_stdio (for post_execute):


UnicodeDecodeError: 'utf-8' codec can't decode bytes in position 4094-4095: unexpected end of data

In [None]:
# Build one estimator per feature column (everything except the RGDP target).
gd = GrowthDetector(features=df.drop(columns='RGDP'))
estimators = gd.compose_estimators()

# For each feature: its most recent `gd.get_lag_count` observations paired with
# the estimator built for that column. get_lag_count is read on every iteration,
# exactly as a plain loop would, rather than cached in a local.
init_params = {
    col: (eqsr.X[col].tail(gd.get_lag_count), estimators[col])
    for col in eqsr.X
}

# Wire the fitted equation model and the per-feature seeds into the simulator.
engine = SimEngine(
    sr=main_estimator,
    n_lags=gd.get_lag_count,
    init_params=init_params,
    entropy_coef=None,
)


Granger Causality
number of lags (no zero) 1
ssr based F test:         F=1.6819  , p=0.1982  , df_denom=85, df_num=1
ssr based chi2 test:   chi2=1.7413  , p=0.1870  , df=1
likelihood ratio test: chi2=1.7243  , p=0.1891  , df=1
parameter F test:         F=1.6819  , p=0.1982  , df_denom=85, df_num=1

Granger Causality
number of lags (no zero) 2
ssr based F test:         F=3.6626  , p=0.0300  , df_denom=82, df_num=2
ssr based chi2 test:   chi2=7.7718  , p=0.0205  , df=2
likelihood ratio test: chi2=7.4441  , p=0.0242  , df=2
parameter F test:         F=3.6626  , p=0.0300  , df_denom=82, df_num=2

Granger Causality
number of lags (no zero) 3
ssr based F test:         F=2.5871  , p=0.0589  , df_denom=79, df_num=3
ssr based chi2 test:   chi2=8.4491  , p=0.0376  , df=3
likelihood ratio test: chi2=8.0594  , p=0.0448  , df=3
parameter F test:         F=2.5871  , p=0.0589  , df_denom=79, df_num=3

Granger Causality
number of lags (no zero) 4
ssr based F test:         F=2.3696  , p=0.0599  , df_d

In [None]:
# Run the simulation and keep the result for inspection in later cells.
# NOTE(review): 50 is presumably the number of periods to simulate -- confirm
# against SimEngine.simulate.
N_SIM = 50
out = engine.simulate(N_SIM)