*https://github.com/Aubrey-Bermuda-CA/IFRS9-MODELING-CHINA*

*3.1 Function description: Read the historical macro data and the external forecast data, then use auto-ARIMA (pmdarima) to forecast each macro factor.*

In [1]:
import time
import pandas as pd
import pmdarima as pm

In [2]:
# Quarterly calendar running from the first historical quarter to the end of the forecast horizon.
start_date = '2010-03-31'
forecast_date = '2026-09-30'
date_index = pd.date_range(start_date, forecast_date, freq='Q')

# Both CSV files are read the same way: the first column is parsed as dates and used as the index.
csv_layout = {'index_col': [0], 'parse_dates': [0]}
macro_his_data = pd.read_csv('./macrofactor_historical_data.csv', **csv_layout)
macro_fc_data = pd.read_csv('./macrofactor_forecast_data.csv', **csv_layout)

# Historical values take precedence; the external forecasts only fill the
# cells (and dates) that the historical file does not provide.
macro_data = macro_his_data.combine_first(macro_fc_data)

# Optional: remaining gaps can be filled by interpolation. Supported methods include
# 'linear', 'nearest', 'zero', 'slinear', 'quadratic', 'cubic', 'barycentric', 'polynomial',
# 'krogh', 'piecewise_polynomial', 'spline', 'pchip', 'akima', 'cubicspline'.
# macro_data = macro_data.interpolate(method='spline')

# Display the combined frame as the cell output.
macro_data

Unnamed: 0_level_0,GDP_Const_Qtr,Ind_Pro_Mth,CPI_Mth,Core_CPI_Mth,PPI_Mth,FAI_Cum,Retail_Sales_Mth,Imports_Mth,Exports_Mth,M1,...,PPIRM_Mth,CGPI_Mth,70_Cities_Price_Mth,Power_Gen_Mth,Freight_Mth,Urban_Income_Cum,Urban_Exp_Cum,Nat_Housing_Index,Macro_Eco_Index,Housing_Sales_Cum
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2010-03-31,12.20,18.1,102.400,,5.91,26.4,18.00,66.27,24.21,29.94,...,11.46,105.6,9.5,17.6,15.800000,9.8,11.01,105.89,104.0,57.7
2010-06-30,10.80,13.7,102.900,,6.41,25.5,18.30,33.87,43.87,24.56,...,10.80,106.6,7.7,11.4,12.300000,10.2,9.89,105.06,102.6,25.4
2010-09-30,9.90,13.3,103.600,,4.33,24.5,18.84,24.38,25.08,20.87,...,7.10,106.1,6.2,8.1,11.900000,10.5,9.32,103.52,101.9,15.9
2010-12-31,9.90,13.5,104.600,,5.93,24.5,19.10,25.94,17.87,21.19,...,9.47,107.9,5.0,5.1,0.310000,11.3,9.84,101.79,103.6,18.3
2011-03-31,10.20,14.8,105.383,,7.31,25.0,17.40,27.50,35.76,15.00,...,10.53,109.3,3.7,14.8,14.300000,12.3,10.69,102.98,102.5,27.3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-09-30,4.60,5.4,100.400,0.1,-2.80,3.4,3.20,0.30,2.40,-7.40,...,-2.20,98.3,-9.0,6.0,2.315156,4.5,5.00,92.40,,-22.7
2024-12-31,4.85,,1.800,0.7,-2.20,3.0,6.40,11.00,,-6.50,...,,,,,,,,,,
2025-03-31,4.00,,,,,3.6,,,,,...,,,,,,,,,,
2025-06-30,4.55,,,,,3.9,,,,,...,,,,,,,,,,


In [3]:
# Forecast every macro factor with a seasonal auto-ARIMA and export the result.
#
# For each column of `macro_data` the non-missing observations are fitted with
# pmdarima's stepwise search, the model is projected up to the last date of
# `date_index`, and the predictions are used to fill the cells that
# `macro_data` leaves empty. The completed table is written to
# './3.1_predict_data.csv'.
df = macro_data
print(f'macrofoctor_list：{df.columns.values}')


def _on_dates(values, index, name):
    """Return `values` as a Series placed positionally on `index`.

    Any index carried by `values` is discarded on purpose: the fitted series has
    a date index without frequency information, so the labels returned for
    out-of-sample predictions cannot be relied on to be dates (the
    `get_prediction_index` warnings this notebook used to emit point at that).
    Raises ValueError if the number of values does not match `index`.
    """
    return pd.Series(pd.Series(values).to_numpy(), index=index, name=name)


# timer
start_t = time.perf_counter()

# fit one model per macro factor; predictions are collected by column name
forecasts = {}
for column in df:
    y = df[column].dropna()
    if y.empty:
        # Nothing to fit on; the column simply stays empty in the output.
        print(f'Skip {column}: no observations.')
        continue
    # NOTE(review): dropna() also removes gaps inside the series, so the model
    # treats the remaining points as consecutive quarters — confirm no factor
    # has interior gaps, or interpolate them beforehand.

    # quarterly data, so one seasonal cycle is 4 observations
    m = 4
    # calc d and D first to save time
    d = pm.arima.ndiffs(y, alpha=0.05, test='adf', max_d=2)
    D = pm.arima.nsdiffs(y, m=m, test='ocsb', max_D=2)
    print(f'Traverse {column} equation...')
    arima = pm.auto_arima(y, m=m, method='lbfgs', # 'newton', 'nm', 'bfgs', 'lbfgs', 'powell', 'cg', 'ncg', 'basinhopping'
                          start_p=1, start_q=1, d=d,
                          start_P=1, start_Q=1, D=D,
                          max_p=5, max_q=5,
                          max_P=5, max_Q=5,
                          seasonal=True, alpha=0.05,
                          information_criterion='aic', # 'aic', 'bic', 'hqic', 'oob'
                          stepwise=True, # The stepwise algorithm can be significantly faster than fitting all
                          error_action='ignore',
                          suppress_warnings=True,
                          trace=True
                          )

    # predict
    # In-sample fitted values sit on the dates the model was trained on.
    pr_in = _on_dates(
        arima.predict_in_sample(X=None, dynamic=False, return_conf_int=False),
        y.index, column)
    pr = pr_in

    # Out-of-sample: forecast exactly the quarters of `date_index` that lie after
    # the last observation, instead of a fixed number of periods, and label the
    # forecasts with those dates explicitly.
    future_index = date_index[date_index > y.index[-1]]
    if len(future_index) > 0:
        pr_out = _on_dates(
            arima.predict(n_periods=len(future_index), X=None, return_conf_int=False),
            future_index, column)
        pr = pd.concat([pr_in, pr_out])

    forecasts[column] = pr

# Assemble all predictions once, aligned on the modelling calendar
# (equivalent to left-joining every column onto `date_index`).
predict_data = pd.DataFrame(forecasts, index=date_index)

# Output prediction results: observed / externally forecast values win,
# model predictions only fill what is still missing.
predict_data = macro_data.combine_first(predict_data)
predict_data.to_csv('./3.1_predict_data.csv')

dur = (time.perf_counter() - start_t) / 60
print(f'The prediction process took a total of {dur:,.2f} minutes.')

macrofoctor_list：['GDP_Const_Qtr' 'Ind_Pro_Mth' 'CPI_Mth' 'Core_CPI_Mth' 'PPI_Mth'
 'FAI_Cum' 'Retail_Sales_Mth' 'Imports_Mth' 'Exports_Mth' 'M1' 'M2'
 'Loans_RMB' 'Social_Financing' 'Manuf_PMI' 'Ind_Revenue_Cum' 'PPIRM_Mth'
 'CGPI_Mth' '70_Cities_Price_Mth' 'Power_Gen_Mth' 'Freight_Mth'
 'Urban_Income_Cum' 'Urban_Exp_Cum' 'Nat_Housing_Index' 'Macro_Eco_Index'
 'Housing_Sales_Cum']
Traverse GDP_Const_Qtr equation...
Performing stepwise search to minimize aic
 ARIMA(1,0,1)(1,0,1)[4] intercept   : AIC=inf, Time=0.15 sec
 ARIMA(0,0,0)(0,0,0)[4] intercept   : AIC=322.792, Time=0.01 sec
 ARIMA(1,0,0)(1,0,0)[4] intercept   : AIC=304.362, Time=0.04 sec
 ARIMA(0,0,1)(0,0,1)[4] intercept   : AIC=312.425, Time=0.03 sec
 ARIMA(0,0,0)(0,0,0)[4]             : AIC=430.838, Time=0.01 sec
 ARIMA(1,0,0)(0,0,0)[4] intercept   : AIC=308.477, Time=0.02 sec
 ARIMA(1,0,0)(2,0,0)[4] intercept   : AIC=304.825, Time=0.06 sec
 ARIMA(1,0,0)(1,0,1)[4] intercept   : AIC=inf, Time=0.13 sec
 ARIMA(1,0,0)(0,0,1)[4] i

  return get_prediction_index(
  return get_prediction_index(


 ARIMA(1,1,1)(1,0,1)[4] intercept   : AIC=inf, Time=0.18 sec
 ARIMA(0,1,0)(0,0,0)[4] intercept   : AIC=204.086, Time=0.01 sec
 ARIMA(1,1,0)(1,0,0)[4] intercept   : AIC=182.658, Time=0.02 sec
 ARIMA(0,1,1)(0,0,1)[4] intercept   : AIC=181.146, Time=0.04 sec
 ARIMA(0,1,0)(0,0,0)[4]             : AIC=202.348, Time=0.01 sec
 ARIMA(0,1,1)(0,0,0)[4] intercept   : AIC=182.970, Time=0.02 sec
 ARIMA(0,1,1)(1,0,1)[4] intercept   : AIC=inf, Time=0.10 sec
 ARIMA(0,1,1)(0,0,2)[4] intercept   : AIC=180.271, Time=0.05 sec
 ARIMA(0,1,1)(1,0,2)[4] intercept   : AIC=inf, Time=0.17 sec
 ARIMA(0,1,1)(0,0,3)[4] intercept   : AIC=inf, Time=0.18 sec
 ARIMA(0,1,1)(1,0,3)[4] intercept   : AIC=179.195, Time=0.12 sec
 ARIMA(0,1,1)(2,0,3)[4] intercept   : AIC=inf, Time=0.23 sec
 ARIMA(0,1,1)(1,0,4)[4] intercept   : AIC=inf, Time=0.34 sec
 ARIMA(0,1,1)(0,0,4)[4] intercept   : AIC=177.880, Time=0.15 sec
 ARIMA(0,1,1)(0,0,5)[4] intercept   : AIC=inf, Time=0.36 sec
 ARIMA(0,1,1)(1,0,5)[4] intercept   : AIC=inf, Time=0

  return get_prediction_index(
  return get_prediction_index(


 ARIMA(1,0,1)(1,0,1)[4] intercept   : AIC=216.093, Time=0.11 sec
 ARIMA(0,0,0)(0,0,0)[4] intercept   : AIC=305.172, Time=0.01 sec
 ARIMA(1,0,0)(1,0,0)[4] intercept   : AIC=216.366, Time=0.05 sec
 ARIMA(0,0,1)(0,0,1)[4] intercept   : AIC=255.481, Time=0.03 sec
 ARIMA(0,0,0)(0,0,0)[4]             : AIC=425.812, Time=0.01 sec
 ARIMA(1,0,1)(0,0,1)[4] intercept   : AIC=216.241, Time=0.06 sec
 ARIMA(1,0,1)(1,0,0)[4] intercept   : AIC=218.267, Time=0.08 sec
 ARIMA(1,0,1)(2,0,1)[4] intercept   : AIC=217.813, Time=0.14 sec
 ARIMA(1,0,1)(1,0,2)[4] intercept   : AIC=213.777, Time=0.19 sec
 ARIMA(1,0,1)(0,0,2)[4] intercept   : AIC=215.561, Time=0.10 sec
 ARIMA(1,0,1)(2,0,2)[4] intercept   : AIC=inf, Time=0.25 sec
 ARIMA(1,0,1)(1,0,3)[4] intercept   : AIC=inf, Time=0.26 sec
 ARIMA(1,0,1)(0,0,3)[4] intercept   : AIC=217.089, Time=0.11 sec
 ARIMA(1,0,1)(2,0,3)[4] intercept   : AIC=inf, Time=0.28 sec
 ARIMA(0,0,1)(1,0,2)[4] intercept   : AIC=258.400, Time=0.17 sec
 ARIMA(1,0,0)(1,0,2)[4] intercept   :