# AR(1) method training

## For 1 month model

In [1]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
from statsmodels.tsa.ar_model import AutoReg

In [2]:
# Load the data
url = "https://github.com/zhentaoshi/Econ5821/raw/main/data_example/US_PCE_training.xlsx"
df = pd.read_excel(url, header=None)
df = df.drop(df.columns[0], axis=1)
df = df.T

In [3]:
# Compute the inflation rate
df.iloc[:, 1] = (np.log(df.iloc[:, 1]) - np.log(df.iloc[:, 1].shift(1))) * 12

In [4]:
# Transform the predictors
for i in range(5, df.shape[1]):
    df.iloc[:, i] = (np.log(df.iloc[:, i]) - np.log(df.iloc[:, i].shift(1))) * 12

In [5]:
# Prepare the data for machine learning
X = df.iloc[:, 5:].T
y = df.iloc[:, 1]

In [6]:
# Remove the first row as it contains NaN
X = X.iloc[:, 1:]
y = y.iloc[1:]

In [7]:
# Implement time series cross-validation
from sklearn.model_selection import TimeSeriesSplit

tscv = TimeSeriesSplit(n_splits=5)

for train_index, test_index in tscv.split(X):
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    # Train an AR(1) model
    model = AutoReg(y_train.values, lags=1)
    model_fit = model.fit()

    # Make predictions
    y_pred = model_fit.predict(start=len(y_train), end=len(y_train)+len(y_test)-1, dynamic=False)

    # Evaluate the model
    mse = mean_squared_error(y_test, y_pred)
    print(f"Mean Squared Error: {mse}")

Mean Squared Error: 0.0001811770840277184
Mean Squared Error: 0.0002538879050745956
Mean Squared Error: 0.0008117440680446121
Mean Squared Error: 0.0004755494735527069
Mean Squared Error: 0.00381723110544419


In [8]:
# Train the AR(1) model on the entire dataset
model = AutoReg(y.values, lags=1)
model_ar_1 = model.fit()

* Now the model can be used to forecast future inflation rates

* For example, to predict the inflation rate for the next month given its predictors

In [9]:
# Forecast the inflation rate 1 month ahead
forecast = model_ar_1.predict(start=len(y), end=len(y), dynamic=False)
print(f"Forecast for 1 month ahead: {forecast}")

Forecast for 1 month ahead: [0.03252426]


## For 3 month model

In [11]:
# Load the data
url = "https://github.com/zhentaoshi/Econ5821/raw/main/data_example/US_PCE_training.xlsx"
df = pd.read_excel(url, header=None)
df = df.drop(df.columns[0], axis=1)
df = df.T

In [12]:
# Compute the inflation rate
df.iloc[:, 1] = (np.log(df.iloc[:, 1]) - np.log(df.iloc[:, 1].shift(1))) * 12

In [13]:
# Transform the predictors
for i in range(5, df.shape[1]):
    df.iloc[:, i] = (np.log(df.iloc[:, i]) - np.log(df.iloc[:, i].shift(1))) * 12

In [14]:
# Prepare the data for machine learning
X = df.iloc[:, 5:].T
y = df.iloc[:, 1]

In [15]:
# Shift y 3 months ahead
y = y.shift(-3)

In [16]:
# Remove the last 3 rows as they contain NaN
X = X.iloc[:, 1:-3]
y = y.iloc[1:-3]

In [17]:
# Implement time series cross-validation
tscv = TimeSeriesSplit(n_splits=5)

for train_index, test_index in tscv.split(X):
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    # Train an AR(1) model
    model = AutoReg(y_train.values, lags=1)
    model_fit = model.fit()

    # Make predictions
    y_pred = model_fit.predict(start=len(y_train), end=len(y_train)+len(y_test)-1, dynamic=False)

    # Evaluate the model
    mse = mean_squared_error(y_test, y_pred)
    print(f"Mean Squared Error: {mse}")

Mean Squared Error: 0.00016874668123816974
Mean Squared Error: 0.00029500747338626364
Mean Squared Error: 0.0007770341209473035
Mean Squared Error: 0.0006612665985256351
Mean Squared Error: 0.003547166157278783


In [18]:
# Train the AR(1) model on the entire dataset
model = AutoReg(y.values, lags=1)
model_ar_3 = model.fit()

* Now the model can be used to forecast future inflation rates

* For example, to predict the inflation rate for the next quarter given its predictors

In [19]:
# Forecast the inflation rate 3 months ahead
forecast = model_ar_3.predict(start=len(y), end=len(y)+2, dynamic=False)
print(f"Forecast for 3 months ahead: {forecast[-1]}")

Forecast for 3 months ahead: 0.032202563663067445


## For 12 month model

In [20]:
# Load the data
url = "https://github.com/zhentaoshi/Econ5821/raw/main/data_example/US_PCE_training.xlsx"
df = pd.read_excel(url, header=None)
df = df.drop(df.columns[0], axis=1)
df = df.T

In [21]:
# Compute the inflation rate
df.iloc[:, 1] = (np.log(df.iloc[:, 1]) - np.log(df.iloc[:, 1].shift(1))) * 12

In [22]:
# Transform the predictors
for i in range(5, df.shape[1]):
    df.iloc[:, i] = (np.log(df.iloc[:, i]) - np.log(df.iloc[:, i].shift(1))) * 12

In [23]:
# Prepare the data for machine learning
X = df.iloc[:, 5:].T
y = df.iloc[:, 1]

In [24]:
# Shift y 12 months ahead
y = y.shift(-12)

In [25]:
# Remove the last 12 rows as they contain NaN
X = X.iloc[:, 1:-12]
y = y.iloc[1:-12]

In [26]:
# Implement time series cross-validation
tscv = TimeSeriesSplit(n_splits=5)

for train_index, test_index in tscv.split(X):
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    # Train an AR(1) model
    model = AutoReg(y_train.values, lags=1)
    model_fit = model.fit()

    # Make predictions
    y_pred = model_fit.predict(start=len(y_train), end=len(y_train)+len(y_test)-1, dynamic=False)

    # Evaluate the model
    mse = mean_squared_error(y_test, y_pred)
    print(f"Mean Squared Error: {mse}")


Mean Squared Error: 0.00010239409494070976
Mean Squared Error: 0.0004937425872471006
Mean Squared Error: 0.0007412711984037616
Mean Squared Error: 0.002188092945199966
Mean Squared Error: 0.0011927727129309913


In [27]:
# Train the AR(1) model on the entire dataset
model = AutoReg(y.values, lags=1)
model_ar_12 = model.fit()

* Now we get the model_ar_12

* The model can be used to forecast future inflation rates

* For example, to predict the inflation rate for the next year given its predictors

In [28]:
# Forecast the inflation rate 12 months ahead
forecast = model_ar_12.predict(start=len(y), end=len(y)+11, dynamic=False)
print(f"Forecast for 12 months ahead: {forecast[-1]}")

Forecast for 12 months ahead: 0.0320111244775863


# LASSO metheod training

## For 1 month model

In [29]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LassoCV
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import TimeSeriesSplit
import math

In [30]:
# Load the data
url = "https://github.com/zhentaoshi/Econ5821/raw/main/data_example/US_PCE_training.xlsx"
df = pd.read_excel(url, header=None)
df = df.drop(df.columns[0], axis=1)
df = df.T

In [31]:
# Compute the inflation rate
df.iloc[:, 1] = (np.log(df.iloc[:, 1]) - np.log(df.iloc[:, 1].shift(1))) * 12

In [32]:
# Transform the predictors into log difference
for i in range(5, df.shape[1]):
    df.iloc[:, i] = (np.log(df.iloc[:, i]) - np.log(df.iloc[:, i].shift(1))) * 12

In [33]:
# Prepare the data for machine learning
X = df.iloc[:, 5:]
y = df.iloc[:, 1]

In [34]:
# Remove the first row (NaN value due to the shift operation)
X = X.iloc[1:]
y = y.iloc[1:]

In [35]:
# Implement a time series split for cross-validation
tscv = TimeSeriesSplit(n_splits=5)

In [36]:
# Train a LASSO regression model
model = LassoCV(cv=tscv)
model_lasso_1 = model.fit(X, y)

* Now, we get the model, then we use MSE to evaluate the model

In [37]:
mse = mean_squared_error(y, model.predict(X))
rmse = math.sqrt(mse)
print(f"Root Mean Squared Error: {rmse}")

Root Mean Squared Error: 0.003467640633310763


* We can use the model to forecast future inflation rates

* For example, to forecast the inflation rate for the next month given its predictors

In [38]:
next_month_predictors = X.iloc[-1].values.reshape(1, -1)
next_month_inflation_rate = model_lasso_1.predict(next_month_predictors)
print(f"Forecasted inflation rate for the next month: {next_month_inflation_rate[0]}")

Forecasted inflation rate for the next month: 0.03449263811867629


## For 3 month model

In [40]:
# Load the data
url = "https://github.com/zhentaoshi/Econ5821/raw/main/data_example/US_PCE_training.xlsx"
df = pd.read_excel(url, header=None)
df = df.drop(df.columns[0], axis=1)
df = df.T

In [41]:
# Compute the inflation rate
df.iloc[:, 1] = (np.log(df.iloc[:, 1]) - np.log(df.iloc[:, 1].shift(1))) * 12

In [42]:
# Transform the predictors into log difference
for i in range(5, df.shape[1]):
    df.iloc[:, i] = (np.log(df.iloc[:, i]) - np.log(df.iloc[:, i].shift(1))) * 12

In [43]:
# Prepare the data for machine learning
X = df.iloc[:, 5:]
y = df.iloc[:, 1]

In [44]:
# Shift the target variable 3 months into the future
y = y.shift(-3)

In [45]:
# Remove the first row (NaN value due to the shift operation) and the last 3 rows (NaN values due to the shift operation on y)
X = X.iloc[1:-3]
y = y.iloc[1:-3]

In [46]:
# Implement a time series split for cross-validation
tscv = TimeSeriesSplit(n_splits=5)

In [47]:
# Train a LASSO regression model
model = LassoCV(cv=tscv, max_iter=10000)
model_lasso_3 = model.fit(X, y)

* Now, we get the model, then we use MSE to evaluate the model

In [48]:
mse = mean_squared_error(y, model.predict(X))
rmse = math.sqrt(mse)
print(f"Root Mean Squared Error: {rmse}")

Root Mean Squared Error: 0.019214750942880412


* We can use the model to forecast future inflation rates

* For example, to forecast the inflation rate for the next quarter given its predictors

In [49]:
third_month_predictors = X.iloc[-1].values.reshape(1, -1)
third_month_inflation_rate = model_lasso_3.predict(third_month_predictors)
print(f"Forecasted inflation rate for the 3rd month from the last month: {third_month_inflation_rate[0]}")

Forecasted inflation rate for the 3rd month from the last month: 0.013062241924392695


## For 12 month model

In [50]:
# Load the data
url = "https://github.com/zhentaoshi/Econ5821/raw/main/data_example/US_PCE_training.xlsx"
df = pd.read_excel(url, header=None)
df = df.drop(df.columns[0], axis=1)
df = df.T

In [51]:
# Compute the inflation rate
df.iloc[:, 1] = (np.log(df.iloc[:, 1]) - np.log(df.iloc[:, 1].shift(1))) * 12

In [52]:
# Transform the predictors into log difference
for i in range(5, df.shape[1]):
    df.iloc[:, i] = (np.log(df.iloc[:, i]) - np.log(df.iloc[:, i].shift(1))) * 12

In [53]:
# Prepare the data for machine learning
X = df.iloc[:, 5:]
y = df.iloc[:, 1]

In [54]:
# Shift the target variable 12 months into the future
y = y.shift(-12)

In [58]:
# Remove the first row (NaN value due to the shift operation) and the last 12 rows (NaN values due to the shift operation on y)
X = X.iloc[1:-12]
y = y.iloc[1:-12]

In [59]:
# Implement a time series split for cross-validation
tscv = TimeSeriesSplit(n_splits=5)

In [60]:
# Train a LASSO regression model
model = LassoCV(cv=tscv, max_iter=100000)
model_lasso_12 = model.fit(X, y)

* Now, we get the model, then we use MSE to evaluate the model

In [61]:
# Evaluate the model
mse = mean_squared_error(y, model.predict(X))
rmse = math.sqrt(mse)
print(f"Root Mean Squared Error: {rmse}")

Root Mean Squared Error: 0.02153314731378203


* We can use the model to forecast future inflation rates

* For example, to forecast the inflation rate for the next quarter given its predictors

In [62]:
twelfth_month_predictors = X.iloc[-1].values.reshape(1, -1)
twelfth_month_inflation_rate = model_lasso_12.predict(twelfth_month_predictors)
print(f"Forecasted inflation rate for the 12th month from the last month: {twelfth_month_inflation_rate[0]}")

Forecasted inflation rate for the 12th month from the last month: 0.017516847355285723


# Random forest method

## For 1 month model

In [63]:
import pandas as pd
import numpy as np
from sklearn.model_selection import TimeSeriesSplit
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

In [64]:
# Load the data
url = "https://github.com/zhentaoshi/Econ5821/raw/main/data_example/US_PCE_training.xlsx"
df = pd.read_excel(url, header=None)
df = df.drop(df.columns[0], axis=1)
df = df.T

In [65]:
# Compute the inflation rate
df.iloc[:, 1] = (np.log(df.iloc[:, 1]) - np.log(df.iloc[:, 1].shift(1))) * 12

In [66]:
# Transform the predictors into log difference
for i in range(5, df.shape[1]):
    df.iloc[:, i] = (np.log(df.iloc[:, i]) - np.log(df.iloc[:, i].shift(1))) * 12

In [67]:
# Prepare the data for machine learning
X = df.iloc[:, 5:]
y = df.iloc[:, 1]

In [68]:
# Remove the first row as it contains NaN due to the shift operation
X = X.iloc[1:]
y = y.iloc[1:]

In [69]:
# Split the data into training and testing sets in a time series manner
tscv = TimeSeriesSplit(n_splits=5)
for train_index, test_index in tscv.split(X):
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    # Train a Random Forest model
    model = RandomForestRegressor(n_estimators=100, random_state=42)
    model_random_1 = model.fit(X_train, y_train)

    # Evaluate the model
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    print(f"Mean Squared Error: {mse}")

Mean Squared Error: 0.0011656305974605122
Mean Squared Error: 0.0001886851944013001
Mean Squared Error: 8.72955437799227e-05
Mean Squared Error: 0.00039595540898014324
Mean Squared Error: 3.626448610114312e-05


* Now the model can be used to forecast future inflation rates

* For example, to predict the inflation rate for the next month given its predictors

In [70]:
next_month_predictors = X.iloc[-1].values.reshape(1, -1)
next_month_inflation_rate = model_random_1.predict(next_month_predictors)
print(f"Predicted inflation rate for next month: {next_month_inflation_rate[0]}")

Predicted inflation rate for next month: 0.030184702997194997


## For 3 month model

In [71]:
# Shift the target variable three months back
y = y.shift(-3)

In [72]:
# Remove the last three rows as they contain NaN due to the shift operation
X = X.iloc[:-3]
y = y.iloc[:-3]

In [73]:
# Split the data into training and testing sets in a time series manner
tscv = TimeSeriesSplit(n_splits=5)
for train_index, test_index in tscv.split(X):
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    # Train a Random Forest model
    model = RandomForestRegressor(n_estimators=100, random_state=42)
    model_random_3 = model.fit(X_train, y_train)

    # Evaluate the model
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    print(f"Mean Squared Error: {mse}")

Mean Squared Error: 0.0016547381380780564
Mean Squared Error: 0.0005455901696460697
Mean Squared Error: 0.0003150834250461282
Mean Squared Error: 0.00111626967691056
Mean Squared Error: 0.00033669143374951457


* Now the model can be used to forecast future inflation rates

* For example, to predict the inflation rate for the next quarter given its predictors

In [74]:
next_quarter_predictors = X.iloc[-1].values.reshape(1, -1)
next_quarter_inflation_rate = model_random_3.predict(next_quarter_predictors)
print(f"Predicted inflation rate for next quarter: {next_quarter_inflation_rate[0]}")

Predicted inflation rate for next quarter: 0.011397978322821932


## For 12 month model

In [75]:
# Shift the target variable twelve months back
y = y.shift(-12)

In [76]:
# Remove the last twelve rows as they contain NaN due to the shift operation
X = X.iloc[:-12]
y = y.iloc[:-12]

In [77]:
# Split the data into training and testing sets in a time series manner
tscv = TimeSeriesSplit(n_splits=5)
for train_index, test_index in tscv.split(X):
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    # Train a Random Forest model
    model = RandomForestRegressor(n_estimators=100, random_state=42)
    model_random_12 = model.fit(X_train, y_train)

    # Evaluate the model
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    print(f"Mean Squared Error: {mse}")

Mean Squared Error: 0.002012329966294828
Mean Squared Error: 0.0007226249082602667
Mean Squared Error: 0.0004900612268844443
Mean Squared Error: 0.0010109018769290366
Mean Squared Error: 0.0003575531302877318


* Now the model can be used to forecast future inflation rates

* For example, to predict the inflation rate for the next year given its predictors

In [78]:
next_year_predictors = X.iloc[-1].values.reshape(1, -1)
next_year_inflation_rate = model_random_12.predict(next_year_predictors)
print(f"Predicted inflation rate for next year: {next_year_inflation_rate[0]}")

Predicted inflation rate for next year: 0.029919480961150775


# Gradient boosting method

## For 1 month model

In [79]:
import pandas as pd
import numpy as np
from sklearn.model_selection import TimeSeriesSplit
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error
import math

In [80]:
# Load the data
url = "https://github.com/zhentaoshi/Econ5821/raw/main/data_example/US_PCE_training.xlsx"
df = pd.read_excel(url, header=None)
df = df.drop(df.columns[0], axis=1)
df = df.T

In [81]:
# Compute the inflation rate
df.iloc[:, 1] = (np.log(df.iloc[:, 1]) - np.log(df.iloc[:, 1].shift(1))) * 12

In [82]:
# Transform the predictors into log difference
for i in range(5, df.shape[1]):
    df.iloc[:, i] = (np.log(df.iloc[:, i]) - np.log(df.iloc[:, i].shift(1))) * 12

In [83]:
# Prepare the data for machine learning
X = df.iloc[:, 5:]
y = df.iloc[:, 1]

In [84]:
# Remove the first row as it contains NaN due to the shift operation
X = X.iloc[1:]
y = y.iloc[1:]

In [85]:
# Split the data into training and validation sets
tscv = TimeSeriesSplit(n_splits=5)
for train_index, test_index in tscv.split(X):
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    # Train a Gradient Boosting model
    model = GradientBoostingRegressor()
    model_grad_1 = model.fit(X_train, y_train)

    # Evaluate the model
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    rmse = math.sqrt(mse)
    print(f"Root Mean Squared Error: {rmse}")

Root Mean Squared Error: 0.03206627938885181
Root Mean Squared Error: 0.013549824746099066
Root Mean Squared Error: 0.0091258410557121
Root Mean Squared Error: 0.019798859128981584
Root Mean Squared Error: 0.006788702235844036


* Now the model can be used to forecast future inflation rates

* Use the model to forecast future inflation rates

* For example, to predict the inflation rate for the next month given its predictors

In [86]:
next_month_predictors = X.iloc[-1].values.reshape(1, -1)
next_month_inflation_rate = model_grad_1.predict(next_month_predictors)
print(f"Predicted inflation rate for next month: {next_month_inflation_rate[0]}")

Predicted inflation rate for next month: 0.029491812939379174


## For 3 months model

In [87]:
# Load the data
url = "https://github.com/zhentaoshi/Econ5821/raw/main/data_example/US_PCE_training.xlsx"
df = pd.read_excel(url, header=None)
df = df.drop(df.columns[0], axis=1)
df = df.T

In [88]:
# Compute the inflation rate
df.iloc[:, 1] = (np.log(df.iloc[:, 1]) - np.log(df.iloc[:, 1].shift(1))) * 12

In [89]:
# Transform the predictors into log difference
for i in range(5, df.shape[1]):
    df.iloc[:, i] = (np.log(df.iloc[:, i]) - np.log(df.iloc[:, i].shift(1))) * 12

In [90]:
# Prepare the data for machine learning
X = df.iloc[:, 5:]
y = df.iloc[:, 1]

In [91]:
# Shift y 3 months ahead for 3-month-ahead forecast
y = y.shift(-3)

In [92]:
# Remove the first row as it contains NaN due to the shift operation
X = X.iloc[1:-3]
y = y.iloc[1:-3]

In [93]:
# Split the data into training and validation sets
tscv = TimeSeriesSplit(n_splits=5)
for train_index, test_index in tscv.split(X):
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    # Train a Gradient Boosting model
    model = GradientBoostingRegressor()
    model_grad_3 = model.fit(X_train, y_train)

    # Evaluate the model
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    rmse = math.sqrt(mse)
    print(f"Root Mean Squared Error: {rmse}")

Root Mean Squared Error: 0.040090647978846
Root Mean Squared Error: 0.024253292233585994
Root Mean Squared Error: 0.01908623341251618
Root Mean Squared Error: 0.03299673724525528
Root Mean Squared Error: 0.01999223301862206


* Now the model can be used to forecast future inflation rates

* Use the model to forecast future inflation rates

* For example, to predict the inflation rate for the next quarter given its predictors

In [94]:
next_quarter_predictors = X.iloc[-1].values.reshape(1, -1)
next_quarter_inflation_rate = model_grad_3.predict(next_quarter_predictors)
print(f"Predicted inflation rate for next quarter: {next_quarter_inflation_rate[0]}")

Predicted inflation rate for next quarter: 0.01462779860652663


## For 12 months model

In [95]:
# Shift y 12 months ahead for 12-month-ahead forecast
y = y.shift(-12)

In [96]:
# Remove the first row as it contains NaN due to the shift operation
X = X.iloc[:-12]
y = y.iloc[:-12]

In [97]:
# Split the data into training and validation sets
tscv = TimeSeriesSplit(n_splits=5)
for train_index, test_index in tscv.split(X):
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    # Train a Gradient Boosting model
    model = GradientBoostingRegressor()
    model_grad_12 = model.fit(X_train, y_train)

    # Evaluate the model
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    rmse = math.sqrt(mse)
    print(f"Root Mean Squared Error: {rmse}")

Root Mean Squared Error: 0.044433300013940365
Root Mean Squared Error: 0.02913278524149222
Root Mean Squared Error: 0.024190839862811748
Root Mean Squared Error: 0.03225369297727556
Root Mean Squared Error: 0.02041128562636778


* Now the model can be used to forecast future inflation rates

* Use the model to forecast future inflation rates

* For example, to predict the inflation rate for the next year given its predictors

In [98]:
next_year_predictors = X.iloc[-1].values.reshape(1, -1)
next_year_inflation_rate = model_grad_12.predict(next_year_predictors)
print(f"Predicted inflation rate for next year: {next_year_inflation_rate[0]}")

Predicted inflation rate for next year: 0.01510861518163056


# Testing

## AR(1) method training

* Since we have trained 3 AR(1) models, now we can use it into the testing data.

* First, we shall prepare the data.

In [99]:
# Load the testing data
test_url = "https://github.com/zhentaoshi/Econ5821/raw/main/data_example/US_PCE_testing_fake.xlsx"
test_df = pd.read_excel(test_url, header=None)
test_df = test_df.drop(df.columns[0], axis=1)
test_df = test_df.T

In [100]:
# Compute the inflation rate for the testing data
test_df.iloc[:, 1] = (np.log(test_df.iloc[:, 1]) - np.log(test_df.iloc[:, 1].shift(1))) * 12

In [101]:
# Transform the predictors for the testing data
for i in range(4, test_df.shape[0]):
    test_df.iloc[:, i] = (np.log(test_df.iloc[:, i]) - np.log(test_df.iloc[:, i].shift(1))) * 12

In [102]:
# Prepare the testing data for machine learning
X_test = test_df.iloc[:, 4:].T
y_test = test_df.iloc[:, 1]

In [103]:
# Remove the first row as they contain NaN
X_test = X_test.iloc[:, 1:]
y_test = y_test.iloc[1:]

* Then, we make predictions on the testing data

In [104]:
# 1 month ahead
y_pred = model_ar_1.predict(start=len(y), end=len(y)+len(y_test)-1, dynamic=False)

# Evaluate the model on the testing data
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error on testing data: {mse}")

Mean Squared Error on testing data: 32.54792578416998


In [105]:
# 3 month ahead
y_pred = model_ar_3.predict(start=len(y), end=len(y)+len(y_test)-1, dynamic=False)

# Evaluate the model on the testing data
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error on testing data: {mse}")

Mean Squared Error on testing data: 32.54384842085949


In [106]:
# 12 month ahead
y_pred = model_ar_12.predict(start=len(y), end=len(y)+len(y_test)-1, dynamic=False)

# Evaluate the model on the testing data
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error on testing data: {mse}")

Mean Squared Error on testing data: 32.54611048310749


## LASSO metheod training

* Like before, first, we prepare the data.

In [115]:
# Load the testing data
url = "https://github.com/zhentaoshi/Econ5821/raw/main/data_example/US_PCE_testing_fake.xlsx"
df_test = pd.read_excel(url, header=None)
df_test = df_test.drop(df_test.columns[0], axis=1)
df_test = df_test.T

In [116]:
# Compute the inflation rate
df_test.iloc[:, 1] = (np.log(df_test.iloc[:, 1]) - np.log(df_test.iloc[:, 1].shift(1))) * 12

In [117]:
# Transform the predictors into log difference
for i in range(4, test_df.shape[1]):
    df_test.iloc[:, i] = (np.log(df_test.iloc[:, i]) - np.log(df_test.iloc[:, i].shift(1))) * 12

In [118]:
# Prepare the testing data for machine learning
X_test = df_test.iloc[:, 4:]
y_test = df_test.iloc[:, 1]

In [119]:
# Remove the first row as it contains NaN due to the shift operation
X_test = X_test.iloc[1:]
y_test = y_test.iloc[1:]

* Then, we make predictions on the testing data

In [120]:
# 1 month ahead
y_pred = model_lasso_1.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error on testing data: {mse}")

Mean Squared Error on testing data: 30.138811149651296


In [121]:
# 3 months ahead
y_pred = model_lasso_3.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error on testing data: {mse}")

Mean Squared Error on testing data: 31.81093545320011


In [122]:
# 12 month ahead
y_pred = model_lasso_12.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error on testing data: {mse}")

Mean Squared Error on testing data: 32.00146377455425


## Random forest method

* Like before, first, we prepare the data.

In [107]:
# Load the testing data
url = "https://github.com/zhentaoshi/Econ5821/raw/main/data_example/US_PCE_testing_fake.xlsx"
df_test = pd.read_excel(url, header=None)
df_test = df_test.drop(df_test.columns[0], axis=1)
df_test = df_test.T

In [108]:
# Compute the inflation rate
df_test.iloc[:, 1] = (np.log(df_test.iloc[:, 1]) - np.log(df_test.iloc[:, 1].shift(1))) * 12

In [109]:
# Transform the predictors into log difference
for i in range(4, test_df.shape[1]):
    df_test.iloc[:, i] = (np.log(df_test.iloc[:, i]) - np.log(df_test.iloc[:, i].shift(1))) * 12

In [110]:
# Prepare the testing data for machine learning
X_test = df_test.iloc[:, 4:]
y_test = df_test.iloc[:, 1]

In [111]:
# Remove the first row as it contains NaN due to the shift operation
X_test = X_test.iloc[1:]
y_test = y_test.iloc[1:]

* Then, we make predictions on the testing data

In [112]:
# 1 month ahead
y_pred = model_random_1.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error on testing data: {mse}")

Mean Squared Error on testing data: 32.417290398386925


In [113]:
# 3 months ahead
y_pred = model_random_3.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error on testing data: {mse}")

Mean Squared Error on testing data: 32.52127155650131


In [114]:
# 12 month ahead
y_pred = model_random_12.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error on testing data: {mse}")

Mean Squared Error on testing data: 32.561290359066405


## Gradient boosting method

* Like before, first, we prepare the data.

In [123]:
# Load the testing data
url = "https://github.com/zhentaoshi/Econ5821/raw/main/data_example/US_PCE_testing_fake.xlsx"
df_test = pd.read_excel(url, header=None)
df_test = df_test.drop(df_test.columns[0], axis=1)
df_test = df_test.T

In [124]:
# Compute the inflation rate
df_test.iloc[:, 1] = (np.log(df_test.iloc[:, 1]) - np.log(df_test.iloc[:, 1].shift(1))) * 12

In [125]:
# Transform the predictors into log difference
for i in range(4, test_df.shape[1]):
    df_test.iloc[:, i] = (np.log(df_test.iloc[:, i]) - np.log(df_test.iloc[:, i].shift(1))) * 12

In [126]:
# Prepare the testing data for machine learning
X_test = df_test.iloc[:, 4:]
y_test = df_test.iloc[:, 1]

In [127]:
# Remove the first row as it contains NaN due to the shift operation
X_test = X_test.iloc[1:]
y_test = y_test.iloc[1:]

* Then, we make predictions on the testing data

In [128]:
# 1 month ahead
y_pred = model_grad_12.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error on testing data: {mse}")

Mean Squared Error on testing data: 32.57522916185763


In [129]:
# 3 month ahead
y_pred = model_grad_3.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error on testing data: {mse}")

Mean Squared Error on testing data: 32.494207314932105


In [130]:
# 12 month ahead
y_pred = model_grad_12.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error on testing data: {mse}")

Mean Squared Error on testing data: 32.57522916185763
