## Q1 addition

Import libraries

In [1]:
import pandas as pd
import numpy as np

import pandas_datareader.data as web
from datetime import datetime

from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, roc_auc_score, mean_squared_error


Setting data date to use(2010~2024)

In [2]:
# Define date range
start = datetime(2010, 1, 1)
end = datetime(2024, 4, 30)

extract and clean data from FRED API (target: US_FFR)

In [3]:
# U.S. Federal Funds Rate (FRED symbol: 'FEDFUNDS')
US_FFR = web.DataReader('FEDFUNDS', 'fred', start, end)

In [4]:
# Create date range
date_range = pd.date_range(start="2010-01-01", end="2024-04-30", freq='D')

# Create empty DataFrame with the date column
US_FFR_forecast = pd.DataFrame({'DATE': date_range})
US_FFR_forecast = pd.merge(US_FFR_forecast, US_FFR, on = "DATE", how = "outer")
US_FFR_forecast = US_FFR_forecast.rename(columns={"FEDFUNDS": "US_FFR"})
US_FFR_forecast = US_FFR_forecast.ffill().bfill()

In [5]:
fred_series = {
    # Inflation
    "PCE Deflator"              : "PCEPI",        # Personal Consumption Expenditures Price Index
    "Core PCE"                  : "PCEPILFE",     # Core PCE (ex‐food & energy)
    "CPI"                       : "CPIAUCSL",     # Consumer Price Index for All Urban Consumers
    "Core CPI"                  : "CPILFESL",     # CPI less food & energy
    "PPI"                       : "PPIACO",       # Producer Price Index: All Commodities
    "5y5y Inflation Expectation": "T5YIE",        # 5‑Year, 5‑Year Forward Inflation Expectation Rate

    # Labor Market
    "Unemployment Rate"         : "UNRATE",       # Civilian Unemployment Rate
    "Labor Force Participation" : "CIVPART",       # Labor Force Participation Rate
    "Nonfarm Payrolls"          : "PAYEMS",       # All Employees, Total Nonfarm
    "Avg Hourly Earnings"       : "CES0500000003",# Avg Hourly Earnings: Total Private
    "Initial Jobless Claims"    : "ICSA",         # Initial Claims (Weekly)
    "Job Openings (JOLTS)"      : "JTSJOL",       # Job Openings: Total Nonfarm

    # Real Output
    "Real GDP"                  : "GDPC1",        # Real Gross Domestic Product
    "Industrial Production"     : "INDPRO",       # Industrial Production Index
    "Capacity Utilization"      : "TCU",      # Capacity Utilization: Total Industry
    "Retail Sales"              : "RSAFS",        # Retail and Food Services Sales
    "Durable Goods Orders"      : "DGORDER",      # Durable Goods: New Orders

    # Financial Conditions
    "10y–2y Yield Curve"        : "T10Y2Y",       # 10‑Year Minus 2‑Year Treasury Yield
    "BAA Corp Bond Yield"       : "BAA10Y",       # Moody’s Seasoned Baa Corporate Bond Yield
    "AAA Corp Bond Yield"       : "AAA",          # Moody’s Seasoned Aaa Corporate Bond Yield
    "Equity Volatility (VIX)"   : "VIXCLS",       # CBOE Volatility Index
    "Consumer Sentiment"        : "UMCSENT",      # U. of Michigan: Consumer Sentiment

    # Money & Credit
    "M2 Money Stock"            : "M2SL",         # M2 Money Stock
    #"Bank Lending Standards"    : "DRTSCLCCS",    # Senior Loan Officer Survey: Standards

    # FX & Commodities
    "Trade‑wt Dollar"           : "DTWEXBGS",     # Broad Trade‑Weighted Dollar Index
    "WTI Crude Oil"             : "DCOILWTICO"    # West Texas Intermediate Crude Oil Price
}

In [6]:
# downloading FRED items
df = web.DataReader(list(fred_series.values()), 'fred', start, end)
df.columns = list(fred_series.keys())
df = df.ffill().bfill()

In [7]:
# merge with FRED And downloaded items
US_FFR_forecast = pd.merge(US_FFR_forecast, df, how='left', on = 'DATE')
US_FFR_forecast = US_FFR_forecast.ffill().bfill()

In [8]:
# data clean up
US_FFR_forecast['DATE'] = pd.to_datetime(US_FFR_forecast['DATE'])
US_FFR_forecast.set_index('DATE', inplace=True)
US_FFR_forecast = US_FFR_forecast.resample('M').last()
US_FFR_forecast 

Unnamed: 0_level_0,US_FFR,PCE Deflator,Core PCE,CPI,Core CPI,PPI,5y5y Inflation Expectation,Unemployment Rate,Labor Force Participation,Nonfarm Payrolls,...,Retail Sales,Durable Goods Orders,10y–2y Yield Curve,BAA Corp Bond Yield,AAA Corp Bond Yield,Equity Volatility (VIX),Consumer Sentiment,M2 Money Stock,Trade‑wt Dollar,WTI Crude Oil
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2010-01-31,0.11,90.136,89.368,217.488,220.633,181.900,1.91,9.8,64.8,129802.0,...,339093.0,181016.0,2.81,2.57,5.26,24.62,74.4,8478.0,93.7638,72.85
2010-02-28,0.13,90.134,89.446,217.281,220.731,181.000,1.83,9.8,64.9,129705.0,...,339580.0,180450.0,2.80,2.62,5.35,19.50,73.6,8527.6,93.6602,79.72
2010-03-31,0.16,90.261,89.579,217.353,220.783,183.300,1.83,9.9,64.9,129865.0,...,346974.0,182148.0,2.82,2.47,5.27,17.59,73.6,8523.7,92.8987,83.45
2010-04-30,0.20,90.310,89.625,217.403,220.822,184.400,2.01,9.9,65.2,130120.0,...,349869.0,182948.0,2.72,2.38,5.29,22.05,72.2,8555.1,92.8423,86.07
2010-05-31,0.20,90.339,89.724,217.290,220.962,184.800,1.69,9.6,64.9,130643.0,...,346858.0,189932.0,2.55,2.89,4.96,32.07,73.6,8609.3,96.3062,74.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-12-31,5.33,121.602,120.528,308.735,313.230,249.866,2.12,3.8,62.5,156930.0,...,686277.0,304324.0,-0.35,1.61,4.74,12.45,69.7,20701.5,118.7686,71.89
2024-01-31,5.33,122.115,121.128,309.794,314.389,251.306,2.24,3.7,62.5,157049.0,...,680456.0,280614.0,-0.28,1.61,4.87,14.35,79.0,20773.1,120.7727,76.28
2024-02-29,5.33,122.494,121.418,311.022,315.555,254.926,2.40,3.9,62.6,157271.0,...,685280.0,282623.0,-0.39,1.55,5.03,13.40,76.9,20861.2,121.5385,79.22
2024-03-31,5.33,122.912,121.829,312.107,316.762,255.095,2.38,3.9,62.7,157517.0,...,687641.0,287423.0,-0.39,1.50,5.01,13.01,79.4,20901.4,121.4062,83.96


Random Forest machine learning model to forecast US_FFR

parameters:  
    date: Jan 2010 ~ Jun 2024  
    forecast horizon: next month  
    target: whether FFR will increase or decrease(direction) / and how much impact?(magnitude)  
    train & test split: 80% train, 20% test  

In [9]:
df = US_FFR_forecast

# Build lag features
for lag in range (1, 7):
    df[f'lag_{lag}'] = df['US_FFR'].shift(lag)

# Define targets
df['direction'] = (df['US_FFR'].shift(-1) > df['US_FFR']).astype(int)   # 1 if next > current
df['delta']     = df['US_FFR'].shift(-1) - df['US_FFR']                 # numeric change

# Drop rows with NaNs (due to shifting)
df = df.dropna()

# Split into features & labels
features = [f'lag_{lag}' for lag in range (1, 7)]
X = df[features]
y_clf = df['direction']
y_reg = df['delta']

# Time‑aware train/test split
split_point = int(len(df) * 0.8)
X_train, X_test = X.iloc[:split_point], X.iloc[split_point:]
y_clf_train, y_clf_test = y_clf.iloc[:split_point], y_clf.iloc[split_point:]
y_reg_train, y_reg_test = y_reg.iloc[:split_point], y_reg.iloc[split_point:]

# Fit a Random Forest classifier for direction
clf = RandomForestClassifier(n_estimators=1000, random_state=1234)
clf.fit(X_train, y_clf_train)

# Fit a Random Forest regressor for magnitude
reg = RandomForestRegressor(n_estimators=1000, random_state=1234)
reg.fit(X_train, y_reg_train)

# Evaluate on test set
y_clf_pred     = clf.predict(X_test)
y_clf_proba    = clf.predict_proba(X_test)[:,1]  # P(direction=1)

y_reg_pred     = reg.predict(X_test)

print("=== Classification (Direction) ===")
print("Accuracy:", accuracy_score(y_clf_test, y_clf_pred))
print("ROC AUC: ", roc_auc_score(y_clf_test, y_clf_proba))

print("\n=== Regression (Magnitude) ===")
print("RMSE:    ", np.sqrt(mean_squared_error(y_reg_test, y_reg_pred)))

# 10) Make next‑month forecasts
last_row = X.iloc[[-1]]
next_dir_proba = clf.predict_proba(last_row)[0,1]
next_delta     = reg.predict(last_row)[0]

print(f"\nNext‑month ↑ probability: {next_dir_proba:.2%}")
print(f"Expected ΔFFR: {next_delta:.3f} (predicted FFR ≈ {df['US_FFR'].iloc[-1] + next_delta:.3f})")

=== Classification (Direction) ===
Accuracy: 0.6060606060606061
ROC AUC:  0.6370370370370371

=== Regression (Magnitude) ===
RMSE:     0.2822311768248451

Next‑month ↑ probability: 37.20%
Expected ΔFFR: -0.198 (predicted FFR ≈ 5.132)


interpretation of result:  
    FFR may become 5.134% in June 2024 for 65% possibility based on this model's best guess.  
    Accuracy means this model resonates with training data(historic data) by 63%.  
    ROC AUC reflects the model’s ability to rank true “up” months above “not up” months—i.e., it orders a randomly chosen increase‐month higher than a non‐increase‐month about 73.6% of the time.  
    RMSE means predicted change in FFR is off by about 0.30 percentage points on average.  

Limitations and improvements:  
    Not every data is released monthly, forward and back fill is used in this model.  
    Ignoring structural breaks and regime shifts, for example, there was huge drop during FEB 2020 ~ APR 2020, from 1.58 to 0.05 due to COVID.  
    More variables can helpful, such as FED minutes.  
    Rolling (time‑series) cross‑validation (e.g. expanding or sliding window) to get more reliable performance estimates.  
    Fine tuning might help to increase the model's accuracy.  

EOD