In [13]:
import os
import pandas as pd
import numpy as np
import joblib
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Ridge, LassoCV
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from catboost import CatBoostRegressor
from prophet import Prophet
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

In [14]:
# Load the raw macro series (first CSV column becomes a parsed datetime index),
# average them onto a quarterly grid, then fill gaps: interpolate between
# observations, forward-fill, and finally back-fill whatever is still missing
# at the start of the sample.
data = (
    pd.read_csv("../data/raw_macro.csv", index_col=0, parse_dates=True)
    .resample("Q")
    .mean()
    .interpolate()
    .ffill()
    .bfill()
)

# Modelling target: natural log of the gdp column.
data = data.assign(log_gdp=np.log(data["gdp"]))

FileNotFoundError: [Errno 2] No such file or directory: '../data/raw_macro.csv'

In [None]:
# Persist the cleaned frame in both CSV and pickle form.
os.makedirs("../data", exist_ok=True)
data.to_csv("../data/clean_macro.csv")
data.to_pickle("../data/final_macro.pkl")

# The target is log(gdp), so the raw 'gdp' column must not be used as a
# feature: leaving it in lets every model recover the target exactly
# (target leakage) and makes any held-out score meaningless.
X = data.drop(columns=['gdp', 'log_gdp'])
y = data['log_gdp']

# shuffle=False keeps row order, so the first 80% of rows form the training
# set and the last 20% the test set (no look-ahead across the split).
X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle=False, test_size=0.2)

# Directory that later cells write fitted models and results into.
os.makedirs("../models", exist_ok=True)

In [None]:
# Registry of tabular regressors, keyed by the short name that is used in the
# saved artifact's file name.
models = dict(
    ridge=Ridge(),
    xgb=XGBRegressor(),
    lgbm=LGBMRegressor(),
    catboost=CatBoostRegressor(verbose=0),
)

# Fit each regressor on the training split and serialise it right away.
for name in models:
    model = models[name]
    model.fit(X_train, y_train)
    joblib.dump(model, f"../models/{name}_forecast.pkl")

In [None]:
# Prophet expects a two-column frame: 'ds' (timestamps) and 'y' (values).
# Build it from the target series so the cell works whatever the datetime
# index is called (previously it required the index to be named 'date').
prophet_df = data['log_gdp'].reset_index()
prophet_df.columns = ['ds', 'y']

prophet = Prophet()
prophet.fit(prophet_df)

# NOTE(review): Prophet's documentation recommends
# prophet.serialize.model_to_json over pickling fitted models. The joblib
# artifact is kept here so existing consumers of this path keep working --
# confirm before switching formats.
joblib.dump(prophet, "../models/prophet_forecast.pkl")

In [None]:
class MacroDataset(Dataset):
    """Torch dataset pairing feature rows with their targets.

    Args:
        X: object exposing ``.values`` (e.g. a pandas DataFrame) with the features.
        y: object exposing ``.values`` (e.g. a pandas Series) with the targets.

    Both are copied into float32 tensors stored as ``self.X`` and ``self.y``.
    """

    def __init__(self, X, y):
        self.X, self.y = (
            torch.tensor(source.values, dtype=torch.float32) for source in (X, y)
        )

    def __len__(self):
        # Number of samples is the length of the leading axis of the features.
        return self.X.size(0)

    def __getitem__(self, idx):
        features = self.X[idx]
        target = self.y[idx]
        return features, target

class GDPNet(nn.Module):
    """Small feed-forward regressor producing one scalar per input row.

    Architecture: Linear(input_size, 64) -> ReLU -> Dropout(0.1)
    -> Linear(64, 32) -> ReLU -> Linear(32, 1).

    Args:
        input_size: number of features in each input row.
    """

    def __init__(self, input_size):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(input_size, 64),
            nn.ReLU(),
            nn.Dropout(0.1),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 1)
        )

    def forward(self, x):
        """Return predictions with the trailing size-1 output axis removed.

        Only the last axis is squeezed. A bare ``squeeze()`` would also drop
        the batch axis for a batch of one sample, returning a 0-d tensor whose
        shape no longer matches a ``(1,)`` target.
        """
        return self.net(x).squeeze(-1)

# Weight initialisation and dropout both draw from torch's global RNG; seed it
# so that re-running the notebook reproduces the same saved weights.
torch.manual_seed(42)

train_dataset = MacroDataset(X_train, y_train)
test_dataset = MacroDataset(X_test, y_test)
# shuffle=False: batches are consecutive rows of the training split.
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=False)

model = GDPNet(input_size=X.shape[1])
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Nothing inside the loop switches the network to eval mode, so enabling
# training mode (dropout active) once up front is enough.
model.train()
for epoch in range(200):
    for xb, yb in train_loader:
        optimizer.zero_grad()
        pred = model(xb)
        loss = criterion(pred, yb)
        loss.backward()
        optimizer.step()

# Only the parameters are saved; loading requires re-creating GDPNet with the
# same input_size first.
torch.save(model.state_dict(), "../models/gdpnet_torch.pt")

In [None]:
# Manual T-learner: fit one outcome model per treatment arm, predict both
# potential outcomes for every row, and take the difference.
T = data['fed_rate']
Y = data['log_gdp']
X_causal = data[['unemployment', 'cpi', 'money_supply', 'employment']]

# Binary treatment assignment: rows with fed_rate strictly above the sample
# median are "treated" (1), all others are "control" (0).
t_thresh = T.median()
T_binary = (T > t_thresh).astype(int)

X_treated = X_causal[T_binary == 1]
Y_treated = Y[T_binary == 1]
X_control = X_causal[T_binary == 0]
Y_control = Y[T_binary == 0]

# The strict ">" leaves the treated arm empty when the median equals the
# maximum (e.g. a long run of identical rates); fail with a clear message
# instead of an opaque estimator error.
if X_treated.empty or X_control.empty:
    raise ValueError(
        f"T-learner needs both arms: {len(X_treated)} treated / "
        f"{len(X_control)} control rows for fed_rate > {t_thresh}"
    )

# Fixed random_state makes the fitted trees, and so the saved effects,
# reproducible across runs.
reg_treated = GradientBoostingRegressor(random_state=0).fit(X_treated, Y_treated)
reg_control = GradientBoostingRegressor(random_state=0).fit(X_control, Y_control)

# Predict counterfactual outcomes for every row under each arm's model
y1_pred = reg_treated.predict(X_causal)
y0_pred = reg_control.predict(X_causal)
treatment_effect = y1_pred - y0_pred
avg_effect = np.mean(treatment_effect)

# Persist the per-row effects and a one-line human-readable summary.
np.save("../models/treatment_effect.npy", treatment_effect)
with open("../models/avg_treatment_effect.txt", "w") as f:
    f.write(f"Average Treatment Effect (binary fed_rate > {t_thresh:.2f}): {avg_effect:.4f}\n")


In [None]:
# ========== 5. Manual T-Learner with scikit-learn ==========
# NOTE(review): this cell repeats the preceding cell's computation and
# rewrites the same two artifact files; one of the two cells can be removed.
T = data['fed_rate']
Y = data['log_gdp']
X_causal = data[['unemployment', 'cpi', 'money_supply', 'employment']]

# Binary treatment assignment: rows with fed_rate strictly above the sample
# median are "treated" (1), all others are "control" (0).
t_thresh = T.median()
T_binary = (T > t_thresh).astype(int)

X_treated = X_causal[T_binary == 1]
Y_treated = Y[T_binary == 1]
X_control = X_causal[T_binary == 0]
Y_control = Y[T_binary == 0]

# The strict ">" leaves the treated arm empty when the median equals the
# maximum (e.g. a long run of identical rates); fail with a clear message
# instead of an opaque estimator error.
if X_treated.empty or X_control.empty:
    raise ValueError(
        f"T-learner needs both arms: {len(X_treated)} treated / "
        f"{len(X_control)} control rows for fed_rate > {t_thresh}"
    )

# Fixed random_state makes the fitted trees, and so the saved effects,
# reproducible across runs.
reg_treated = GradientBoostingRegressor(random_state=0).fit(X_treated, Y_treated)
reg_control = GradientBoostingRegressor(random_state=0).fit(X_control, Y_control)

# Predict counterfactual outcomes for every row under each arm's model
y1_pred = reg_treated.predict(X_causal)
y0_pred = reg_control.predict(X_causal)
treatment_effect = y1_pred - y0_pred
avg_effect = np.mean(treatment_effect)

# Persist the per-row effects and a one-line human-readable summary.
np.save("../models/treatment_effect.npy", treatment_effect)
with open("../models/avg_treatment_effect.txt", "w") as f:
    f.write(f"Average Treatment Effect (binary fed_rate > {t_thresh:.2f}): {avg_effect:.4f}\n")
