In [1]:
import datetime
import os

import joblib
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from fredapi import Fred
# NOTE(review): this import raised ModuleNotFoundError in the recorded run.
# The PyPI package `lcensemble` appears to be imported as `lce` — confirm the
# intended package and class name.
from lcensemble import LocalCascadeEnsemble
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import TimeSeriesSplit, GridSearchCV, train_test_split, cross_val_score
from sklearn.tree import DecisionTreeRegressor, plot_tree

# Initialize the FRED API client.
# The API key is read from the FRED_API_KEY environment variable so the
# credential is never stored in (or leaked through) the notebook.
fred_api_key = os.environ.get('FRED_API_KEY')
if not fred_api_key:
    raise RuntimeError(
        "FRED_API_KEY environment variable is not set; "
        "export your FRED API key before running this notebook."
    )
fred = Fred(api_key=fred_api_key)


ModuleNotFoundError: No module named 'lcensemble'

In [None]:

# FRED series to download, keyed by the column label each one gets in the DataFrame.
series_ids = {
    '5-Year, 5-Year Forward Inflation Expectation Rate': 'T5YIFR',
    '1 Year Treasury Rate': 'DGS1',
    'fed_balance_sheet_assets': 'WALCL',
    'gdp': 'GDP',
    'Summary of Economic Projections: Longer-Term Federal Funds Rate': 'FEDTARMDLR',
    'US 10yr': 'DGS10',
    'RRP': 'WLRRAL',
    'US Treasury General Account': 'D2WLTGAL',
}

# Earliest observation date requested from FRED.
start_date = '2012-01-03'

# Download every series, keeping the friendly label as the key.
data = {
    label: fred.get_series(code, observation_start=start_date)
    for label, code in series_ids.items()
}

# Build the modelling frame in one pass:
#   1. combine the series into one DataFrame and forward-fill the gaps, so
#      series with fewer observations carry their last value forward;
#   2. derive the balance-sheet-to-GDP ratio (GDP is scaled by 1000,
#      presumably to match the balance sheet's units — TODO confirm) and the
#      net liquidity indicator (assets minus RRP minus the Treasury General Account);
#   3. drop the raw inputs that were only needed for the derived columns;
#   4. drop any rows that still contain missing values.
df = (
    pd.DataFrame(data)
    .ffill()
    .assign(**{
        'Fed BS Assets to GDP': lambda frame: (
            frame['fed_balance_sheet_assets'] / (frame['gdp'] * 1000)
        ),
        'Net Liquidity Indicator': lambda frame: (
            frame['fed_balance_sheet_assets']
            - frame['RRP']
            - frame['US Treasury General Account']
        ),
    })
    .drop(columns=['fed_balance_sheet_assets', 'gdp', 'RRP', 'US Treasury General Account'])
    .dropna()
)


In [None]:
# Calculate percent changes over various time horizons (look-back measured in rows).
time_horizons = [5, 20, 60]

# Snapshot the source columns before adding anything: the loop below appends
# columns to df. Already-derived `_pct_change_` columns are skipped so that
# re-running this cell does not produce percent changes of percent changes.
base_columns = [
    column for column in df.columns
    if column not in ['Date', 'Target'] and '_pct_change_' not in column
]

for column in base_columns:
    for horizon in time_horizons:
        # Expressed in percent (x100). pct_change leaves the first `horizon`
        # rows of each new column as NaN.
        df[f'{column}_pct_change_{horizon}'] = df[column].pct_change(periods=horizon) * 100

In [None]:
# Standalone decision-tree regressor; not referenced by the other visible cells.
reg_tree = DecisionTreeRegressor(random_state=42)

# Define the target variable and the features.
# Target: the 10-year yield 20 rows ahead. shift(-20) leaves the last 20 rows
# without a target; dropna() removes them along with any other incomplete rows.
df['US 10yr lead'] = df['US 10yr'].shift(-20)
df = df.dropna()

# Features: every column except the target. The two residual columns are not
# created by any visible cell, so errors='ignore' drops them when present
# instead of raising KeyError on a fresh kernel.
X = df.drop(
    columns=['residuals', 'residual percentile_rank', 'US 10yr lead'],
    errors='ignore',
)
y = df['US 10yr lead']

# Hyperparameter grid for the ensemble search.
# `base_estimator__*` keys use scikit-learn's nested-parameter convention to
# reach the wrapped tree; the remaining keys are assumed to be constructor
# parameters of the ensemble itself — TODO confirm against its API.
param_grid = {
    'base_estimator__max_depth': [3, 5, 7],
    'base_estimator__min_samples_split': [2, 5, 10],
    'base_estimator__min_samples_leaf': [1, 2, 4],
    'n_estimators': [5, 10, 20],
    'cascade_layer_threshold': [0.01, 0.05, 0.1]
}

In [None]:
# Size of the first training window: the earliest 75% of the sample.
initial_train_size = int(0.75 * y.shape[0])  # Set your desired initial train size here

# The target is the 10-year yield shifted 20 rows ahead, so the label of a
# training row only becomes observable 20 rows later. Leave a gap of that many
# rows between the end of each training window and its test row so the model
# is never fitted on labels from the future of the test date.
target_lead = 20

# Walk-forward evaluation with one test row per split. n_splits is reduced by
# the gap so the first training window still contains initial_train_size rows.
tscv = TimeSeriesSplit(
    n_splits=df.shape[0] - initial_train_size - target_lead,
    test_size=1,
    gap=target_lead,
)


In [None]:

# 'neg_mean_squared_error' is a "higher is better" score (values are <= 0),
# so start from -inf and keep the maximum across splits.
best_score = float('-inf')
best_params = None

# Initialize a pandas Series to store out-of-sample predictions with the same index as y.
# Rows that never serve as a test row stay NaN.
oof_predictions = pd.Series(index=y.index, dtype=float)

# Walk forward through the splits, running a grid search on each training window.
# NOTE(review): GridSearchCV uses its default cross-validation inside each
# training window, which is not time-ordered — confirm this is intended.
for train_index, test_index in tscv.split(X):
    # Skip splits whose training window is smaller than the required minimum.
    if len(train_index) < initial_train_size:
        continue

    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    lce_model = LocalCascadeEnsemble(base_estimator=DecisionTreeRegressor(), random_state=42)

    grid_search = GridSearchCV(
        estimator=lce_model,
        param_grid=param_grid,
        scoring='neg_mean_squared_error',
        n_jobs=-1,
    )
    grid_search.fit(X_train, y_train)

    # Keep the parameters of the best-scoring search seen so far
    # (greater negative MSE means smaller error).
    if grid_search.best_score_ > best_score:
        best_score = grid_search.best_score_
        best_params = grid_search.best_params_

    # Predict the held-out row with the refit best estimator of this split
    # and store it at the matching position.
    y_pred = grid_search.predict(X_test)
    oof_predictions.iloc[test_index] = y_pred

if best_params is None:
    raise RuntimeError(
        "No walk-forward split met the minimum training size; "
        "cannot select hyperparameters."
    )

# Fit the final LCE model with the best hyperparameters.
# best_params holds grid keys for the ensemble (including nested
# `base_estimator__*` keys), so apply them with set_params on the ensemble
# rather than passing them to DecisionTreeRegressor, which would reject them.
best_tree = LocalCascadeEnsemble(base_estimator=DecisionTreeRegressor(), random_state=42)
best_tree.set_params(**best_params)
best_tree.fit(X, y)

print("Best hyperparameters:", best_params)
