# 📊 Macro-Based Market Forecasting
This notebook builds a Lasso regression model that forecasts the next-period MSCI World index level from macroeconomic features, using KMeans clustering to detect macro regimes.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import joblib
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.linear_model import Lasso
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from sklearn.model_selection import TimeSeriesSplit


## 📂 Load Data

In [None]:
# Read the historical sheet, order the rows chronologically and use the
# "Date" column as the index. Point the path at your own dataset.
df = (
    pd.read_excel("market_data.xlsx", sheet_name="historical_data")
    .sort_values("Date")
    .set_index("Date")
)


## 🧮 Feature Engineering: Lags and Macro-only Selection

In [None]:
# Generate lags. The shifted columns are collected first and attached with a
# single concat: inserting them one at a time fragments the frame and makes
# pandas emit a PerformanceWarning on wide datasets.
lag_months = [1, 3, 6]
base_columns = list(df.columns)  # snapshot: lags are taken of the raw columns only
lagged = {}
for col in base_columns:
    for lag in lag_months:
        lagged[f"{col}_lag{lag}"] = df[col].shift(lag)
df = pd.concat([df, pd.concat(lagged, axis=1)], axis=1)

# Target: shift(-1) pulls the following row's "MSCI World" value onto the
# current row, so each row is paired with the next observation.
df["MSCI_World_target"] = df["MSCI World"].shift(-1)

# 1M momentum features: row-over-row percentage change of every column
# (levels, lags and target alike; the target-derived column is removed by the
# "MSCI" keyword filter below, so it never reaches the model).
returns_df = df.pct_change().add_suffix('_pct_1m')
# dropna() removes the leading rows without a full lag history and the final
# row, whose target is unknown.
full_df = pd.concat([df, returns_df], axis=1).dropna()

# Define X and y
X = full_df.drop(columns=["MSCI World", "MSCI_World_target"])
y = full_df["MSCI_World_target"]

# Restrict to macro-only BEFORE cleaning: any column whose name contains one
# of these market/portfolio keywords is discarded. Doing this first means an
# infinite value in a column we throw away can no longer cost us a row.
excluded_keywords = ["NASDAQ", "Portfolio", "EURO STOXX", "OMX30", "Russell", "S&P 500", "MSCI", "PPM"]
non_macro_columns = [
    col for col in X.columns
    if any(keyword in col for keyword in excluded_keywords)
]
X = X.drop(columns=non_macro_columns)

# Clean: pct_change yields +/-inf when the previous value is 0; treat those as
# missing and drop the affected rows, then realign the target.
X = X.replace([np.inf, -np.inf], np.nan).dropna()
y = y.loc[X.index]


## 🤖 KMeans Regime Detection

In [None]:
# Cluster on the macro features only. "Regime" and "Date" are the helper
# columns this cell appends to X below; excluding them keeps the cell safe to
# re-run (otherwise a second execution would feed its own output, including a
# datetime column, into the scaler). On the first run nothing is excluded.
macro_features = X.drop(columns=["Regime", "Date"], errors="ignore")

# Standardise so that no single feature dominates the Euclidean distances
# KMeans relies on.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(macro_features)

# Fixed random_state makes the regime labels reproducible across runs.
kmeans = KMeans(n_clusters=5, random_state=42, n_init=10)
regimes = kmeans.fit_predict(X_scaled)

# Attach the cluster label and the observation date to the feature frame.
X["Regime"] = regimes
X["Date"] = y.index


## 📊 Regime Macro Summary

In [None]:
# Average every column within each detected regime, rounded to two decimals,
# and display the resulting table.
per_regime_mean = X.groupby("Regime").mean()
regime_summary = per_regime_mean.round(2)
regime_summary


## 🔮 Lasso Forecasting Model

In [None]:
# One-hot encode the regime label. "Regime" holds integer cluster ids, and
# get_dummies only expands object/string/categorical columns by default, so
# the column must be named explicitly; otherwise the ids pass through as one
# ordinal feature and the model treats regime 4 as "four times" regime 1.
# dtype=float keeps the design matrix purely numeric for the scaler.
X_encoded = pd.get_dummies(
    X.drop(columns=["Date"]),
    columns=["Regime"],
    dtype=float,
)

# Standardise inside the pipeline so the L1 penalty treats all features on a
# common scale.
lasso_pipeline = Pipeline([
    ("scaler", StandardScaler()),
    ("lasso", Lasso(alpha=0.1))
])
lasso_pipeline.fit(X_encoded, y)
y_pred = lasso_pipeline.predict(X_encoded)

# Evaluate
# NOTE: the model is scored on the same rows it was fitted on, so these are
# in-sample figures and will understate the out-of-sample forecast error.
rmse = np.sqrt(np.mean((y - y_pred) ** 2))
# RMSE expressed as a percentage of the mean target level.
pct_error = (rmse / y.mean()) * 100
rmse, pct_error


## 📈 Actual vs Predicted

In [None]:
# Overlay the model's fitted values on the realised target series.
fig, ax = plt.subplots(figsize=(12, 5))
ax.plot(y.index, y, label='Actual MSCI World')
ax.plot(y.index, y_pred, label='Predicted', alpha=0.7)
ax.set_title("MSCI World Forecast - Lasso + Macro Regimes")
ax.set_xlabel("Date")
ax.set_ylabel("Index Level")
ax.legend()
ax.grid(True)
fig.tight_layout()
plt.show()


## 💾 Save Models

In [None]:
# Persist each fitted object under its own file name, in a fixed order:
# forecasting pipeline, regime clusterer, then the clustering scaler.
artifacts = {
    "final_macro_lasso_model.joblib": lasso_pipeline,
    "final_kmeans_macro_model.joblib": kmeans,
    "final_scaler.joblib": scaler,
}
for target_path, fitted_obj in artifacts.items():
    written = joblib.dump(fitted_obj, target_path)

# Echo the result of the final dump so the cell still displays it.
written
