WEEKLY RIDGE & LASSO REGRESSION FORECASTING

In [None]:
#Import Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
from sklearn.linear_model import Ridge, Lasso
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from scipy.stats import shapiro
from statsmodels.stats.diagnostic import het_breuschpagan
from statsmodels.stats.stattools import durbin_watson
from statsmodels.stats.outliers_influence import variance_inflation_factor

# Use seaborn's "whitegrid" theme for the figures drawn in the cells below.
sns.set_theme(style="whitegrid")

In [None]:
# Load and Prepare Data
data_path = "data/weekly_data.csv"     # <-- Update path here
target_column = "NFTY_Weekly_Return"   # <-- Replace with your target column

df = pd.read_csv(data_path)
print(f" Data Loaded: {df.shape[0]} rows, {df.shape[1]} columns")

# Fail early with an actionable message rather than silently keeping the
# frame intact and hitting a bare KeyError on the next line.
if target_column not in df.columns:
    raise KeyError(
        f"Target column {target_column!r} not found in {data_path!r}; "
        f"available columns: {list(df.columns)}"
    )

# Every column other than the target is used as a predictor.
X = df.drop(columns=[target_column])
y = df[target_column]

# Train-Test Split
# Unshuffled split: the first 80% of rows train the models and the remaining
# rows are held out. All four slices are positional (.iloc) so features and
# target stay aligned whatever index the frame carries.
# NOTE(review): this presumes the CSV rows are already in chronological order.
train_size = int(len(df) * 0.8)
X_train, X_test = X.iloc[:train_size], X.iloc[train_size:]
y_train, y_test = y.iloc[:train_size], y.iloc[train_size:]

print(f"Train size: {len(X_train)}, Test size: {len(X_test)}")



In [None]:
# Multicollinearity Check (VIF)
# variance_inflation_factor regresses one column of the matrix it receives on
# all the other columns and does not add an intercept of its own. The design
# matrix therefore gets an explicit constant, so each auxiliary regression is
# centred; without it, features with a non-zero mean can show inflated VIFs.
vif_design = sm.add_constant(X, has_constant="skip")
# add_constant prepends the constant, so feature columns are shifted by one
# (or by zero when X already contained a constant column and none was added).
const_offset = vif_design.shape[1] - X.shape[1]
design_matrix = vif_design.values

vif_data = pd.DataFrame({
    "Feature": X.columns,
    "VIF": [
        variance_inflation_factor(design_matrix, position + const_offset)
        for position in range(X.shape[1])
    ],
})
print("\nVIF Analysis (Multicollinearity Check):")
print(vif_data)

# Features above the threshold of 5 are listed separately for attention.
high_vif = vif_data[vif_data["VIF"] > 5]
if not high_vif.empty:
    print("\n High VIF Features:")
    print(high_vif)


In [None]:
#Helper: Model Evaluation & Diagnostics
def evaluate_model(model, name, X_train, X_test, y_train, y_test):
    """Score an already-fitted linear model and show its diagnostics.

    The model is NOT fitted here; the caller must have called ``fit`` first.
    The function prints train/test metrics, draws three figures (coefficients,
    actual vs. predicted, residuals) and prints three residual tests computed
    on the test split.

    Parameters
    ----------
    model : fitted estimator exposing ``predict`` and ``coef_``.
    name : str
        Label used in printed headers, figure titles and the returned dict.
    X_train, X_test : pandas.DataFrame
        Feature matrices; ``X_train.columns`` labels the coefficient plot.
    y_train, y_test : pandas.Series
        Targets matching the feature matrices row for row.

    Returns
    -------
    dict
        Keys ``"Model"``, ``"Train R²"``, ``"Test R²"``, ``"RMSE"``, ``"MAE"``
        and ``"DW"``; RMSE, MAE and DW are computed on the test split.
    """
    # Predictions for both splits.
    fitted_train = model.predict(X_train)
    fitted_test = model.predict(X_test)
    actual_test = y_test.values

    # Goodness-of-fit on train, goodness-of-fit and error on test.
    train_r2 = r2_score(y_train, fitted_train)
    test_r2 = r2_score(y_test, fitted_test)
    test_rmse = np.sqrt(mean_squared_error(y_test, fitted_test))
    test_mae = mean_absolute_error(y_test, fitted_test)

    print(f"\n=== {name} Evaluation ===")
    print(f"Train R²: {train_r2:.4f}")
    print(f"Test  R²: {test_r2:.4f}")
    print(f"RMSE: {test_rmse:.4f}")
    print(f"MAE:  {test_mae:.4f}")

    # Figure 1: coefficients, sorted ascending so the sign pattern is visible.
    coefficients = pd.DataFrame(
        {"Coefficient": model.coef_}, index=X_train.columns
    ).sort_values("Coefficient")
    _, coef_ax = plt.subplots(figsize=(14, 7))
    sns.barplot(
        x="Coefficient",
        y=coefficients.index,
        data=coefficients,
        palette="coolwarm",
        ax=coef_ax,
    )
    coef_ax.set_title(f"{name} - Coefficients Importance")
    coef_ax.grid(True)
    plt.show()

    # Figure 2: held-out actuals against the model's predictions.
    _, fit_ax = plt.subplots(figsize=(14, 6))
    fit_ax.plot(actual_test, label="Actual", marker='o')
    fit_ax.plot(fitted_test, label="Predicted", linestyle='--', marker='x')
    fit_ax.legend()
    fit_ax.set_title(f"{name} - Actual vs Predicted (Weekly Returns)")
    fit_ax.grid(True)
    plt.show()

    # Figure 3: test residuals against predictions, with a zero reference line.
    residuals = actual_test - fitted_test
    _, resid_ax = plt.subplots(figsize=(12, 6))
    resid_ax.scatter(fitted_test, residuals, edgecolor='k', alpha=0.7)
    resid_ax.axhline(0, color='red', linestyle='--')
    resid_ax.set_title(f"{name} - Residual Plot")
    resid_ax.grid(True)
    plt.show()

    # Residual tests; Breusch-Pagan gets the test features plus an intercept.
    shapiro_test = shapiro(residuals)
    bp_exog = sm.add_constant(X_test)
    bp_test = het_breuschpagan(residuals, bp_exog)
    dw_stat = durbin_watson(residuals)

    print(f"\n{name} Diagnostic Tests:")
    print(f"Normality (Shapiro): p={shapiro_test.pvalue:.4f}")
    print(f"Homoskedasticity (Breusch–Pagan): p={bp_test[1]:.4f}")
    print(f"Autocorrelation (Durbin–Watson): {dw_stat:.2f}")

    summary = {
        "Model": name,
        "Train R²": train_r2,
        "Test R²": test_r2,
        "RMSE": test_rmse,
        "MAE": test_mae,
        "DW": dw_stat,
    }
    return summary


In [None]:
# Ridge Regression
# L2-penalised linear model (alpha=1.0) fitted on the training split, then
# scored and plotted on both splits by evaluate_model.
ridge = Ridge(alpha=1.0).fit(X_train, y_train)
ridge_results = evaluate_model(
    model=ridge,
    name="Ridge",
    X_train=X_train,
    X_test=X_test,
    y_train=y_train,
    y_test=y_test,
)


In [None]:
# Lasso Regression
# L1-penalised linear model (alpha=0.1) fitted on the training split, then
# scored and plotted on both splits by evaluate_model.
lasso = Lasso(alpha=0.1).fit(X_train, y_train)
lasso_results = evaluate_model(
    model=lasso,
    name="Lasso",
    X_train=X_train,
    X_test=X_test,
    y_train=y_train,
    y_test=y_test,
)

In [None]:
# Model Comparison Summary
# One row per model, built from the dicts returned by evaluate_model.
comparison = pd.DataFrame([ridge_results, lasso_results])
print("\n=== Model Comparison Summary ===")
print(comparison.round(4).to_string(index=False))

# Long format (one row per model/metric pair) so seaborn can group the bars
# by metric and colour them by model.
comparison_long = comparison.melt(id_vars="Model", value_vars=["Test R²", "RMSE"])

_, comparison_ax = plt.subplots(figsize=(8, 5))
sns.barplot(
    data=comparison_long,
    x="variable",
    y="value",
    hue="Model",
    palette="Set2",
    ax=comparison_ax,
)
comparison_ax.set_title("Ridge vs Lasso - Performance Comparison")
comparison_ax.grid(True)
plt.show()

In [None]:
# Next 4-Week Forecast (Ridge)
# Recursive multi-step forecast: start from the last observed feature row,
# predict one week, shift the row left by one position, write the prediction
# into the last column, and repeat.
# NOTE(review): shifting and appending is only meaningful if the columns of X
# are consecutive lags of the target ordered oldest -> newest. Confirm this
# against the dataset; for any other feature layout the values after the first
# step are fed from mismatched columns.
forecast_weeks = 4
feature_names = X.columns
# Float copy of the last row, so a prediction written back into it is not
# truncated when the feature columns are integer-typed.
X_future = X.iloc[[-1]].to_numpy(dtype=float)
future_forecast = []

for _ in range(forecast_weeks):
    # Predict from a DataFrame carrying the fitted column names, so sklearn
    # does not warn about missing feature names on every step.
    step_input = pd.DataFrame(X_future, columns=feature_names)
    next_pred = float(ridge.predict(step_input)[0])
    future_forecast.append(round(next_pred, 3))
    # Roll along the feature axis explicitly; np.roll returns a new array, so
    # the assignment below never touches the original data.
    X_future = np.roll(X_future, -1, axis=1)
    X_future[0, -1] = next_pred

forecast_df = pd.DataFrame({
    "Week": [f"Week +{i+1}" for i in range(forecast_weeks)],
    "Forecast (Ridge)": future_forecast
})
print("\nNext 4 Weeks Forecast (Ridge):")
print(forecast_df)


In [None]:
# Optional: Comparative Forecast (MLR vs Ridge vs Actual)
# The numbers below are hard-coded illustrative values, not output of the
# models fitted in this notebook; remove the cell or update them by hand.
example_forecast = pd.DataFrame({
    "Week": ["Week 1", "Week 2", "Week 3", "Week 4"],
    "MLR Forecast": [-0.601, 2.061, -0.984, -6.218],
    "Ridge Forecast": [-0.707, 3.134, 0.325, -4.401],
    "Actual": [-0.28, 1.70, -0.28, -3.12]
})

x = np.arange(len(example_forecast))
width = 0.25

# (horizontal shift, source column, legend label, bar colour) for each series;
# the shifts place the three bars side by side around every week's tick.
bar_layout = (
    (-width, "MLR Forecast", "MLR", "skyblue"),
    (0, "Ridge Forecast", "Ridge", "orange"),
    (width, "Actual", "Actual", "green"),
)

_, example_ax = plt.subplots(figsize=(10, 6))
for shift, column, label, color in bar_layout:
    example_ax.bar(x + shift, example_forecast[column], width, label=label, color=color)

example_ax.set_xticks(x)
example_ax.set_xticklabels(example_forecast["Week"])
example_ax.set_title("MLR vs Ridge Forecast vs Actual (Example Month)")
example_ax.axhline(0, color="black", linewidth=1)
example_ax.legend()
example_ax.grid(axis="y", linestyle="--", alpha=0.6)
plt.show()


In [None]:
# Summary
# Build every line of the recap first, then print them in order: the header,
# train/test R² for each model, and the Ridge forecast table.
ridge_line = f"Ridge: Train R²={ridge_results['Train R²']:.4f}, Test R²={ridge_results['Test R²']:.4f}"
lasso_line = f"Lasso: Train R²={lasso_results['Train R²']:.4f}, Test R²={lasso_results['Test R²']:.4f}"
forecast_line = f"Next 4-Week Ridge Forecast:\n{forecast_df}"

for summary_line in (
    "\n===== INTERPRETATION SUMMARY =====",
    ridge_line,
    lasso_line,
    forecast_line,
):
    print(summary_line)