WEEKLY MULTIPLE LINEAR REGRESSION FORECAST

In [None]:
#Imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from statsmodels.stats.diagnostic import het_breuschpagan
from statsmodels.stats.stattools import durbin_watson
from scipy.stats import shapiro
from statsmodels.stats.outliers_influence import variance_inflation_factor

# Apply seaborn's "whitegrid" theme globally so every plot below shares it.
sns.set_theme(style="whitegrid")

In [None]:
#Load Data
# Update this path or wrap in a function later to load any dataset
data_path = "your_data.csv"   # <-- replace with your dataset
target_col = "NFTY_Weekly_Return"
df = pd.read_csv(data_path)
print(f"Data Loaded: {df.shape[0]} rows, {df.shape[1]} columns")

# Fail here, next to the load, instead of with a bare KeyError further down
# when the target column is first indexed.
if target_col not in df.columns:
    raise KeyError(
        f"Target column '{target_col}' not found in {data_path}; "
        f"available columns: {list(df.columns)}"
    )


In [None]:
#Define Features & Target
# Columns kept out of the feature matrix: the target itself plus the two
# NFTY_Return_Lag_* columns. errors="ignore" lets the drop succeed when one
# of these columns is absent from the file.
non_feature_cols = [target_col, "NFTY_Return_Lag_2W", "NFTY_Return_Lag_1W"]
X = df.drop(columns=non_feature_cols, errors="ignore")
y = df[target_col]

In [None]:
#Train-Test Split
# Ordered 80/20 split with no shuffling: the first 80% of rows train the
# model and the remaining rows are held out.
# NOTE(review): assumes the rows are already in time order - confirm.
train_size = int(len(df) * 0.8)
if train_size == 0:
    raise ValueError(
        f"Need at least 2 rows for an 80/20 split, got {len(df)}"
    )

# Slice X and y the same positional way so they stay aligned whatever the
# index labels are.
X_train, X_test = X.iloc[:train_size], X.iloc[train_size:]
y_train, y_test = y.iloc[:train_size], y.iloc[train_size:]

print(f"Train size: {len(X_train)}, Test size: {len(X_test)}")

In [None]:
#Fit MLR Model
# fit() returns the estimator itself, so construction and fitting are chained.
model = LinearRegression().fit(X_train, y_train)

# In-sample and hold-out predictions, in that order.
y_train_pred, y_test_pred = (model.predict(part) for part in (X_train, X_test))

# R² on each split; a large gap between the two points to overfitting.
train_r2, test_r2 = r2_score(y_train, y_train_pred), r2_score(y_test, y_test_pred)

print(f"Train R²: {train_r2:.4f}")
print(f"Test R²: {test_r2:.4f}")

In [None]:
#Visualizations - Actual vs Predicted
# Line chart over the whole test window.
plt.figure(figsize=(14, 7))
plt.plot(y_test.values, label="Actual", marker='o')
plt.plot(y_test_pred, label="Predicted", linestyle='--', marker='x')
plt.legend()
plt.title("Actual vs Predicted - Weekly Returns")
plt.grid(True)
plt.show()

# Bar Comparison (first weeks of the test window, 15 at most)
# Cap the count at the test-set length: plt.bar raises a shape-mismatch error
# when the x positions outnumber the heights.
n_bar_weeks = min(15, len(y_test))
bar_positions = range(n_bar_weeks)

plt.figure(figsize=(14, 7))
plt.bar(bar_positions, y_test.iloc[:n_bar_weeks], label="Actual", color="skyblue")
# Drawn on top of the "Actual" bars; alpha keeps the bars underneath visible.
plt.bar(bar_positions, y_test_pred[:n_bar_weeks], label="Predicted", color="orange", alpha=0.7)
plt.legend()
plt.title(f"Actual vs Predicted - First {n_bar_weeks} Weeks")
plt.grid(True)
plt.show()


In [None]:
#OLS Summary (for detailed stats)
# statsmodels' OLS does not add an intercept by itself, so a constant column
# is added to the training features before fitting.
X_train_ols = sm.add_constant(X_train)
ols_model = sm.OLS(endog=y_train, exog=X_train_ols).fit()

# Full regression table: coefficients, standard errors, p-values, fit stats.
ols_report = ols_model.summary()
print(ols_report)

In [None]:
#Feature Importance
# NOTE: these are the raw regression coefficients, so their size depends on
# each feature's units; they are comparable across features only when the
# inputs share a scale.
coefficients = pd.DataFrame(model.coef_, index=X.columns, columns=["Coefficient"])
coefficients = coefficients.sort_values(by="Coefficient", ascending=False)

plt.figure(figsize=(14, 7))
sns.barplot(x="Coefficient", y=coefficients.index, data=coefficients, palette="coolwarm")
plt.title("Feature Importance - MLR Coefficients")
plt.grid(True)
plt.show()

# Rank by absolute size for the "top 5": taking the head of the signed sort
# would list only the most positive coefficients and hide strong negative ones.
# Positional selection keeps this correct even with duplicate column names.
magnitude_order = np.argsort(-coefficients["Coefficient"].abs().to_numpy(), kind="stable")

print("\nTop 5 Features driving Weekly Returns:")
print(coefficients.iloc[magnitude_order[:5]])

In [None]:
#Residual Analysis
# Hold-out residuals: actual minus predicted on the test split.
residuals = y_test.to_numpy() - y_test_pred

# Residuals against fitted values; the dashed red line marks zero error.
_, residual_ax = plt.subplots(figsize=(12, 6))
residual_ax.scatter(y_test_pred, residuals, alpha=0.7, edgecolor='k')
residual_ax.axhline(0, color='red', linestyle='--')
residual_ax.set_title("Residual Plot")
residual_ax.grid(True)
plt.show()

In [None]:
#Statistical Tests
# All three diagnostics are run on the test-split residuals.
dw_stat = durbin_watson(residuals)
bp_test = het_breuschpagan(residuals, sm.add_constant(X_test))
shapiro_test = shapiro(residuals)

print("\n--- Diagnostic Tests ---")
print(f"Shapiro-Wilk p-value (Normality): {shapiro_test.pvalue:.4f}")
# bp_test[1] is the p-value of the Lagrange-multiplier form of the test.
print(f"Breusch-Pagan p-value (Homoskedasticity): {bp_test[1]:.4f}")
print(f"Durbin-Watson Statistic (Autocorrelation): {dw_stat:.2f}")

# Verdicts at the 5% level: a p-value above 0.05 fails to reject the null
# (normal residuals / constant variance).
if shapiro_test.pvalue > 0.05:
    normality = "Normal"
else:
    normality = "Not Normal"

if bp_test[1] > 0.05:
    homoskedasticity = "Homoskedastic"
else:
    homoskedasticity = "Heteroskedastic"

# Durbin-Watson is close to 2 when residuals are uncorrelated; the band
# (1.5, 2.5) is used here as the "no strong autocorrelation" range.
if 1.5 < dw_stat < 2.5:
    autocorr = "No Strong Autocorrelation"
else:
    autocorr = "Possible Autocorrelation"

In [None]:
#Multicollinearity Check
# VIF regresses each feature on all other columns of the design matrix.
# variance_inflation_factor does not add an intercept itself, so the constant
# is added here to match the design of the fitted model; without it the VIFs
# are computed for a regression through the origin and can be badly inflated.
vif_design = sm.add_constant(X)
# add_constant prepends the column, or leaves X unchanged if it already
# contains a constant, so this offset is 1 or 0.
n_added = vif_design.shape[1] - X.shape[1]
vif_matrix = vif_design.to_numpy(dtype=float)

vif_data = pd.DataFrame()
vif_data["Feature"] = X.columns
# Report a VIF for the real features only; the constant gets no row.
vif_data["VIF"] = [
    variance_inflation_factor(vif_matrix, col_pos + n_added)
    for col_pos in range(X.shape[1])
]
print("\nVIF Analysis (Multicollinearity Check):")
print(vif_data)

# A VIF above 5 is the threshold used here for problematic collinearity.
high_vif = vif_data[vif_data["VIF"] > 5]
if not high_vif.empty:
    print("\nHigh VIF Features:")
    print(high_vif)

In [None]:
#Correlation Heatmap
# Pairwise correlation between the feature columns.
feature_corr = X.corr()

_, heatmap_ax = plt.subplots(figsize=(12, 8))
sns.heatmap(feature_corr, cmap="coolwarm", annot=False, linewidths=0.5, ax=heatmap_ax)
heatmap_ax.set_title("Feature Correlation Heatmap")
plt.show()


In [None]:
#Evaluation Metrics
rmse = np.sqrt(mean_squared_error(y_test, y_test_pred))
mae = mean_absolute_error(y_test, y_test_pred)

# MAPE is undefined where the actual value is zero. Adding a small epsilon to
# the denominator does not help for returns: it is sign-asymmetric and an
# exact zero still produces a term of order 1e10 that swamps the mean. So the
# average is taken over the non-zero actuals only.
actual_values = np.asarray(y_test, dtype=float)
predicted_values = np.asarray(y_test_pred, dtype=float)
nonzero_actual = actual_values != 0
if nonzero_actual.any():
    pct_errors = (
        actual_values[nonzero_actual] - predicted_values[nonzero_actual]
    ) / actual_values[nonzero_actual]
    mape = np.mean(np.abs(pct_errors)) * 100
else:
    mape = np.nan  # every actual is zero: no percentage error can be formed

print("\n--- Evaluation Metrics ---")
print(f"RMSE: {rmse:.4f}")
print(f"MAE: {mae:.4f}")
print(f"MAPE: {mape:.2f}%")

In [None]:
#Future Forecasts (Configurable)
forecast_weeks = 4   # You can change this

# NOTE(review): the loop below shifts the feature row one column to the left
# and writes each prediction into the last column. That is only meaningful if
# X's columns are lags of the target ordered oldest -> newest - confirm
# against the dataset before relying on these numbers.
#
# Start from the most recent feature row, as an explicit float copy: with an
# integer feature matrix the prediction written back would be truncated.
X_future = X.iloc[[-1]].to_numpy(dtype=float, copy=True)

future_forecast = []
for _ in range(forecast_weeks):
    # Predict from a DataFrame carrying the training column names, so sklearn
    # does not warn about missing feature names.
    next_pred = model.predict(pd.DataFrame(X_future, columns=X.columns))[0]
    future_forecast.append(next_pred)
    # Shift along the column axis (the array has a single row).
    X_future = np.roll(X_future, -1, axis=1)
    X_future[0, -1] = next_pred  # update last col

# Plot the tail of the test window (15 weeks at most) followed by the forecast.
recent_actual = y_test.iloc[-15:]
recent_pred = y_test_pred[-15:]
weeks_past = np.arange(len(recent_actual))
weeks_future = np.arange(len(recent_actual), len(recent_actual) + forecast_weeks)

plt.figure(figsize=(12, 6))
plt.plot(weeks_past, recent_actual, label='Actual (Last 15 Weeks)', marker='o')
plt.plot(weeks_past, recent_pred, label='Predicted (Last 15 Weeks)', linestyle='--', marker='x')
plt.plot(weeks_future, future_forecast, label=f'Next {forecast_weeks} Weeks Forecast', color='red', marker='D')
plt.legend()
plt.title(f"NIFTY Weekly Return Forecast ({forecast_weeks} Weeks Ahead)")
plt.grid(True)
plt.show()

forecast_df = pd.DataFrame({
    "Week": [f"Week +{i+1}" for i in range(forecast_weeks)],
    "Forecast": future_forecast
})
print("\nNext Weeks Forecast:")
print(forecast_df)


In [None]:
#Summary Report
# Recap of the fit quality and the diagnostic verdicts computed above.
multicollinearity = "Not Detected" if high_vif.empty else "Detected"

print("\n===== Interpretation Summary =====")
print(f"Train R²: {train_r2:.4f}")
print(f"Test R²: {test_r2:.4f}")
print(f"Normality: {normality}")
print(f"Homoskedasticity: {homoskedasticity}")
print(f"Autocorrelation: {autocorr}")
print(f"Multicollinearity: {multicollinearity}")