In [None]:
# Import necessary packages
import pandas as pd
import statsmodels.api as sm
import itertools
import numpy as np

# Read the dataset into a DataFrame
df = pd.read_csv('/content/drive/MyDrive/FE-GWP1_model_selecxtion_1.csv')

# Split into the response column and the five candidate predictor columns
y = df.loc[:, 'Y']
X = df.loc[:, ['X1', 'X2', 'X3', 'X4', 'X5']]

# Design matrix for the full model: predictors plus an intercept column
X_with_const = sm.add_constant(X)

# Fit OLS on every predictor and report the result
full_model = sm.OLS(y, X_with_const).fit()
print("Full Model Summary:", full_model.summary(), sep="\n")

Full Model Summary:
                            OLS Regression Results                            
Dep. Variable:                      Y   R-squared:                       0.649
Model:                            OLS   Adj. R-squared:                  0.630
Method:                 Least Squares   F-statistic:                     34.74
Date:                Mon, 16 Jun 2025   Prob (F-statistic):           5.83e-20
Time:                        04:32:53   Log-Likelihood:                -125.30
No. Observations:                 100   AIC:                             262.6
Df Residuals:                      94   BIC:                             278.2
Df Model:                           5                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          1.1902      0.090

In [None]:
def backward_elimination(X, y):
    """Select predictors by backward elimination on adjusted R-squared.

    The search starts from the OLS model that contains every column of ``X``
    plus an intercept. At each step every model obtained by dropping exactly
    one of the currently kept predictors is fitted; the drop giving the
    highest adjusted R-squared is accepted only if it strictly improves on
    the best value seen so far. Otherwise the search stops.

    Parameters
    ----------
    X : pandas.DataFrame
        Candidate predictors, one per column, without an intercept column.
    y : array-like
        Response values, one per row of ``X``.

    Returns
    -------
    The fitted OLS results object of the selected model, or ``None`` when
    ``X`` has no columns.
    """
    remaining = list(X.columns)
    if not remaining:
        return None

    # Baseline: the full model. Without it the first round would always
    # discard a predictor, even when doing so lowers adjusted R-squared.
    best_model = sm.OLS(y, sm.add_constant(X[remaining])).fit()
    best_adj_r2 = best_model.rsquared_adj
    if np.isnan(best_adj_r2):
        # An undefined baseline must not block the search.
        best_adj_r2 = -np.inf

    while remaining:
        # Fit every model that keeps all but one of the current predictors.
        candidates = []
        for combo in itertools.combinations(remaining, len(remaining) - 1):
            kept = list(combo)
            model = sm.OLS(y, sm.add_constant(X[kept])).fit()
            candidates.append((model.rsquared_adj, kept, model))

        # max() returns the first maximal entry, so ties resolve to the
        # earliest combination generated.
        adj_r2, kept, model = max(candidates, key=lambda item: item[0])
        if not (adj_r2 > best_adj_r2):
            break

        # Track the kept columns directly rather than parsing them back out
        # of the fitted model's exog names (which need not contain 'const').
        best_adj_r2 = adj_r2
        best_model = model
        remaining = kept

    return best_model

# Run the adjusted-R-squared backward search and show the chosen model
best_backward_model = backward_elimination(X, y)
print("\nBest Model from Backward Elimination:", best_backward_model.summary(), sep="\n")



Best Model from Backward Elimination:
                            OLS Regression Results                            
Dep. Variable:                      Y   R-squared:                       0.649
Model:                            OLS   Adj. R-squared:                  0.634
Method:                 Least Squares   F-statistic:                     43.87
Date:                Mon, 16 Jun 2025   Prob (F-statistic):           8.29e-21
Time:                        04:33:36   Log-Likelihood:                -125.31
No. Observations:                 100   AIC:                             260.6
Df Residuals:                      95   BIC:                             273.6
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const        

In [None]:
def forward_selection(X, y, criterion='aic'):
    """Select predictors by forward selection on AIC or BIC.

    The search starts from the intercept-only OLS model. At each step every
    model obtained by adding one not-yet-selected column of ``X`` is fitted;
    the addition with the lowest criterion value is accepted only if it is
    strictly lower than the best value seen so far. Otherwise the search
    stops.

    Parameters
    ----------
    X : pandas.DataFrame
        Candidate predictors, one per column, without an intercept column.
    y : array-like
        Response values, one per row of ``X``.
    criterion : str, default 'aic'
        ``'aic'`` or ``'bic'`` (case-insensitive).

    Returns
    -------
    The fitted OLS results object of the selected model (possibly the
    intercept-only model), or ``None`` when ``X`` has no columns.

    Raises
    ------
    ValueError
        If ``criterion`` is neither ``'aic'`` nor ``'bic'``.
    """
    criterion_key = str(criterion).lower()
    if criterion_key not in ('aic', 'bic'):
        # Previously any unrecognised value silently meant BIC.
        raise ValueError(f"criterion must be 'aic' or 'bic', got {criterion!r}")

    def score_of(model):
        return model.aic if criterion_key == 'aic' else model.bic

    selected = []
    remaining_features = list(X.columns)
    if not remaining_features:
        return None

    # Baseline: intercept-only model. Without it the first predictor would
    # always be added, even when it makes the criterion worse.
    intercept_only = pd.DataFrame({'const': np.ones(len(X))}, index=X.index)
    best_model = sm.OLS(y, intercept_only).fit()
    best_score = score_of(best_model)
    if np.isnan(best_score):
        # An undefined baseline must not block the search.
        best_score = np.inf

    while remaining_features:
        # Score every model that adds exactly one more predictor.
        scored = []
        for candidate in remaining_features:
            design = sm.add_constant(X[selected + [candidate]])
            model = sm.OLS(y, design).fit()
            scored.append((score_of(model), candidate, model))

        # Compare on the score only; ties resolve to the earliest candidate,
        # so column labels and model objects are never compared.
        new_score, new_feature, new_model = min(scored, key=lambda item: item[0])
        if not (new_score < best_score):
            break

        best_score = new_score
        best_model = new_model
        selected.append(new_feature)
        remaining_features.remove(new_feature)

    return best_model

# Forward selection scored by AIC
forward_model_aic = forward_selection(X, y, criterion='aic')
print("\nBest Forward Selection Model (AIC):", forward_model_aic.summary(), sep="\n")

# Forward selection scored by BIC
forward_model_bic = forward_selection(X, y, criterion='bic')
print("\nBest Forward Selection Model (BIC):", forward_model_bic.summary(), sep="\n")



Best Forward Selection Model (AIC):
                            OLS Regression Results                            
Dep. Variable:                      Y   R-squared:                       0.649
Model:                            OLS   Adj. R-squared:                  0.634
Method:                 Least Squares   F-statistic:                     43.87
Date:                Mon, 16 Jun 2025   Prob (F-statistic):           8.29e-21
Time:                        04:34:20   Log-Likelihood:                -125.31
No. Observations:                 100   AIC:                             260.6
Df Residuals:                      95   BIC:                             273.6
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          