In [11]:
import numpy as np
import pandas as pd
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
X, y = make_regression(n_samples=100, n_features=10, noise=0.1, random_state=42)
X = pd.DataFrame(X, columns=[f"Feature{i}" for i in range(1, 11)])
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [12]:
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, mean_squared_error

poly = PolynomialFeatures(degree=2)
X_train_poly = poly.fit_transform(X_train)
X_test_poly = poly.transform(X_test)

model = LinearRegression()
model.fit(X_train_poly, y_train)

y_pred = model.predict(X_test_poly)

r2 = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
adj_r2 = 1 - (1 - r2) * (len(y_test) - 1) / (len(y_test) - X_test_poly.shape[1] - 1)

print(f"R²: {r2}")
print(f"Adjusted R²: {adj_r2}")
print(f"Mean Squared Error: {mse}")


R²: 0.9999959187316214
Adjusted R²: 1.0000016498744508
Mean Squared Error: 0.24391771328706482


In [19]:
import statsmodels.api as sm
X_train_const = sm.add_constant(X_train_poly)
model = sm.OLS(y_train, X_train_const).fit()
model_summary = model.summary()
print(model_summary)





                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       1.000
Model:                            OLS   Adj. R-squared:                  1.000
Method:                 Least Squares   F-statistic:                 2.644e+06
Date:                Thu, 06 Mar 2025   Prob (F-statistic):           3.32e-43
Time:                        01:09:00   Log-Likelihood:                 126.13
No. Observations:                  80   AIC:                            -120.3
Df Residuals:                      14   BIC:                             36.96
Df Model:                          65                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         -0.0683      0.066     -1.036      0.3

In [6]:
from sklearn.feature_selection import RFE
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures

# Create the model
model = LinearRegression()

poly = PolynomialFeatures(degree=2)
X_train_poly = poly.fit_transform(X_train)  
X_test_poly = poly.transform(X_test)

selector = RFE(model, n_features_to_select=5)  
selector = selector.fit(X_train_poly, y_train)

selected_indices = selector.support_

poly_feature_names = poly.get_feature_names_out(X_train.columns)

selected_features_rfe = [poly_feature_names[i] for i in range(len(poly_feature_names)) if selected_indices[i]]

print(f"Selected Features: {selected_features_rfe}")


Selected Features: ['Feature4', 'Feature5', 'Feature6', 'Feature7', 'Feature10']


In [7]:
X_train_all = X_train_poly
X_test_all = X_test_poly

model.fit(X_train_all, y_train)

y_pred_all = model.predict(X_test_all)

r2_all = r2_score(y_test, y_pred_all)
adj_r2_all = 1 - (1 - r2_all) * (len(y_test) - 1) / (len(y_test) - X_test_all.shape[1] - 1)

print(f"R² with all features: {r2_all}")
print(f"Adjusted R² with all features: {adj_r2_all}")


R² with all features: 0.9999959187316214
Adjusted R² with all features: 1.0000016498744508


In [10]:
from sklearn.feature_selection import RFE
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import r2_score, mean_squared_error

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

poly = PolynomialFeatures(degree=2)
X_train_poly = poly.fit_transform(X_train)
X_test_poly = poly.transform(X_test)

model = LinearRegression()

rfe = RFE(estimator=model, n_features_to_select=5)
rfe.fit(X_train_poly, y_train)

X_train_backward = X_train_poly[:, rfe.support_]
X_test_backward = X_test_poly[:, rfe.support_]

rfe_forward = RFE(estimator=model, n_features_to_select=5)
rfe_forward.fit(X_train_poly, y_train)

X_train_forward = X_train_poly[:, rfe_forward.support_]
X_test_forward = X_test_poly[:, rfe_forward.support_]

rfe_bidirectional = RFE(estimator=model, n_features_to_select=5)
rfe_bidirectional.fit(X_train_poly, y_train)

X_train_bidirectional = X_train_poly[:, rfe_bidirectional.support_]
X_test_bidirectional = X_test_poly[:, rfe_bidirectional.support_]

X_train_all = X_train_poly
X_test_all = X_test_poly

def evaluate_model(X_train, X_test, y_train, y_test):
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    r2 = r2_score(y_test, y_pred)
    adj_r2 = 1 - (1 - r2) * (len(y_test) - 1) / (len(y_test) - X_test.shape[1] - 1)
    mse = mean_squared_error(y_test, y_pred)
    return r2, adj_r2, mse

print("Model with Backward Elimination:")
r2_backward, adj_r2_backward, mse_backward = evaluate_model(X_train_backward, X_test_backward, y_train, y_test)
print(f"R²: {r2_backward}")
print(f"Adjusted R²: {adj_r2_backward}")
print(f"Mean Squared Error: {mse_backward}")

print("\nModel with Forward Selection:")
r2_forward, adj_r2_forward, mse_forward = evaluate_model(X_train_forward, X_test_forward, y_train, y_test)
print(f"R²: {r2_forward}")
print(f"Adjusted R²: {adj_r2_forward}")
print(f"Mean Squared Error: {mse_forward}")

print("\nModel with Bidirectional Selection:")
r2_bidirectional, adj_r2_bidirectional, mse_bidirectional = evaluate_model(X_train_bidirectional, X_test_bidirectional, y_train, y_test)
print(f"R²: {r2_bidirectional}")
print(f"Adjusted R²: {adj_r2_bidirectional}")
print(f"Mean Squared Error: {mse_bidirectional}")

print("\nModel with All Variables:")
r2_all, adj_r2_all, mse_all = evaluate_model(X_train_all, X_test_all, y_train, y_test)
print(f"R²: {r2_all}")
print(f"Adjusted R²: {adj_r2_all}")
print(f"Mean Squared Error: {mse_all}")


Model with Backward Elimination:
R²: 0.9297741789657031
Adjusted R²: 0.9046935285963114
Mean Squared Error: 4197.058387549051

Model with Forward Selection:
R²: 0.9297741789657031
Adjusted R²: 0.9046935285963114
Mean Squared Error: 4197.058387549051

Model with Bidirectional Selection:
R²: 0.9297741789657031
Adjusted R²: 0.9046935285963114
Mean Squared Error: 4197.058387549051

Model with All Variables:
R²: 0.9999959187316214
Adjusted R²: 1.0000016498744508
Mean Squared Error: 0.24391771328706482
