<p><b><font color="yellow" size="6">Stepwise Regression with Backward Elimination Using mlxtend</font></b></p>

In [21]:
from sklearn.datasets import load_wine
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from mlxtend.feature_selection import SequentialFeatureSelector as SFS
import pandas as pd
import numpy as np

In [22]:
# Load the wine dataset and wrap its feature matrix in a labelled DataFrame
wine = load_wine()
X = pd.DataFrame(data=wine.data, columns=wine.feature_names)
# Synthetic continuous regression target: the dataset's target plus
# 0.1 times the first feature column
y = 0.1 * wine.data[:, 0] + wine.target

In [23]:
# Hold out 20% of the samples for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [24]:
# Standardize the features: learn the scaling from the training split only,
# then apply that same fitted transformation to both splits
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [25]:
# Initialize the Linear Regression model.
# This estimator is handed to the feature selector below and is later
# refit on the selected feature columns to produce the test predictions.
lr = LinearRegression()

In [26]:
# Configure backward elimination with mlxtend's SequentialFeatureSelector:
# the search starts from the full feature set and removes features until
# k_features of them remain.
sfs = SFS(
    estimator=lr,
    k_features=4,      # Stop once 4 features are left
    forward=False,     # Backward direction: features are eliminated, not added
    floating=False,    # Plain sequential search, no floating step
    cv=5,              # 5-fold cross-validation for each candidate subset
    scoring='r2',      # R² is the selection criterion
    n_jobs=-1          # Use all available CPU cores
)


In [27]:
# Run the selection search on the standardized training data
# (the assignment is kept so the cell does not echo the selector's repr)
sfs = sfs.fit(X=X_train_scaled, y=y_train)

In [28]:
# Column positions chosen by the selector, and the matching column names
# looked up in the original feature frame
selected_indices = sfs.k_feature_idx_
selected_features = X.columns[list(selected_indices)].tolist()

In [29]:
# Restrict both scaled splits to the selected columns
X_train_selected = X_train_scaled[:, list(selected_indices)]
X_test_selected = X_test_scaled[:, list(selected_indices)]

# Refit the linear model on the reduced training data and predict the test split
y_pred_test = lr.fit(X_train_selected, y_train).predict(X_test_selected)

In [30]:
# Score the held-out predictions
r2_test = r2_score(y_test, y_pred_test)
mse_test = mean_squared_error(y_test, y_pred_test)

# Report the chosen features alongside the test-set metrics
print("Selected Features:", selected_features)
print(f"Test Set Mean Squared Error: {mse_test:.4f}")
print(f"Test Set R² Score: {r2_test:.4f}")

Selected Features: ['flavanoids', 'color_intensity', 'od280/od315_of_diluted_wines', 'proline']
Test Set Mean Squared Error: 0.0745
Test Set R² Score: 0.8627
