In [16]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Load the Boston Housing dataset
data = load_boston()
X = data.data
y = data.target
feature_names = data.feature_names

# Preprocess the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


y_bar = np.mean(y_train)

best_r2 = -np.inf
best_feature = None

# Iterate over each feature
for feature_idx in range(X_train.shape[1]):
    # Select the current feature
    X_train_feature = X_train[:, feature_idx].reshape(-1, 1)
    X_test_feature = X_test[:, feature_idx].reshape(-1, 1)
    
    # Create a Linear Regression model
    model = LinearRegression()
    
    # Fit the model
    model.fit(X_train_feature, y_train)
    
    # Predictions
    y_pred = model.predict(X_test_feature)
    
    # Calculate SSR, SSE, SST
    SSR = np.sum((y_pred - y_bar) ** 2)
    SSE = np.sum((y_test - y_pred) ** 2)
    SST = np.sum((y_test - y_bar) ** 2)
    
    # Calculate R²
    r2 = SSR / SST
    
    # Check if this model is the best one
    if r2 > best_r2:
        best_r2 = r2
        best_feature = feature_names[feature_idx]

# Print the best feature and its R² value
print("Best Feature:", best_feature)
print("Best R²:", best_r2)


Best Feature: LSTAT
Best R²: 0.6586639758194484
