In [16]:
# Step 1: Import necessary libraries
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_california_housing
from sklearn.ensemble import RandomForestRegressor
from sklearn.feature_selection import SelectFromModel
from sklearn.feature_selection import SequentialFeatureSelector
from sklearn.linear_model import LinearRegression

In [17]:
# Step 2: Fetch the California housing dataset and pull out the feature
# matrix (X) and the regression target (y). The full `data` object is kept
# because its feature_names are needed later to label the selected columns.
data = fetch_california_housing()
X = data.data
y = data.target

In [18]:
# Step 3: Train the random forest that is handed to SelectFromModel.
# The seed is fixed so the fitted forest is reproducible across runs.
forest_params = {"n_estimators": 100, "random_state": 42}
estimator = RandomForestRegressor(**forest_params)
estimator.fit(X, y)

In [19]:
# Select the three most important features according to the random forest.
#
# The forest was already fitted in the previous cell, so it is passed with
# prefit=True. With the default prefit=False, selector.fit() clones the
# estimator and trains it a second time on the same data with the same seed,
# which doubles the training cost without changing the importances.
#
# threshold=-np.inf switches off the importance cutoff so that max_features=3
# alone determines how many features are kept.
selector = SelectFromModel(estimator, max_features=3, threshold=-np.inf, prefit=True)
# A prefit selector is applied through transform() directly; fit() is not needed.
X_selected = selector.transform(X)

In [20]:


# Show how many columns the feature matrix has before and after selection
for caption, shape in (
    ("Shape of X before feature selection:", X.shape),
    ("Shape of X after feature selection:", X_selected.shape),
):
    print(caption, shape)


Shape of X before feature selection: (20640, 8)
Shape of X after feature selection: (20640, 3)


In [21]:

# Optional: Translate the selector's kept column positions into feature names
selected_feature_indices = selector.get_support(indices=True)
selected_feature_names = []
for idx in selected_feature_indices:
    # data.feature_names is ordered like the columns of X
    selected_feature_names.append(data.feature_names[idx])
print("Selected features:", selected_feature_names)

Selected features: ['MedInc', 'AveOccup', 'Latitude']


In [22]:
# Sequential feature selection with a linear model.
#
# SequentialFeatureSelector and LinearRegression are imported in the
# notebook's first (imports) cell, and X, y and data come from the earlier
# data-loading cell, so this cell only builds and applies the selector.

# Use SequentialFeatureSelector to select three features.
# direction='forward' requests forward selection, and n_features_to_select=3
# fixes the size of the final subset.
estimator = LinearRegression()
sfs = SequentialFeatureSelector(estimator, n_features_to_select=3, direction='forward')
sfs.fit(X, y)

# Print the shapes before and after feature selection
X_sfs_selected = sfs.transform(X)
print("Shape of X before feature selection:", X.shape)
print("Shape of X after feature selection:", X_sfs_selected.shape)

# Optional: Print selected feature names by mapping the kept column
# positions back to the dataset's feature names
selected_sfs_feature_indices = sfs.get_support(indices=True)
selected_sfs_feature_names = [data.feature_names[i] for i in selected_sfs_feature_indices]
print("Selected features using SFS:", selected_sfs_feature_names)


Shape of X before feature selection: (20640, 8)
Shape of X after feature selection: (20640, 3)
Selected features using SFS: ['MedInc', 'HouseAge', 'AveRooms']
