In [None]:
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.feature_selection import SelectKBest, f_regression, RFE
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler

# Load dataset
df = pd.read_csv('/mnt/data/house.csv')

# Define target and candidate features
target = 'Price'
# Identifier-like and text/categorical columns that are excluded up front
drop_cols = ['Unnamed: 0', 'Suburb', 'Address', 'Type', 'Method', 'SellerG', 'Date',
             'CouncilArea', 'Regionname']

# Rows without a target value cannot be used for supervised feature
# selection; scikit-learn estimators raise on NaN in y, so drop them here.
# The index is reset so that X, y and the imputed frame stay positionally
# aligned after the drop.
df = df.dropna(subset=[target]).reset_index(drop=True)

# Keep only columns that are genuinely numeric (the median imputer fails on
# text columns that are not listed in drop_cols) and that contain at least
# one observed value (SimpleImputer discards all-NaN columns, which would
# make the column labels below mismatch the imputed array).
excluded_cols = set(drop_cols) | {target}
numeric_cols = [
    col
    for col in df.select_dtypes(include='number').columns
    if col not in excluded_cols and df[col].notna().any()
]

# Prepare X and y
X = df[numeric_cols]
y = df[target]

# Impute missing values with the per-column median
imputer = SimpleImputer(strategy='median')
X_imputed = pd.DataFrame(
    imputer.fit_transform(X),
    columns=numeric_cols,
    index=X.index,
)

# 1. Univariate selection: score every feature against the target with the
#    f_regression F-statistic and keep the five best.
skb = SelectKBest(f_regression, k=5).fit(X_imputed, y)

# One row per feature, highest F-score first.
skb_scores = pd.DataFrame({'Feature': numeric_cols, 'F-score': skb.scores_})
skb_scores = skb_scores.sort_values('F-score', ascending=False)

# 2. RFE with LinearRegression
# RFE eliminates the features with the smallest fitted coefficients, and
# linear-regression coefficients shrink or grow with the units of each
# feature. Standardise the features first so coefficient magnitudes are
# comparable and the elimination order reflects relevance, not scale.
X_scaled = StandardScaler().fit_transform(X_imputed)

lr = LinearRegression()
rfe = RFE(estimator=lr, n_features_to_select=5)
rfe.fit(X_scaled, y)

# One row per feature; 1 marks the features RFE kept, listed first.
rfe_support = pd.DataFrame({
    'Feature': numeric_cols,
    'RFE_Selected': rfe.support_.astype(int)
}).sort_values(by='RFE_Selected', ascending=False)

# 3. Model-based ranking: fit a random forest (seeded for reproducibility)
#    and read its feature importances.
rf = RandomForestRegressor(random_state=42, n_estimators=100).fit(X_imputed, y)

# One row per feature, most important first.
rf_importances = pd.DataFrame(
    {'Feature': numeric_cols, 'Importance': rf.feature_importances_}
)
rf_importances = rf_importances.sort_values('Importance', ascending=False)

# Show one table per selection method to the user.
import ace_tools as tools

# Each entry pairs a table title with the rows to show for that method.
result_tables = (
    ("SelectKBest (Top 5 by F-score)", skb_scores.head(5)),
    ("RFE Selected Features (Top 5)", rfe_support[rfe_support['RFE_Selected'] == 1]),
    ("Random Forest Feature Importances (Top 5)", rf_importances.head(5)),
)
for table_title, table_frame in result_tables:
    tools.display_dataframe_to_user(name=table_title, dataframe=table_frame)

# Collect the chosen feature names per method as plain lists.
skb_top5 = skb_scores['Feature'].iloc[:5].tolist()
rfe_top5 = rfe_support.loc[rfe_support['RFE_Selected'] == 1, 'Feature'].tolist()
rf_top5 = rf_importances['Feature'].iloc[:5].tolist()

# Print a compact text summary, one line per method.
print("Summary of selected features:")
for summary_label, summary_features in (
    ("- SelectKBest top 5:", skb_top5),
    ("- RFE top 5:", rfe_top5),
    ("- Random Forest top 5:", rf_top5),
):
    print(summary_label, summary_features)
