In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.svm import LinearSVR
from sklearn.feature_selection import RFE
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler
import warnings
warnings.filterwarnings("ignore")


In [2]:
dataset1=pd.read_csv("preprocessed_house_rent.csv",index_col=None)
df2=dataset1
df2 = pd.get_dummies(df2, drop_first=True,dtype=int)
indep_X=df2.drop('Total Floors',axis=1)
dep_Y=df2['Total Floors']


In [3]:
df2

Unnamed: 0,BHK,Rent,Size,Area Type,Furnishing Status,Tenant Preferred,Bathroom,City_Chennai,City_Delhi,City_Hyderabad,City_Kolkata,City_Mumbai,Floor Number,Total Floors
0,2,10000,1100,2,2,1,2,0,0,0,1,0,0,2
1,2,20000,800,2,1,1,1,0,0,0,1,0,1,3
2,2,17000,1000,2,1,1,1,0,0,0,1,0,1,3
3,2,10000,800,2,2,1,1,0,0,0,1,0,1,2
4,2,7500,850,1,2,0,1,0,0,0,1,0,1,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4741,2,15000,1000,1,1,1,2,0,0,1,0,0,3,5
4742,3,29000,2000,2,1,1,3,0,0,1,0,0,1,4
4743,3,35000,1750,1,1,1,3,0,0,1,0,0,3,5
4744,3,45000,1500,1,1,2,2,0,0,1,0,0,23,34


In [4]:
# Splitting data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(indep_X, dep_Y, test_size=0.2, random_state=42)


In [5]:
# Standardizing the features (important for models like SVR)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [6]:
# Define models
models = {
    "Linear Regression": LinearRegression(),
    "Decision Tree Regressor": DecisionTreeRegressor(random_state=42),
    "Random Forest Regressor": RandomForestRegressor(n_estimators=100, random_state=42),
     "LinearSVR": LinearSVR()
}


In [7]:
# Apply RFE for each model and evaluate the performance
def rfeFeatureSelection(X_train, y_train, X_test, y_test, models, n_features=5):
    results = []
    for model_name, model in models.items():
# Apply RFE for feature selection
        rfe = RFE(model, n_features_to_select=n_features)
        rfe.fit(X_train, y_train)
        
        # Get selected features
        selected_features = X_train.columns[rfe.support_]
        
        # Refit model using selected features
        X_train_selected = X_train[selected_features]
        X_test_selected = X_test[selected_features]
        
        # Train the model on selected features
        model.fit(X_train_selected, y_train)
         
        # Predict on test set
        y_pred = model.predict(X_test_selected)
        
        # Evaluate model performance
        mae = mean_absolute_error(y_test, y_pred)
        mse = mean_squared_error(y_test, y_pred)
        rmse = np.sqrt(mse)
        r2 = r2_score(y_test, y_pred)
        
        # Store results
        results.append({
            'Model': model_name,
            'Selected Features': selected_features,
            'MAE': mae,
            'MSE': mse,
            'RMSE': rmse,
            'R2': r2
        })
    return pd.DataFrame(results)
#Run RFE feature selection and evaluate models
results_df = rfeFeatureSelection(X_train, y_train, X_test, y_test, models)

#Display results
print(results_df)


                     Model                                  Selected Features  \
0        Linear Regression  Index(['BHK', 'Bathroom', 'City_Delhi', 'City_...   
1  Decision Tree Regressor  Index(['Rent', 'Size', 'Furnishing Status', 'C...   
2  Random Forest Regressor  Index(['Rent', 'Size', 'Bathroom', 'City_Mumba...   
3                LinearSVR  Index(['Bathroom', 'City_Chennai', 'City_Hyder...   

        MAE        MSE      RMSE        R2  
0  2.468588  20.574015  4.535859  0.807576  
1  2.813649  32.759147  5.723561  0.693611  
2  2.435999  20.433979  4.520396  0.808886  
3  2.307423  22.530114  4.746590  0.789281  
