In [2]:
import numpy as np
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import GridSearchCV,train_test_split
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.linear_model import Lasso,Ridge
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error

In [3]:
df=pd.read_csv('housing.csv')
x=df.drop(columns=['price'])
y=df['price']

x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2,random_state=42)
num=x_train.select_dtypes(include=np.number).columns.tolist()
cat=x_train.select_dtypes(exclude=np.number).columns.tolist()
y_train

def pipe(num,cat):
    num_pipe=Pipeline([
        ('imputer',SimpleImputer(strategy='median')),
        ('sc',StandardScaler() )
    ])
    cat_pipe=Pipeline([
        ('imputer',SimpleImputer(strategy='most_frequent')),
        ('ohe',OneHotEncoder(drop='first',handle_unknown='ignore') )
    ])
    full_pipe=ColumnTransformer([
        ('num',num_pipe,num),
        ('cat',cat_pipe,cat)
    ])
    return full_pipe
# preprocessor=pipe(num,cat)
# x_train=preprocessor.fit_transform(x_train)
# x_test=preprocessor.transform(x_test)

In [5]:
models={
    'Ridge':{
        'model':Ridge(),
        'params':{
            'regressor__alpha':[0.01,0.1,1,10,100]
        }},
    'Lasso':{
        'model':Lasso(),
        'params':{
            'regressor__alpha':[0.001,0.01,0.1,1,10]
        }},
    'RandomForest':{
        'model':RandomForestRegressor(),
        'params': {
            "regressor__n_estimators": [100, 200 ,300],
            "regressor__max_depth": [None, 5, 10],
            "regressor__min_samples_split": [2, 5]
        }}
}
results=[]
for name,m in models.items():
    print(f"Running GridSearchCV for {name}")
    pipe_model=Pipeline([
        ('preprocessor',pipe(num,cat)),
        ('regressor',m['model'])
    ])
    grid = GridSearchCV(
        estimator=pipe_model,
        param_grid=m["params"],
        cv=5,
        scoring="neg_root_mean_squared_error",
        n_jobs=-1
    )
    
    grid.fit(x_train, y_train)

    y_pred = grid.predict(x_test)
    rmse=np.sqrt(mean_squared_error(y_test, y_pred))
    mae = mean_absolute_error(y_test, y_pred)
    print(f"{name} Best Params: {grid.best_params_}")
    print(f"{name} Test RMSE: {rmse:.2f}, MAE: {mae:.2f}")
    print("-"*40)
    
    results.append({
        "Model": name,
        "Best_Params": grid.best_params_,
        "Test_RMSE": rmse,
        "Test_MAE": mae
    })
results_df = pd.DataFrame(results)
print("\nModel Comparison:\n", results_df)



Running GridSearchCV for Ridge
Ridge Best Params: {'regressor__alpha': 10}
Ridge Test RMSE: 1338647.47, MAE: 979663.50
----------------------------------------
Running GridSearchCV for Lasso
Lasso Best Params: {'regressor__alpha': 10}
Lasso Test RMSE: 1324518.59, MAE: 970049.58
----------------------------------------
Running GridSearchCV for RandomForest
RandomForest Best Params: {'regressor__max_depth': 10, 'regressor__min_samples_split': 5, 'regressor__n_estimators': 300}
RandomForest Test RMSE: 1398508.27, MAE: 1019448.36
----------------------------------------

Model Comparison:
           Model                                        Best_Params  \
0         Ridge                           {'regressor__alpha': 10}   
1         Lasso                           {'regressor__alpha': 10}   
2  RandomForest  {'regressor__max_depth': 10, 'regressor__min_s...   

      Test_RMSE      Test_MAE  
0  1.338647e+06  9.796635e+05  
1  1.324519e+06  9.700496e+05  
2  1.398508e+06  1.019448e+06 