In [3]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder,StandardScaler
from sklearn.model_selection import train_test_split
import seaborn as sns
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline,make_pipeline

In [4]:
# Load seaborn's "tips" example dataset and preview the first five rows.
df = sns.load_dataset("tips")
df.head(n=5)

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


In [5]:
# Features are every column except the bill amount; the target is the bill.
# The target is selected by name as a 1-D Series (df.iloc[:, :1] produced a
# one-column DataFrame), which is the (n_samples,) shape scikit-learn
# estimators expect for y and avoids a column-vector conversion warning on
# every fit.
X = df.drop(columns="total_bill")
y = df["total_bill"]

In [6]:
# Hold out 20% of the rows for evaluation; the seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [7]:
# Numeric preprocessing: replace NaNs with the column mean, then standardise.
numeric_processor = Pipeline(
    [
        ("imputation_mean", SimpleImputer(strategy="mean", missing_values=np.nan)),
        ("scalar", StandardScaler()),
    ]
)

In [8]:
# Display the numeric preprocessing pipeline (imputer followed by scaler).
numeric_processor

In [9]:
# Categorical preprocessing: fill gaps with the most frequent value, then
# one-hot encode; categories unseen during fit are ignored at transform time.
categorical_processor = Pipeline(
    [
        ("Catagorical_imputation", SimpleImputer(strategy="most_frequent")),
        ("OneHotEncoder", OneHotEncoder(handle_unknown="ignore")),
    ]
)

In [10]:
# Display the categorical preprocessing pipeline (imputer followed by encoder).
categorical_processor

In [35]:
# Route each column group through its own preprocessing pipeline.
Processor = ColumnTransformer(
    transformers=[
        ("numeric_processor", numeric_processor, ["tip", "size"]),
        (
            "categorical_processor",
            categorical_processor,
            ["sex", "smoker", "day", "time"],
        ),
    ]
)
Processor

In [36]:
# End-to-end model: column preprocessing followed by a random-forest regressor.
# The step name "RandomTreeRegressor" is kept unchanged because the
# hyper-parameter grid keys ("RandomTreeRegressor__...") are built from it.
Final_Pipeline = Pipeline(
    steps=[
        ("Processor", Processor),
        # Seed the forest so the fitted model and the reported metrics are
        # reproducible across runs, in line with the seeded train/test split.
        ("RandomTreeRegressor", RandomForestRegressor(random_state=42)),
    ]
)
Final_Pipeline

In [37]:
# Fit the whole pipeline (preprocessing + regressor) on the training split.
Final_Pipeline.fit(X_train,y_train)

In [38]:
# Predict the target for the held-out test rows using the fitted pipeline.
y_pred=Final_Pipeline.predict(X_test)

In [39]:
from sklearn.metrics import classification_report

In [40]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Score the held-out predictions with standard regression metrics.
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)  # root of the MSE, in the same units as the target
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print("Regression Report:")
for _label, _value in (
    ("MAE", mae),
    ("MSE", mse),
    ("RMSE", rmse),
    ("R-squared", r2),
):
    print(f"{_label}: {_value:.4f}")


Regression Report:
MAE: 4.6727
MSE: 41.6227
RMSE: 6.4516
R-squared: 0.5091


In [41]:
# Hyper-parameter grid for the "RandomTreeRegressor" pipeline step.
# Keys follow scikit-learn's "<step name>__<parameter>" convention.
param_grid = {
    "RandomTreeRegressor__n_estimators": [200, 500],
    # max_features="auto" was deprecated in scikit-learn 1.1 and removed in
    # 1.3, where it makes every candidate using it fail to fit. For a
    # regressor it meant "consider all features", which 1.0 expresses on
    # both old and new versions.
    "RandomTreeRegressor__max_features": [1.0, "sqrt", "log2"],
    "RandomTreeRegressor__max_depth": [4, 5, 6, 7, 8],
}


In [42]:
import warnings
# Silence only deprecation-style FutureWarnings. A blanket "ignore" would also
# hide warnings that signal real problems (e.g. failed fits during the grid
# search), so every other category is left visible.
warnings.filterwarnings("ignore", category=FutureWarning)

In [43]:
# Exhaustive search over param_grid with cross-validation on the training
# split, using all available CPU cores.
grid_search = GridSearchCV(
    estimator=Final_Pipeline,
    param_grid=param_grid,
    n_jobs=-1,
)
grid_search.fit(X_train, y_train)

In [44]:
# Show the parameter combination selected by the grid search.
grid_search.best_params_

{'RandomTreeRegressor__max_depth': 5,
 'RandomTreeRegressor__max_features': 'log2',
 'RandomTreeRegressor__n_estimators': 500}