In [26]:
import pandas as pd
import seaborn as sns
from sklearn.ensemble import RandomForestClassifier,RandomForestRegressor
from sklearn.model_selection import train_test_split,GridSearchCV
from sklearn.preprocessing import LabelEncoder

from sklearn.metrics import accuracy_score,confusion_matrix,classification_report

#Remove the warning
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Load the Iris data set that ships with scikit-learn
from sklearn.datasets import load_iris

iris = load_iris()

# Feature matrix (x) and class labels (y) consumed by the search cells
x = iris.data
y = iris.target


In [30]:
# Define the model. A fixed seed keeps the forest (and therefore the reported
# best parameters) identical across Restart & Run All.
model = RandomForestClassifier(random_state=42)

# Parameter grid: 6 * 7 * 2 * 2 = 168 candidate combinations
param_grid = {
    "n_estimators": [50, 100, 200, 300, 400, 500],
    "max_depth": [4, 5, 6, 7, 8, 9, 10],
    "criterion": ["gini", "entropy"],
    "bootstrap": [True, False],
}

# Exhaustive search over every combination in the grid
grid = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    cv=5,  # 5-fold cross validation
    scoring="accuracy",
    verbose=1,
    n_jobs=-1,  # run the fits in parallel on all available cores
)

# Fit the search on the iris features and labels
grid.fit(x, y)

# Print the best parameter combination found
print(f"Best parameter {grid.best_params_}")

Fitting 5 folds for each of 168 candidates, totalling 840 fits
Best parameter {'bootstrap': True, 'criterion': 'gini', 'max_depth': 4, 'n_estimators': 100}


In [23]:
df = sns.load_dataset("tips")

# Replace every categorical column with its LabelEncoder integer codes
for col in df.select_dtypes(include="category").columns:
    df[col] = LabelEncoder().fit_transform(df[col])

# Separate the target variable ("tip") from the feature columns
Y = df["tip"]
X = df.drop("tip", axis=1)



In [27]:
# Build the model.
# The target Y ("tip") is continuous and the search is scored with
# neg_mean_squared_error, so this must be a regressor, not a classifier.
# A fixed seed keeps the result reproducible across re-runs.
model = RandomForestRegressor(random_state=42)

# Parameter grid for the RandomForestRegressor: 3 * 4 * 3 * 3 * 2 = 216 candidates
param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'bootstrap': [True, False],
}

# Set up GridSearchCV with a regression-appropriate scoring metric
grid_search = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    scoring='neg_mean_squared_error',  # or 'r2', 'neg_mean_absolute_error'
    cv=5,
    n_jobs=-1,  # run the fits in parallel on all available cores
)

# Fit the search to find the best parameters
grid_search.fit(X, Y)

# Output best parameters and score (negated MSE: closer to 0 is better)
print("Best parameters found:", grid_search.best_params_)
print("Best negative MSE score:", grid_search.best_score_)

Best parameters found: {'bootstrap': True, 'max_depth': None, 'min_samples_leaf': 2, 'min_samples_split': 10, 'n_estimators': 50}
Best negative MSE score: -1.0944975638095376


# RandomizedSearchCV

In [7]:
%%time
# Randomized search: evaluates a random sample of the grid instead of all of it
from sklearn.model_selection import RandomizedSearchCV

# Define the model; a fixed seed keeps the forest reproducible
model = RandomForestClassifier(random_state=42)

# Candidate values to sample from (168 combinations in total)
param_grid = {
    "n_estimators": [50, 100, 200, 300, 400, 500],
    "max_depth": [4, 5, 6, 7, 8, 9, 10],
    "criterion": ["gini", "entropy"],
    "bootstrap": [True, False],
}

# Set up the randomized search
grid = RandomizedSearchCV(
    estimator=model,
    param_distributions=param_grid,
    cv=5,  # 5-fold cross validation
    scoring="accuracy",
    verbose=1,
    n_jobs=-1,  # run the fits in parallel on all available cores
    n_iter=20,  # number of parameter combinations sampled
    random_state=42,  # seed the sampling so the same 20 candidates are drawn each run
)

# Fit the search on the iris features and labels
grid.fit(x, y)

# Print the best parameter combination found
print(f"Best parameter {grid.best_params_}")

Fitting 5 folds for each of 20 candidates, totalling 100 fits
Best parameter {'n_estimators': 300, 'max_depth': 5, 'criterion': 'gini', 'bootstrap': True}
CPU times: total: 672 ms
Wall time: 12 s
