*You can tune two or more models using the same grid search! Here's how:*
1. *Create multiple parameter dictionaries*
2. *Specify the model within each dictionary*
3. *Put the dictionaries in a list*

In [31]:
import pandas as pd

# Toy feature matrix: two text columns plus a numeric column.
# 'Age' deliberately contains one missing entry (None) in row index 5.
X = pd.DataFrame(
    dict(
        Sex=["male", "female", "female", "male", "male", "female", "male", "female"],
        Name=[
            "John Smith", "Emily Rose", "Anna Taylor", "Michael Brown",
            "David Miller", "Sophia Wilson", "James Anderson", "Olivia Thomas",
        ],
        Age=[22, 35, 28, 42, 30, None, 25, 40],
    )
)

# Binary class label for each row of X, in the same row order.
y = [0, 1, 1, 0, 0, 1, 0, 1]

In [32]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.impute import SimpleImputer

# First Pipeline step: send each column to its own transformer.
#   'Sex'  -> one-hot encoded
#   'Name' -> token counts (selected with a plain string, not a list,
#             so the vectorizer receives a 1-D column of text)
#   'Age'  -> missing values filled by SimpleImputer with its default settings
ct = ColumnTransformer(
    transformers=[
        ('ohe', OneHotEncoder(), ['Sex']),
        ('vectorizer', CountVectorizer(), 'Name'),
        ('impute', SimpleImputer(), ['Age']),
    ]
)

In [33]:
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier

# Candidate estimators: each one takes a turn as the second Pipeline step.
# Both are seeded so repeated runs give the same result.
clf1 = LogisticRegression(
    random_state=1,
    solver='liblinear',
)
clf2 = RandomForestClassifier(
    random_state=1,
)

In [34]:
from sklearn.pipeline import Pipeline

# Two-step Pipeline: preprocessing followed by a classifier.
# clf1 only occupies the 'classifier' slot initially; each parameter
# dictionary used in the grid search sets the 'classifier' step itself.
pipe = Pipeline(
    steps=[
        ('preprocessor', ct),
        ('classifier', clf1),
    ]
)

In [35]:
# Search space used when the 'classifier' step is clf1 (logistic regression).
params1 = {
    # Put clf1 into the 'classifier' step for every combination in this dictionary.
    'classifier': [clf1],
    # Values of the logistic regression C parameter to try.
    'classifier__C': [0.1, 1, 10],
    # n-gram ranges to try for the 'vectorizer' transformer inside the preprocessor.
    'preprocessor__vectorizer__ngram_range': [(1, 1), (1, 2)],
    # Currently not searched:
    # 'classifier__penalty': ['l1', 'l2'],
}

In [36]:
# Search space used when the 'classifier' step is clf2 (random forest).
params2 = {
    # Put clf2 into the 'classifier' step for every combination in this dictionary.
    'classifier': [clf2],
    # Number of trees. These must be integers: string values such as '50' are
    # rejected by RandomForestClassifier, so every fit for this dictionary would
    # fail and the random forest would never actually be compared.
    'classifier__n_estimators': [50, 100],
    # Maximum tree depth; None leaves the depth unrestricted.
    'classifier__max_depth': [None, 5, 10],
    # Optional extras, currently not searched:
    # 'preprocessor__vectorizer__ngram_range': [(1,1), (1,2)],
    # 'classifier__min_samples_leaf': [1, 2],
}

In [37]:
# Combine the per-model search spaces. Given a list of dictionaries,
# the grid search explores each dictionary's combinations separately.
params = [
    params1,  # logistic regression grid
    params2,  # random forest grid
]

In [40]:
from warnings import filterwarnings
from sklearn.exceptions import FitFailedWarning

# Silence routine library warnings to keep the notebook output readable...
filterwarnings('ignore')
# ...but keep reporting failed fits. By default GridSearchCV scores a failing
# candidate as NaN instead of raising, so with a blanket 'ignore' an invalid
# parameter value silently removes a whole model from the comparison.
# This filter is registered last, so it takes precedence over the blanket one.
filterwarnings('always', category=FitFailedWarning)

In [41]:
from sklearn.model_selection import GridSearchCV

# Exhaustively evaluate every parameter combination within each dictionary,
# using 3-fold cross-validation; fit() returns the fitted search object.
grid = GridSearchCV(estimator=pipe, param_grid=params, cv=3).fit(X, y)

# Display the best-scoring combination (including which classifier won).
grid.best_params_

{'classifier': LogisticRegression(random_state=1, solver='liblinear'),
 'classifier__C': 10,
 'preprocessor__vectorizer__ngram_range': (1, 1)}

> *The best combination was a logistic regression model with the parameters shown above.*