# TITANİC

## Import Libraries

In [119]:
import pandas as pd
import numpy as np
from cenfo.obliging import information, report_classifier # custom library

## Import dataset

In [125]:
df = pd.read_csv('../../Machine_Learning_Algorithms/C_Datasets/titanic_train_original.csv')
df.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [127]:
information(df)

Number of columns: 891
Number of rows:    12
-----------------------------------------------------------
-----------------------------------------------------------
PassengerId      int64
Survived         int64
Pclass           int64
Name            object
Sex             object
Age            float64
SibSp            int64
Parch            int64
Ticket          object
Fare           float64
Cabin           object
Embarked        object
dtype: object
-----------------------------------------------------------
-----------------------------------------------------------
int64      5
object     5
float64    2
dtype: int64
-----------------------------------------------------------
-----------------------------------------------------------
Table of Null Values of DataFrame: 

Age         177
Cabin       687
Embarked      2
dtype: int64
-----------------------------------------------------------
-----------------------------------------------------------
Total number of null values: 866
--

## Preprocessing

- Extract courtesy title from names.
- Drop some columns.
- Arrange the dtypes.

In [129]:
# create a funtion to extract title from name columns 
def title_extract(X):
    my_list = ['Mr', 'Miss', 'Mrs', 'Master']
    return ( 
                X
                .assign(Name = (X.Name.str.split(',').apply(lambda x: x[1].strip()).str.split('.')
                                .apply(lambda x:x[0].strip()).apply(lambda x: 'Other' if x not in my_list else x)),
                        Embarked = df.Embarked.fillna('S'))
                .rename(columns={'Name': 'Title'})
                .drop(['PassengerId', 'Cabin', 'Ticket'], axis=1)
                .astype({'Title': 'category', 'Sex': 'category', 'Embarked': 'category'})
            )

In [130]:
df = title_extract(df)
df_kaggle = title_extract(df_kaggle)

In [131]:
df.head()

Unnamed: 0,Survived,Pclass,Title,Sex,Age,SibSp,Parch,Fare,Embarked
0,0,3,Mr,male,22.0,1,0,7.25,S
1,1,1,Mrs,female,38.0,1,0,71.2833,C
2,1,3,Miss,female,26.0,0,0,7.925,S
3,1,1,Mrs,female,35.0,1,0,53.1,S
4,0,3,Mr,male,35.0,0,0,8.05,S


## Use ColumnTransformer for each type 

- We use ColumnTransformer for applying pipeline to each columns separately.

In [133]:
float_features = df.select_dtypes('float').columns
caterical_features = df.select_dtypes('category').columns

In [134]:
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split

In [135]:
float_pipeline = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler())
])

categorical_pipeline = Pipeline(steps=[
    ('onehot', OneHotEncoder(handle_unknown='ignore', drop='if_binary'))
])

preprocessor = ColumnTransformer(
    transformers=[
        ('num', float_pipeline, float_features),
        ('cat', categorical_pipeline, caterical_features)
    ]
)

## Split data set

In [136]:
X = df.drop('Survived', axis=1)
y = df['Survived']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

## SVC()

### Import Libraries

In [137]:
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC

### Create a Pipeline

In [138]:
pipe_svc = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('svc', SVC())
])

### Create a Parameter Grid

In [139]:
param_grid = {
    'svc__C': [8, 10, 12, 14, 20],
    'svc__gamma': [0.1],
    'svc__kernel': ['rbf']
}

### Create and Fit the Grid Search

In [140]:
grid_search = GridSearchCV(pipe_svc, param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train, y_train)

### Show the Result of Grid Search

In [141]:
print(f'Best Parameters: {grid_search.best_params_}')
print(f'Best Score: {grid_search.best_score_:.2f}')

Best Parameters: {'svc__C': 8, 'svc__gamma': 0.1, 'svc__kernel': 'rbf'}
Best Score: 0.80


### Apply Results to Model

In [142]:
# create a pipeline
pipe_svc = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('svc', SVC(C=8, gamma=0.1, kernel='rbf'))
])

# Fit the pipeline
pipe_svc.fit(X_train, y_train)

# Make predictions
y_pred = pipe_svc.predict(X_test)

# Show report
svc_report = report_classifier(y_test, y_pred, 'SVC_model')
svc_report

Unnamed: 0,Model,Accuracy,Precision,Recall,F1-Score
0,SVC_model,0.784753,0.725275,0.741573,0.733333


## KNN()

#### Import Libraries

In [143]:
from sklearn.neighbors import KNeighborsClassifier

### Create a Pipeline

In [144]:
pipe_knn = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('knn', KNeighborsClassifier())
])

### Create a Parameter Grid

In [145]:
param_grid = {
    'knn__n_neighbors': [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 20],
    'knn__leaf_size': [5, 10, 20, 30, 40, 50],
    'knn__metric': ['euclidean', 'manhattan', 'minkowski']
}

### Create and Fit the Grid Search

In [146]:
grid_search = GridSearchCV(pipe_knn, param_grid, cv=5, scoring='accuracy', n_jobs=-1)
grid_search.fit(X_train, y_train)

### Show the Result of Grid Search

In [147]:
print(f'Best Parameters: {grid_search.best_params_}')
print(f'Best Score: {grid_search.best_score_:.2f}')

Best Parameters: {'knn__leaf_size': 5, 'knn__metric': 'manhattan', 'knn__n_neighbors': 11}
Best Score: 0.80


### Apply Results to Model

In [148]:
# create a pipeline
pipe_knn = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('knn', KNeighborsClassifier(metric='manhattan', n_neighbors=11, leaf_size=5))
])

# Fit the pipeline
pipe_knn.fit(X_train, y_train)

# Make predictions
y_pred = pipe_knn.predict(X_test)

# Show report
knn_report = report_classifier(y_test, y_pred, 'KNN_model')
knn_report

Unnamed: 0,Model,Accuracy,Precision,Recall,F1-Score
0,KNN_model,0.762332,0.714286,0.674157,0.693642


## LogisticRegression()

### Import Libraries

In [149]:
from sklearn.linear_model import LogisticRegression as LR

### Create a Pipeline

In [150]:
pipe_LR = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('LR', LR())
])

### Create a Parameter Grid

In [151]:
param_grid_1 = {
    'LR__solver': ['saga', 'liblinear'],
    'LR__penalty': ['l1'],
    'LR__C': [0.001, 0.01, 0.1, 1, 10, 100]
}

param_grid_2 = {
    'LR__solver': ['lbfgs', 'liblinear', 'newton-cg', 'newton-cholesky', 'sag', 'saga'],
    'LR__penalty': ['l2'],
    'LR__C': [0.001, 0.01, 0.1, 1, 10, 100]
}

### Create and Fit the Grid Search

In [152]:
grid_search_1 = GridSearchCV(pipe_LR, param_grid_1, cv=5, scoring='accuracy', n_jobs=-1)
grid_search_1.fit(X_train, y_train)

In [153]:
grid_search_2 = GridSearchCV(pipe_LR, param_grid_2, cv=5, scoring='accuracy', n_jobs=-1)
grid_search_2.fit(X_train, y_train)

### Show the Result of Grid Search

In [154]:
print(f'Best Parameters: {grid_search_1.best_params_}')
print(f'Best Score: {grid_search_1.best_score_:.2f}')

Best Parameters: {'LR__C': 0.1, 'LR__penalty': 'l1', 'LR__solver': 'liblinear'}
Best Score: 0.78


In [155]:
print(f'Best Parameters: {grid_search_2.best_params_}')
print(f'Best Score: {grid_search_2.best_score_:.2f}')

Best Parameters: {'LR__C': 0.1, 'LR__penalty': 'l2', 'LR__solver': 'lbfgs'}
Best Score: 0.78


### Apply Results to Model

In [156]:
# create a pipeline
pipe_LR = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('LR', LR(C=0.1, penalty='l2', solver='lbfgs'))
])

# Fit the pipeline
pipe_LR.fit(X_train, y_train)

# Make predictions
y_pred = pipe_LR.predict(X_test)

# Show report
LR_report = report_classifier(y_test, y_pred, 'LR_model')
LR_report

Unnamed: 0,Model,Accuracy,Precision,Recall,F1-Score
0,LR_model,0.793722,0.736264,0.752809,0.744444


## DecisionTreeClassifier()

### Import Libraries

In [157]:
from sklearn.tree import DecisionTreeClassifier as tree

### Create a Pipeline

In [158]:
pipe_tree = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('tree', tree())
])

### Create a Parameter Grid

In [161]:
param_grid = {
    'tree__max_depth': [2, 5, 10, 15, 20, 30],
    'tree__ccp_alpha': [0.0, 0.01, 0.1, 1, 10, 100],
    'tree__min_samples_split': [2, 4, 6, 8, 10],
    'tree__min_samples_leaf': [1, 3, 5, 7, 9, 10]
}

### Create and Fit the Grid Search

In [162]:
grid_search = GridSearchCV(pipe_tree, param_grid, cv=5, scoring='accuracy', n_jobs=-1)
grid_search.fit(X_train, y_train)

### Show the Result of Grid Search

In [163]:
print(f'Best Parameters: {grid_search.best_params_}')
print(f'Best Score: {grid_search.best_score_:.2f}')

Best Parameters: {'tree__ccp_alpha': 0.0, 'tree__max_depth': 10, 'tree__min_samples_leaf': 1, 'tree__min_samples_split': 4}
Best Score: 0.78


### Apply Results to Model

In [165]:
# create a pipeline
pipe_tree = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('tree', tree(ccp_alpha=0.0, max_depth=10, min_samples_leaf=1, min_samples_split=4))
])

# Fit the pipeline
pipe_tree.fit(X_train, y_train)

# Make predictions
y_pred = pipe_tree.predict(X_test)

# Show report
tree_report = report_classifier(y_test, y_pred, 'tree_model')
tree_report

Unnamed: 0,Model,Accuracy,Precision,Recall,F1-Score
0,tree_model,0.798206,0.768293,0.707865,0.736842


## RandomForestClassifier()

### Import Libraries

In [166]:
from sklearn.ensemble import RandomForestClassifier as forest

### Create a Pipeline

In [169]:
pipe_forest = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('forest', forest())
])

### Create a Parameter Grid

In [176]:
param_grid = {
    'forest__ccp_alpha': [0.0, 0.001, 0.01, 0.1, 1, 10, 100],
    'forest__n_estimators': [10, 30, 50, 60, 100],
    'forest__criterion': ['gini', 'entropy', 'log_loss']
}

### Create and Fit the Grid Search

In [174]:
grid_search = GridSearchCV(pipe_forest, param_grid, cv=5, scoring='accuracy', n_jobs=-1)
grid_search.fit(X_train, y_train)

### Show the Result of Grid Search

In [175]:
print(f'Best Parameters: {grid_search.best_params_}')
print(f'Best Score: {grid_search.best_score_:.2f}')

Best Parameters: {'forest__ccp_alpha': 0.01, 'forest__criterion': 'entropy', 'forest__n_estimators': 30}
Best Score: 0.80


### Apply Results to Model

In [179]:
# create a pipeline
pipe_forest = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('forest', forest(ccp_alpha=0.01, criterion='entropy', n_estimators=30))
])

# Fit the pipeline
pipe_forest.fit(X_train, y_train)

# Make predictions
y_pred = pipe_forest.predict(X_test)

# Show report
forest_report = report_classifier(y_test, y_pred, 'forest_model')
forest_report

Unnamed: 0,Model,Accuracy,Precision,Recall,F1-Score
0,forest_model,0.793722,0.736264,0.752809,0.744444


## GradientBoostingClassifier()

### Import Libraries

In [181]:
from sklearn.ensemble import GradientBoostingClassifier as boosting

### Create a Pipeline

In [182]:
pipe_boosting = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('boosting', boosting())
])

### Create a Parameter Grid

In [183]:
param_grid = {
    'boosting__learning_rate': [0.001, 0.01, 0.1],
    'boosting__n_estimators': [10, 30, 50, 60, 100],
    'boosting__max_leaf_nodes': [5,  15, 25, 35, None],
    'boosting__ccp_alpha': [0.01, 0.1, 0.0, 1.0, 10.0],
    'boosting__max_depth': [2, 4, 6, 8, 10]
}

### Create and Fit the Grid Search

In [184]:
grid_search = GridSearchCV(pipe_boosting, param_grid, cv=5, scoring='accuracy', n_jobs=-1)
grid_search.fit(X_train, y_train)

### Show the Result of Grid Search

In [185]:
print(f'Best Parameters: {grid_search.best_params_}')
print(f'Best Score: {grid_search.best_score_:.2f}')

Best Parameters: {'boosting__ccp_alpha': 0.0, 'boosting__learning_rate': 0.1, 'boosting__max_depth': 4, 'boosting__max_leaf_nodes': 15, 'boosting__n_estimators': 50}
Best Score: 0.82


### Apply Results to Model

In [186]:
# create a pipeline
pipe_boosting = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('boosting', boosting(ccp_alpha=0.0, learning_rate=0.1, max_depth=4, max_leaf_nodes=15, n_estimators=50))
])

# Fit the pipeline
pipe_boosting.fit(X_train, y_train)

# Make predictions
y_pred = pipe_boosting.predict(X_test)

# Show report
boosting_report = report_classifier(y_test, y_pred, 'boosting_model')
boosting_report

Unnamed: 0,Model,Accuracy,Precision,Recall,F1-Score
0,boosting_model,0.825112,0.797619,0.752809,0.774566


In [187]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Load the dataset
titanic = pd.read_csv('../../Machine_Learning_Algorithms/C_Datasets/titanic_train_original.csv')

# Separate features and target
X = titanic.drop(columns='Survived')
y = titanic['Survived']

# Identify numerical and categorical features
numeric_features = ['Age', 'Fare', 'Parch', 'SibSp']
categorical_features = ['Pclass', 'Sex', 'Embarked']

# Numeric pipeline
numeric_pipeline = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler())
])

# Categorical pipeline
categorical_pipeline = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore'))
])

# Combine pipelines
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_pipeline, numeric_features),
        ('cat', categorical_pipeline, categorical_features)
    ]
)

# Create the full pipeline with RandomForestClassifier
model = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', RandomForestClassifier(random_state=42))
])

# Define the parameter grid
param_grid = {
    'classifier__n_estimators': [50, 100, 200],
    'classifier__max_depth': [None, 10, 20, 30],
    'classifier__min_samples_split': [2, 5, 10],
    'classifier__min_samples_leaf': [1, 2, 4]
}

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Perform grid search
grid_search = GridSearchCV(model, param_grid, cv=5, scoring='accuracy', n_jobs=-1)
grid_search.fit(X_train, y_train)

# Get best parameters and estimator
best_model = grid_search.best_estimator_
print(f'Best parameters found: {grid_search.best_params_}')

# Evaluate the model
y_pred = best_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Test accuracy: {accuracy}')


Best parameters found: {'classifier__max_depth': 10, 'classifier__min_samples_leaf': 4, 'classifier__min_samples_split': 2, 'classifier__n_estimators': 100}
Test accuracy: 0.8100558659217877


In [188]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import accuracy_score
from sklearn.base import BaseEstimator, TransformerMixin

# Load the dataset
titanic = pd.read_csv('../../Machine_Learning_Algorithms/C_Datasets/titanic_train_original.csv')

# Feature Engineering Functions
def extract_title(name):
    return name.split(',')[1].split('.')[0].strip()

def extract_deck(cabin):
    return cabin[0] if pd.notna(cabin) else 'M'

# Feature Engineering
titanic['Title'] = titanic['Name'].apply(extract_title)
titanic['FamilySize'] = titanic['SibSp'] + titanic['Parch'] + 1
titanic['IsAlone'] = (titanic['FamilySize'] == 1).astype(int)
titanic['Deck'] = titanic['Cabin'].apply(extract_deck)

# Drop unnecessary columns
titanic.drop(columns=['Name', 'Ticket', 'Cabin'], inplace=True)

# Separate features and target
X = titanic.drop(columns='Survived')
y = titanic['Survived']

# Identify numerical and categorical features
numeric_features = ['Age', 'Fare', 'Parch', 'SibSp', 'FamilySize']
categorical_features = ['Pclass', 'Sex', 'Embarked', 'Title', 'Deck', 'IsAlone']

# Numeric pipeline
numeric_pipeline = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler())
])

# Categorical pipeline
categorical_pipeline = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore'))
])

# Combine pipelines
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_pipeline, numeric_features),
        ('cat', categorical_pipeline, categorical_features)
    ]
)

# Create the full pipeline with RandomForestClassifier and GradientBoostingClassifier
model = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', RandomForestClassifier(random_state=42))
])

# Define the parameter grid for RandomForestClassifier
param_grid_rf = {
    'classifier__n_estimators': [50, 100, 200],
    'classifier__max_depth': [None, 10, 20, 30],
    'classifier__min_samples_split': [2, 5, 10],
    'classifier__min_samples_leaf': [1, 2, 4]
}

# Optionally, define the parameter grid for GradientBoostingClassifier
param_grid_gb = {
    'classifier': [GradientBoostingClassifier(random_state=42)],
    'classifier__n_estimators': [50, 100, 200],
    'classifier__learning_rate': [0.01, 0.1, 0.2],
    'classifier__max_depth': [3, 5, 7],
    'classifier__min_samples_split': [2, 5, 10],
    'classifier__min_samples_leaf': [1, 2, 4]
}

# Combine parameter grids
param_grid = [
    param_grid_rf,
    param_grid_gb
]

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Perform grid search
grid_search = GridSearchCV(model, param_grid, cv=5, scoring='accuracy', n_jobs=-1)
grid_search.fit(X_train, y_train)

# Get best parameters and estimator
best_model = grid_search.best_estimator_
print(f'Best parameters found: {grid_search.best_params_}')

# Evaluate the model
y_pred = best_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Test accuracy: {accuracy}')


Best parameters found: {'classifier__max_depth': 10, 'classifier__min_samples_leaf': 2, 'classifier__min_samples_split': 2, 'classifier__n_estimators': 100}
Test accuracy: 0.8379888268156425


In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split, StratifiedKFold, GridSearchCV
from sklearn.preprocessing import StandardScaler, OneHotEncoder, PolynomialFeatures
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score
from sklearn.base import BaseEstimator, TransformerMixin
from imblearn.over_sampling import SMOTE
from sklearn.decomposition import PCA

# Load the dataset
titanic = pd.read_csv('../../Machine_Learning_Algorithms/C_Datasets/titanic_train_original.csv')

# Feature Engineering Functions
def extract_title(name):
    return name.split(',')[1].split('.')[0].strip()

def extract_deck(cabin):
    return cabin[0] if pd.notna(cabin) else 'M'

# Feature Engineering
titanic['Title'] = titanic['Name'].apply(extract_title)
titanic['FamilySize'] = titanic['SibSp'] + titanic['Parch'] + 1
titanic['IsAlone'] = (titanic['FamilySize'] == 1).astype(int)
titanic['Deck'] = titanic['Cabin'].apply(extract_deck)

# Drop unnecessary columns
titanic.drop(columns=['Name', 'Ticket', 'Cabin'], inplace=True)

# Separate features and target
X = titanic.drop(columns='Survived')
y = titanic['Survived']

# Identify numerical and categorical features
numeric_features = ['Age', 'Fare', 'Parch', 'SibSp', 'FamilySize']
categorical_features = ['Pclass', 'Sex', 'Embarked', 'Title', 'Deck', 'IsAlone']

# Numeric pipeline
numeric_pipeline = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler()),
    ('poly', PolynomialFeatures(degree=2, include_bias=False)),
    ('pca', PCA(n_components=10))
])

# Categorical pipeline
categorical_pipeline = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore'))
])

# Combine pipelines
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_pipeline, numeric_features),
        ('cat', categorical_pipeline, categorical_features)
    ]
)

# Define base models for stacking
estimators = [
    ('rf', RandomForestClassifier(random_state=42)),
    ('gb', GradientBoostingClassifier(random_state=42))
]

# Create the full pipeline with StackingClassifier
model = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('smote', SMOTE(random_state=42)),
    ('classifier', StackingClassifier(estimators=estimators, final_estimator=LogisticRegression(), cv=5))
])

# Define the parameter grid
param_grid = {
    'classifier__rf__n_estimators': [100, 200],
    'classifier__rf__max_depth': [None, 10],
    'classifier__gb__n_estimators': [100, 200],
    'classifier__gb__learning_rate': [0.01, 0.1]
}

# Split data with Stratified K-Fold Cross-Validation
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Perform grid search
grid_search = GridSearchCV(model, param_grid, cv=StratifiedKFold(n_splits=5), scoring='accuracy', n_jobs=-1)
grid_search.fit(X_train, y_train)

# Get best parameters and estimator
best_model = grid_search.best_estimator_
print(f'Best parameters found: {grid_search.best_params_}')

# Evaluate the model
y_pred = best_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
roc_auc = roc_auc_score(y_test, best_model.predict_proba(X_test)[:, 1])

print(f'Test accuracy: {accuracy}')
print(f'Test F1 score: {f1}')
print(f'Test ROC AUC score: {roc_auc}')


ImportError: cannot import name '_MissingValues' from 'sklearn.utils._param_validation' (C:\Users\monstr\anaconda3\Lib\site-packages\sklearn\utils\_param_validation.py)