Importing the necessary libraries and loading the dataset:

In [None]:
import pandas as pd
import seaborn as sns
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from yellowbrick.classifier import ConfusionMatrix

# Load seaborn's 'penguins' example dataset into `penguins`; its 'species'
# column is the classification target used by the cells below.
penguins = sns.load_dataset('penguins')


Here, we'll split the dataset into training and testing sets:

In [None]:
# Separate the feature columns from the 'species' target and hold out 20% of
# the rows for evaluation. The fixed random_state keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    penguins.drop(columns='species'),
    penguins['species'],
    test_size=0.2,
    random_state=42,
)

Now let's define the preprocessing steps for our pipeline. We'll use a ColumnTransformer to apply different transformations to different columns of the dataset:
The numeric_transformer imputes missing values with the median and then applies standard scaling to the numeric features. The preprocessor applies this transformation to the four numeric columns; the categorical columns are not left unchanged but get their own transformer.
The categorical_transformer first imputes missing values with the most frequent value and then applies one-hot encoding to the categorical variables. The ColumnTransformer applies categorical_transformer to the 'sex' and 'island' columns.

In [None]:
# Numeric features: fill missing values with the column median, then standardize.
numeric_transformer = Pipeline([
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler()),
])

# Categorical features: fill missing values with the most frequent category,
# then one-hot encode (the encoder is configured with handle_unknown='ignore').
categorical_transformer = Pipeline([
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore')),
])

# Route each group of columns to its matching transformer.
preprocessor = ColumnTransformer([
    (
        'num',
        numeric_transformer,
        ['bill_length_mm', 'bill_depth_mm', 'flipper_length_mm', 'body_mass_g'],
    ),
    (
        'cat',
        categorical_transformer,
        ['sex', 'island'],
    ),
])


Let's define the logistic regression model that we want to fit to the data:

In [None]:
# Full model: the column preprocessing followed by a logistic regression
# classifier capped at 1000 solver iterations.
clf = Pipeline([
    ('preprocessor', preprocessor),
    ('classifier', LogisticRegression(max_iter=1000)),
])


Now let's fit the model and visualize the results using the ConfusionMatrix from Yellowbrick:

In [None]:
# Fit the full pipeline (preprocessing + classifier) on the training split.
clf.fit(X_train, y_train)
# NOTE(review): y_pred is not passed to the visualizer below (score() is given
# X_test and y_test instead); keep it only if a later cell needs the predictions.
y_pred = clf.predict(X_test)

# Wrap the already-fitted pipeline in Yellowbrick's ConfusionMatrix visualizer,
# passing the three species names as the class labels.
cm = ConfusionMatrix(clf, classes=['Adelie', 'Chinstrap', 'Gentoo'])
# Score the visualizer on the held-out test split.
cm.score(X_test, y_test)

# Display the resulting figure.
cm.show()
