In [2]:
import pandas as pd
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import make_column_transformer
from sklearn.pipeline import make_pipeline

# Load the penguins dataset from the hosted CSV
df = pd.read_csv(
    'https://codefinity-content-media.s3.eu-west-1.amazonaws.com/a65bbc96-309e-4df9-a790-a1eb8c815a1c/penguins.csv'
)
# Keep only the rows that have at most one missing value
df = df.loc[df.isna().sum(axis=1).lt(2)]
# Assigning X, y variables: 'species' is the target, every other column is a feature
y = df['species']
X = df.drop(columns='species')
# Create the ColumnTransformer for encoding.
# The categorical columns are imputed *before* they are one-hot encoded:
# OneHotEncoder (scikit-learn >= 0.24) treats NaN as one more category, so an
# imputer placed after the encoder never sees the missing 'island'/'sex'
# values and an extra NaN indicator column is produced instead.
cat_encoder = make_pipeline(SimpleImputer(strategy='most_frequent'),
                            OneHotEncoder())
ct = make_column_transformer((cat_encoder, ['island', 'sex']),
                             remainder='passthrough')
# Make a Pipeline of ct, SimpleImputer, and StandardScaler.
# The outer SimpleImputer still fills any gaps in the passed-through columns
# before everything is standardized.
pipe = make_pipeline(ct,
                     SimpleImputer(strategy='most_frequent'),
                     StandardScaler())
# Transform X using the pipeline and print transformed X
X_transformed = pipe.fit_transform(X)
print(X_transformed)


[[-0.97687549 -0.75419317  2.38869888 ...  0.78544923 -1.41834665
  -0.56414208]
 [-0.97687549 -0.75419317  2.38869888 ...  0.1261879  -1.06225022
  -0.50170305]
 [-0.97687549 -0.75419317  2.38869888 ...  0.43046236 -0.42127665
  -1.18853234]
 ...
 [ 1.02367192 -0.75419317 -0.41863795 ... -0.73592307  1.50164406
   1.93341896]
 [ 1.02367192 -0.75419317 -0.41863795 ... -1.19233476  0.7894512
   1.24658968]
 [ 1.02367192 -0.75419317 -0.41863795 ... -0.53307343  0.86067049
   1.49634578]]
