In [2]:
# Imports: scikit-learn preprocessing with ColumnTransformer, a random-forest model, and seaborn for the sample data
import pandas as pd 
from sklearn.compose import ColumnTransformer 
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline 
from sklearn.ensemble import RandomForestClassifier 
from sklearn.model_selection import train_test_split 
from sklearn.metrics import accuracy_score 
import seaborn as sns 

In [None]:
# Load seaborn's example 'titanic' dataset into a DataFrame.
# NOTE(review): seaborn fetches example datasets online unless a cached copy
# exists -- confirm this cell works in offline environments.
titanic = sns.load_dataset(name='titanic')
# Preview the first five rows as the cell's rich output.
titanic.head(n=5)

In [None]:
# Count missing values per column (isna is the canonical spelling of isnull).
titanic.isna().sum()

In [5]:
# Feature matrix: the two columns later treated as numeric (age, fare) and the
# three later treated as categorical (sex, class, embark_town).
X = titanic.loc[:, ['age', 'fare', 'sex', 'class', 'embark_town']]
# Target vector: the 'survived' column.
y = titanic.loc[:, 'survived']

In [6]:
# Numeric branch: fill missing values with the column median, then standardize.
num_steps = [
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler()),
]
num_transformer = Pipeline(num_steps)

# Categorical branch: fill missing values with the most frequent category, then
# one-hot encode. Categories unseen during fit are ignored at transform time
# instead of raising an error.
cat_steps = [
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('encoder', OneHotEncoder(handle_unknown='ignore')),
]
cat_transformer = Pipeline(cat_steps)

In [7]:
# Route each group of columns through its matching sub-pipeline.
numeric_columns = ['age', 'fare']
categorical_columns = ['sex', 'class', 'embark_town']

preprocessor = ColumnTransformer([
    ('num', num_transformer, numeric_columns),
    ('cat', cat_transformer, categorical_columns),
])

In [8]:
# Build the end-to-end pipeline: preprocessing followed by the classifier, so
# both are fitted together on whatever data is passed to fit().
classifier = RandomForestClassifier(random_state=42)  # fixed seed for repeatable forests
pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('classifier', classifier),
])


In [None]:
# Prepare the data: hold out 20% of the rows for evaluation. The fixed
# random_state keeps the split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit the whole pipeline on the training rows (fit() returns the pipeline
# itself), then predict labels for the held-out rows.
y_pred = pipeline.fit(X_train, y_train).predict(X_test)

# Score the predictions against the held-out labels and report the accuracy.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Model Accuracy: {accuracy:.2f}")


In [None]:
# Inspect transformed features.
# The pipeline holds the preprocessor step by reference, and pipeline.fit()
# has already fitted it. Calling transform() on that fitted step shows exactly
# what the classifier was trained on, without re-fitting (and thereby
# overwriting) the state of the trained pipeline.
fitted_preprocessor = pipeline.named_steps['preprocessor']
X_transformed = fitted_preprocessor.transform(X_train)
print("Transformed Feature Shape:", X_transformed.shape)

In [None]:
# Show the first transformed row, kept two-dimensional by slicing rather than indexing.
X_transformed[0:1]