### Handling Missing Values - Imputation within ML Pipelines
**Description**: Implement a machine learning pipeline that includes imputation and a classifier.

In [1]:
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

# Sample dataset with missing values
data = {
    'Age': [25, 30, None, 22, 28, None, 35],
    'Income': [50000, None, 62000, 58000, None, 54000, 61000],
    'Gender': ['Male', 'Female', 'Female', None, 'Male', 'Female', 'Male'],
    'Purchased': [0, 1, 0, 1, 0, 1, 0]
}

df = pd.DataFrame(data)

# Convert categorical column 'Gender' to numeric using one-hot encoding
df = pd.get_dummies(df, columns=['Gender'], dummy_na=True)

# Separate features and target
X = df.drop('Purchased', axis=1)
y = df['Purchased']

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Create pipeline with imputation and classifier
pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy='mean')),  # Impute missing values with mean
    ('classifier', RandomForestClassifier(random_state=42))
])

# Train
pipeline.fit(X_train, y_train)

# Predict
y_pred = pipeline.predict(X_test)

# Evaluate
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.33      1.00      0.50         1
           1       0.00      0.00      0.00         2

    accuracy                           0.33         3
   macro avg       0.17      0.50      0.25         3
weighted avg       0.11      0.33      0.17         3



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
