# Solution: Pipeline Construction

In [None]:
import pandas as pd
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import RobustScaler, OrdinalEncoder
from sklearn.linear_model import LogisticRegression

# 1. Data
# Small toy dataset: 'Age' contains one missing entry and 'Salary' contains
# one extreme value, so the preprocessing steps have something to act on.
df = pd.DataFrame(
    data={
        'Age': [25, np.nan, 30, 45, 50],  # Missing value
        'Salary': [50000, 60000, 1000000, 55000, 62000],  # Outlier
        'Quality': ['Low', 'High', 'Low', 'High', 'High'],
        'Target': [0, 0, 1, 1, 1],
    },
)

# Separate the label column from the feature columns.
y = df['Target']
X = df.drop(columns=['Target'])

# 2. Pipeline Definition
# One transformer per feature column, built up front and then wired together.
age_imputer = SimpleImputer(strategy='median')  # fills the missing 'Age' entry
salary_scaler = RobustScaler()  # applied to 'Salary', which holds the outlier
# Categories are listed explicitly so the encoding follows Low < High
# instead of the encoder's automatically determined order.
quality_encoder = OrdinalEncoder(categories=[['Low', 'High']])

preprocessor = ColumnTransformer(
    transformers=[
        ('num_age', age_imputer, ['Age']),
        ('num_salary', salary_scaler, ['Salary']),
        ('cat', quality_encoder, ['Quality']),
    ],
)

# Preprocessing and the classifier are chained so that fit/predict on `model`
# always run the same transformations before the logistic regression.
classifier = LogisticRegression()
model = Pipeline(
    steps=[
        ('preprocessor', preprocessor),
        ('lr', classifier),
    ],
)

# 3. Fit and Predict
# A single unseen record, described as column -> value and expanded into a
# one-row DataFrame with the same feature columns used for training.
new_record = {'Age': 35, 'Salary': 58000, 'Quality': 'Low'}
new_data = pd.DataFrame({column: [value] for column, value in new_record.items()})

# Train the whole pipeline (preprocessing + classifier) on the toy data.
model.fit(X, y)

# The fitted pipeline applies the same preprocessing to the new record
# before classifying it.
prediction = model.predict(new_data)
print("Prediction:", prediction)