### Ensuring Consistency Across Training & Inference Datasets: Pipeline Integration
**Question**: Using scikit-learn, create and train a machine learning pipeline that applies the same feature transformations to both the training and inference datasets.

In [None]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
import numpy as np

# Step 1: Load the Iris dataset and pull out the feature matrix and labels
iris = load_iris()
X = iris.data
y = iris.target

# Step 2: Hold out 20% of the samples as the inference set; the fixed seed
# keeps the split repeatable between runs
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Step 3: Bundle scaling and classification into a single estimator so the
# scaler fitted during training is the one applied again at inference time
pipeline_steps = [
    ("scaler", StandardScaler()),
    ("clf", LogisticRegression(random_state=42)),
]
pipeline = Pipeline(pipeline_steps)

# Step 4: Fit the scaler and the classifier on the training split only
pipeline.fit(X_train, y_train)

# Step 5: Predict on the held-out split through the full pipeline
y_pred = pipeline.predict(X_test)

# Step 6: Reproduce the pipeline by hand -- scale with the fitted scaler,
# then classify with the fitted classifier -- to confirm both routes agree
fitted_scaler = pipeline.named_steps["scaler"]
fitted_clf = pipeline.named_steps["clf"]
X_test_scaled_manual = fitted_scaler.transform(X_test)
y_pred_manual = fitted_clf.predict(X_test_scaled_manual)

# The two prediction arrays must be element-wise identical
predictions_match = np.array_equal(y_pred, y_pred_manual)
assert predictions_match, "Predictions differ between pipeline and manual transform!"

# Step 7: Show the first ten predictions from each route, then the conclusion
sample_reports = (
    ("Sample predictions from pipeline:", y_pred),
    ("Sample predictions from manual transform + clf:", y_pred_manual),
)
for label, predictions in sample_reports:
    print(label, predictions[:10])
print("Pipeline ensures consistent feature transformation during training and inference.")