### Ensuring Consistency Across Training & Inference Datasets: Pipeline Integration
**Question**: Using scikit-learn, create and train a machine learning pipeline that guarantees the same feature transformations are applied to both the training and the inference datasets.

In [1]:
# --- Step 1: Import Required Libraries ---
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline

# --- Step 2: Load and Prepare the Data ---
# Tunable settings, gathered in one place so the split and the inference demo
# are easy to adjust and reproduce.
TEST_SIZE = 0.2        # fraction of rows held out for evaluation
RANDOM_STATE = 42      # seed for the train/test shuffle
N_INFERENCE_ROWS = 5   # number of held-out rows used to simulate inference

# Fetch the California housing dataset and wrap it in labelled pandas objects
data = fetch_california_housing()
X = pd.DataFrame(data.data, columns=data.feature_names)
y = pd.Series(data.target, name='target')

# --- Step 3: Split the Data ---
# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_SIZE, random_state=RANDOM_STATE
)

# --- Step 4: Create the Pipeline ---
# Bundle feature scaling and the linear regression model into one estimator so
# that the scaler is always applied together with the model.
pipeline = Pipeline([
    ('scaler', StandardScaler()),  # Feature scaling step
    ('regressor', LinearRegression())  # Model step
])

# --- Step 5: Train the Model ---
# Fit the pipeline on the training data only; the test rows are never passed to
# fit, so they cannot influence the scaler or the regressor.
pipeline.fit(X_train, y_train)

# --- Step 6: Evaluate the Model ---
# Evaluate the model on the test set
score = pipeline.score(X_test, y_test)
print(f"Model R^2 score on test data: {score:.4f}")

# --- Step 7: Ensure Consistency Across Training & Inference ---
# Simulate inference on new data (here: the first rows of the held-out test set)
new_data = X_test.iloc[:N_INFERENCE_ROWS]
predictions = pipeline.predict(new_data)

# Actually verify the consistency claim instead of assuming it: applying the
# fitted scaler and the fitted regressor by hand must reproduce exactly what
# pipeline.predict returns for the same rows.
manual_scaled = pipeline.named_steps['scaler'].transform(new_data)
manual_predictions = pipeline.named_steps['regressor'].predict(manual_scaled)
np.testing.assert_allclose(
    predictions,
    manual_predictions,
    err_msg="Pipeline predictions differ from manually applying the fitted steps",
)

# Output predictions
print("Predictions on new data:\n", predictions)


Model R^2 score on test data: 0.5758
Predictions on new data:
 [0.71912284 1.76401657 2.70965883 2.83892593 2.60465725]
