### Ensuring Consistency Across Training & Inference Datasets: Feature Scaling
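**Question**: Load a dataset (e.g., Boston Housing) and perform feature scaling. Ensure the
same scaling is applied during model inference with new data.

**Note**: The code below uses the California Housing dataset (`fetch_california_housing`)
instead of Boston Housing, because `load_boston` was removed from scikit-learn in version 1.2.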

In [4]:
# --- Step 1: Import Required Libraries ---
import pandas as pd
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
import numpy as np
import joblib

# --- Step 2: Load Dataset ---
# Wrap the raw arrays in pandas objects so the feature names stay attached
# to the columns for the rest of the notebook.
data = fetch_california_housing()
X = pd.DataFrame(data=data.data, columns=data.feature_names)
y = pd.Series(data=data.target, name="MedHouseValue")

# --- Step 3: Train-Test Split ---
# Hold out 20% of the rows; the fixed random_state keeps the split
# reproducible across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# --- Step 4: Feature Scaling ---
# Fit the scaler on the training split ONLY, then reuse that fitted scaler
# for every other split. The test split is transformed, never fitted on.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# --- Step 5: Train Model ---
# The model only ever sees scaled features, so anything passed to
# model.predict later must go through the same `scaler` first.
model = LinearRegression().fit(X_train_scaled, y_train)

# --- Step 6: Inference on New Data (simulate one row)
# Stand-in for unseen data: the first row of the full feature table X, kept
# as a one-row DataFrame so it has the same columns the scaler was fitted on.
# NOTE: X is the un-split dataset, so this row may also have been part of the
# training split -- it demonstrates the workflow, not generalisation.
new_data = X.iloc[:1]

# Apply the already-fitted scaler (transform only, no re-fit) before predicting.
new_data_scaled = scaler.transform(new_data)
predicted_value = model.predict(new_data_scaled)
print(f"Predicted Median House Value: {predicted_value[0]:.2f}")

# --- Step 7: Save Model & Scaler, then verify the round trip ---
# Persist the fitted scaler alongside the model: inference code must reuse
# this exact scaler (fitted on the training split), never a freshly fitted one.
joblib.dump(model, 'california_model.pkl')
saved_scaler_files = joblib.dump(scaler, 'california_scaler.pkl')

# Reload both artifacts the way a separate inference process would, and check
# that they reproduce the in-memory prediction for the same input row.
# NOTE: joblib files are pickle-based -- only load artifacts you trust.
loaded_model = joblib.load('california_model.pkl')
loaded_scaler = joblib.load('california_scaler.pkl')
reloaded_prediction = loaded_model.predict(loaded_scaler.transform(new_data))
assert np.allclose(reloaded_prediction, predicted_value), (
    "Reloaded scaler/model do not reproduce the original prediction"
)

# joblib.dump returns the list of files it wrote; keep it as the cell's
# displayed value.
saved_scaler_files


Predicted Median House Value: 4.15


['california_scaler.pkl']