### Ensuring Consistency Across Training & Inference Datasets: Pipeline Integration
**Question**: Create and train a machine learning pipeline that ensures feature transformation consistency across training and inference datasets using scikit-learn.

In [3]:
import numpy as np
import pandas as pd
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

# Step 1: Load and validate dataset
# The path below is a placeholder; point it at the real CSV before running.
try:
    df = pd.read_csv('your_dataset.csv')  # Replace with actual dataset
except FileNotFoundError as e:
    raise FileNotFoundError("❌ Dataset not found. Please verify the file path.") from e
except Exception as e:
    raise Exception(f"❌ Error loading dataset: {e}") from e
else:
    print("✅ Dataset loaded successfully.")

# Step 2: Data integrity checks
# Every column (features and target) must be numeric, because the pipeline
# below scales all features and fits a linear regression on them.
numeric_cols = df.select_dtypes(include=[np.number]).columns
if len(numeric_cols) < len(df.columns):
    raise TypeError("❌ Non-numeric columns found. Ensure all features are numerical.")

# Step 3: Prepare features and target
try:
    X = df.drop('target', axis=1)
    y = df['target']
except KeyError as e:
    raise KeyError("❌ 'target' column not found in dataset.") from e

# Rows without a label cannot be used for supervised training or scoring.
# Drop them instead of fabricating a target value through imputation.
has_target = y.notna()
if not has_target.all():
    print(f"⚠️ Dropping {int((~has_target).sum())} rows with missing target values.")
    X = X[has_target]
    y = y[has_target]

# Missing feature values are NOT filled here. Imputing before the split would
# leak test-set statistics into training, and the fill values would not be
# available at inference time. The imputer lives inside the pipeline instead.
if X.isnull().values.any():
    print("⚠️ Missing values detected. Applying mean imputation.")

# Step 4: Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 5: Create pipeline with consistent transformations
# All learned preprocessing state (imputation means, scaling mean/variance) is
# fit on the training split only and then replayed unchanged by predict(), so
# training and inference data go through exactly the same transformations.
pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler()),
    ('model', LinearRegression()),
])

# Step 6: Train pipeline
try:
    pipeline.fit(X_train, y_train)
except Exception as e:
    raise Exception(f"❌ Error during training: {e}") from e
else:
    print("✅ Pipeline trained successfully.")

# Step 7: Evaluate pipeline on test data
try:
    predictions = pipeline.predict(X_test)
    mse = mean_squared_error(y_test, predictions)
except Exception as e:
    raise Exception(f"❌ Error during evaluation: {e}") from e
else:
    print(f"📊 Mean Squared Error on Test Set: {mse:.2f}")


Output (raised because the placeholder file `your_dataset.csv` does not exist):
FileNotFoundError: ❌ Dataset not found. Please verify the file path.