In [None]:
# Cell 1: Import core libraries
import os
import sys
import numpy as np
import pandas as pd

print('✓ Core libraries imported')

In [None]:
# Cell 2: Import sklearn components
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split, TimeSeriesSplit

print('✓ Sklearn components imported')

In [None]:
# Cell 3: Import regression models
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression

try:
    from xgboost import XGBRegressor
    HAS_XGB = True
except ImportError:
    XGBRegressor = None
    HAS_XGB = False

try:
    from lightgbm import LGBMRegressor
    HAS_LGB = True
except ImportError:
    LGBMRegressor = None
    HAS_LGB = False

try:
    from catboost import CatBoostRegressor
    HAS_CB = True
except ImportError:
    CatBoostRegressor = None
    HAS_CB = False

print(f'✓ Models imported (XGB:{HAS_XGB}, LGB:{HAS_LGB}, CB:{HAS_CB})')

In [None]:
# Cell 4: Import optuna, joblib, visualization
import joblib
import matplotlib.pyplot as plt
import seaborn as sns

try:
    import optuna
    HAS_OPTUNA = True
except ImportError:
    optuna = None
    HAS_OPTUNA = False

plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette('husl')
print(f'✓ Visualization libraries imported (Optuna:{HAS_OPTUNA})')

In [None]:
# Cell 5: Set global random seed
# RANDOM_STATE is reused further down as the `random_state` of the row sample
# preview, the train/validation split and the random-forest models.
# np.random.seed seeds NumPy's legacy global generator only.
RANDOM_STATE = 42
np.random.seed(RANDOM_STATE)
print(f'✓ Random seed: {RANDOM_STATE}')

In [None]:
# Cell 6: Define data paths
# DATA_PATH is relative to the directory the notebook is run from.
# DATATEST is None, so the test-loading cell below takes its
# "no separate test file" branch.
DATA_PATH = '../data'
DATATRAIN = os.path.join(DATA_PATH, 'raw', 'railway-delay-dataset.csv')
DATATEST = None
print(f'✓ Data paths defined\n  Train: {DATATRAIN}')

In [None]:
# Cell 7: Define constants
# TARGET_COL is the regression target column.
# DATE_COL is the preferred datetime column; the auto-detect cell falls back to
# a keyword match on column names when DATE_COL is not present in the data.
TARGET_COL = 'DELAY_MINUTES'
DATE_COL = 'SCHEDULED_DT'
print(f'✓ Constants: TARGET={TARGET_COL}, DATE={DATE_COL}')

In [None]:
# Cell 8: Memory management flags
# MAX_ROWS is passed as `nrows` to pd.read_csv in the loading cells.
# NOTE(review): DOWNSAMPLE is only printed here; no cell in this notebook
# reads it, so changing it currently has no effect.
MAX_ROWS = None
DOWNSAMPLE = False
print(f'✓ Memory: MAX_ROWS={MAX_ROWS}, DOWNSAMPLE={DOWNSAMPLE}')

In [None]:
# Cell 9: Create model directory
# Relative path, created next to wherever the notebook is executed.
# exist_ok=True makes this cell safe to re-run.
MODEL_DIR = 'models'
os.makedirs(MODEL_DIR, exist_ok=True)
print(f'✓ Model directory: {MODEL_DIR}')

In [None]:
# Cell 10: Configuration sanity check
# Echo every setting chosen above in a single banner so a run log records
# the configuration it was produced with.
rule = '='*70
config_lines = [
    f'Random State: {RANDOM_STATE}',
    f'Target: {TARGET_COL} (Regression)',
    f'Date Column: {DATE_COL}',
    f'Train Data: {DATATRAIN}',
    f'Model Directory: {MODEL_DIR}',
    f'Optional: XGB={HAS_XGB}, LGB={HAS_LGB}, CB={HAS_CB}, Optuna={HAS_OPTUNA}',
]
for line in [rule, 'CONFIGURATION SUMMARY', rule, *config_lines, rule]:
    print(line)

In [None]:
# Cell 11: Load training data
# MAX_ROWS (None = all rows) caps how much of the CSV is read.
df = pd.read_csv(DATATRAIN, nrows=MAX_ROWS)
train_mb = df.memory_usage(deep=True).sum() / 1024**2
print(f'✓ Data loaded: {df.shape}, {train_mb:.2f} MB')

In [None]:
# Cell 12: Load test data (if available)
# df_test stays None unless DATATEST is set and points at an existing file.
has_test_file = bool(DATATEST) and os.path.exists(DATATEST)
df_test = pd.read_csv(DATATEST, nrows=MAX_ROWS) if has_test_file else None
if has_test_file:
    print(f'✓ Test data: {df_test.shape}')
else:
    print('✓ No separate test file')

In [None]:
# Cell 13: Print columns
# Numbered (1-based) listing of every column in the loaded frame.
print(f'Columns ({len(df.columns)} total):')
column_lines = [f'  {pos:3d}. {name}' for pos, name in enumerate(df.columns, 1)]
if column_lines:
    print('\n'.join(column_lines))

In [None]:
# Cell 14: Auto-detect datetime column
# Preference order: the configured DATE_COL, else the first column whose
# upper-cased name contains one of the keywords, else None.
dt_keywords = ['TIME', 'DATE', 'DT', 'SCHEDULED', 'ACTUAL']
dt_candidates = []
for c in df.columns:
    name_upper = c.upper()
    if any(k in name_upper for k in dt_keywords):
        dt_candidates.append(c)

if DATE_COL in df.columns:
    datetime_col = DATE_COL
elif dt_candidates:
    datetime_col = dt_candidates[0]
else:
    datetime_col = None
print(f'Datetime column: {datetime_col}')

In [None]:
# Cell 15: Convert to datetime
# errors='coerce' turns unparseable values into NaT instead of raising;
# the printed percentage is the share of rows that parsed.
if not datetime_col:
    print('⚠ No datetime column')
else:
    df[datetime_col] = pd.to_datetime(df[datetime_col], errors='coerce')
    parsed_pct = df[datetime_col].notna().mean() * 100
    print(f'✓ Converted {datetime_col}: {parsed_pct:.1f}% parsed')

In [None]:
# Cell 16: Extract hour/weekday features
# Calendar parts of the parsed datetime column; rows with NaT yield NaN here.
if datetime_col:
    dt_parts = df[datetime_col].dt
    df['HOUR'] = dt_parts.hour
    df['DAY_OF_WEEK'] = dt_parts.dayofweek
    df['MONTH'] = dt_parts.month
    print('✓ Extracted: HOUR, DAY_OF_WEEK, MONTH')

In [None]:
# Cell 17: Handle missing datetime
# Rows whose datetime failed to parse have NaN calendar parts; fill them with
# the column median. The result is assigned back to the frame because
# `df[col].fillna(..., inplace=True)` acts on a temporary Series and is a
# no-op under pandas Copy-on-Write. MONTH is filled as well so all three
# extracted parts are treated the same way.
if datetime_col and 'HOUR' in df.columns:
    missing = df[datetime_col].isna().sum()
    if missing > 0:
        for part in ('HOUR', 'DAY_OF_WEEK', 'MONTH'):
            if part in df.columns:
                df[part] = df[part].fillna(df[part].median())
        print(f'⚠ Filled {missing} missing datetime values')
    else:
        print('✓ No missing datetime')

In [None]:
# Cell 18: Sort by time
# Chronological order is what the lag/rolling features and the unshuffled
# train/validation split further down rely on.
if datetime_col:
    df = df.sort_values(by=datetime_col)
    df = df.reset_index(drop=True)
    print(f'✓ Sorted by {datetime_col}')

In [None]:
# Cell 19: Data info
# Column dtypes and non-null counts for the frame after the datetime
# parsing, time-part extraction and sorting steps above.
df.info()

In [None]:
# Cell 20: Display sample
# Head plus a seeded random sample. The sample size is capped at the frame
# length because df.sample(3) raises on frames with fewer than 3 rows
# (e.g. when MAX_ROWS is set very low).
print('First 3 rows:')
display(df.head(3))
print('\nRandom sample:')
display(df.sample(min(3, len(df)), random_state=RANDOM_STATE))

In [None]:
# Cell 21: Helper - get route column
def _get_route_column(df):
    keywords = ['ROUTE', 'TRAIN_ID', 'SERVICE', 'TRAIN_NO']
    for col in df.columns:
        if any(k in col.upper() for k in keywords):
            return col
    return None
print('✓ _get_route_column defined')

In [None]:
# Cell 22: Helper - compute previous delay
def compute_prev_delay_safe(df, target_col=TARGET_COL):
    """Add a PREV_DELAY column holding the previous row's target value.

    When a route-like column is detected the lag is taken within each route;
    otherwise it is taken over the whole frame. Rows without a predecessor get
    0, and so does every row when `target_col` is absent. The frame is
    modified in place and also returned. Row order is taken as-is, so the
    caller is expected to have sorted the frame chronologically.
    """
    if target_col not in df.columns:
        df['PREV_DELAY'] = 0
        return df

    route_col = _get_route_column(df)
    # Same shift/fill either way; only the source (grouped vs. flat) differs.
    source = df.groupby(route_col)[target_col] if route_col else df[target_col]
    df['PREV_DELAY'] = source.shift(1).fillna(0)
    return df
print('✓ compute_prev_delay_safe defined')

In [None]:
# Cell 23: Helper - compute rolling features
def compute_rolling_features_safe(df, target_col=TARGET_COL, window=7):
    """Add ROLLING_MEAN_{window}D: the mean of the previous `window` targets.

    The series is shifted by one row before the rolling mean is taken, so each
    row's feature uses strictly earlier observations only. Without the shift
    the window includes the row's own target, which leaks the label into the
    feature.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to augment, expected to be in chronological order. It is
        modified in place and also returned.
    target_col : str
        Column to average. If absent, the feature is set to 0.
    window : int
        Number of preceding rows (not calendar days, despite the "D" suffix
        in the column name) in the window.

    Returns
    -------
    pandas.DataFrame
        The same frame with the new column. Rows with no earlier observation
        (the first row of the frame or of each route) are NaN.
    """
    feature = f'ROLLING_MEAN_{window}D'
    if target_col not in df.columns:
        df[feature] = 0
        return df

    def _past_mean(values):
        # shift(1) drops the current row from its own window.
        return values.shift(1).rolling(window, min_periods=1).mean()

    route_col = _get_route_column(df)
    if route_col:
        df[feature] = df.groupby(route_col)[target_col].transform(_past_mean)
    else:
        df[feature] = _past_mean(df[target_col])
    return df
print('✓ compute_rolling_features_safe defined')

In [None]:
# Cell 24: Helper - metrics summary
def metrics_summary(y_true, y_pred):
    """Compute MAE, RMSE and R2 over the pairs where both values are non-NaN.

    Inputs are coerced to flat float arrays first, so lists, pandas Series
    (whatever their index) and column-shaped (n, 1) predictions all work.

    Parameters
    ----------
    y_true, y_pred : array-like
        Observed and predicted values with the same number of elements.

    Returns
    -------
    dict
        Keys 'MAE', 'RMSE', 'R2'. All three are NaN when no valid pair is left.

    Raises
    ------
    ValueError
        If the two inputs differ in length.
    """
    y_true = np.asarray(y_true, dtype=float).ravel()
    y_pred = np.asarray(y_pred, dtype=float).ravel()
    if y_true.shape != y_pred.shape:
        raise ValueError(
            f'y_true and y_pred differ in length: {y_true.shape[0]} vs {y_pred.shape[0]}'
        )

    mask = ~(np.isnan(y_true) | np.isnan(y_pred))
    y_true = y_true[mask]
    y_pred = y_pred[mask]
    if len(y_true) == 0:
        return {'MAE': np.nan, 'RMSE': np.nan, 'R2': np.nan}
    return {
        'MAE': mean_absolute_error(y_true, y_pred),
        'RMSE': np.sqrt(mean_squared_error(y_true, y_pred)),
        'R2': r2_score(y_true, y_pred)
    }
print('✓ metrics_summary defined')

In [None]:
# Cell 25: Helper - residual plots
def plot_residuals(y_true, y_pred, title='Residual Plot'):
    """Draw residuals-vs-predicted and a residual histogram side by side.

    Parameters
    ----------
    y_true, y_pred : array-like
        Observed and predicted values of equal length.
    title : str
        Prefix used in both panel titles.

    Returns
    -------
    matplotlib.figure.Figure
    """
    # Positional arrays: subtracting two pandas objects would align on index
    # and silently produce NaN where the indexes differ.
    y_true = np.asarray(y_true, dtype=float).ravel()
    y_pred = np.asarray(y_pred, dtype=float).ravel()
    res = y_true - y_pred

    fig, ax = plt.subplots(1, 2, figsize=(14, 5))
    ax[0].scatter(y_pred, res, alpha=0.5)
    ax[0].axhline(0, color='r', linestyle='--')
    ax[0].set_xlabel('Predicted')
    ax[0].set_ylabel('Residuals')
    ax[0].set_title(f'{title} - Residuals vs Predicted')
    # Histogram binning fails on NaN, so only finite residuals are binned.
    ax[1].hist(res[np.isfinite(res)], bins=50)
    ax[1].set_xlabel('Residuals')
    ax[1].set_ylabel('Count')
    ax[1].set_title(f'{title} - Distribution')
    plt.tight_layout()
    return fig
print('✓ plot_residuals defined')

In [None]:
# Cell 26: Helper - feature importance
def plot_feature_importance(model, feature_names, top_n=20):
    """Horizontal bar chart of a fitted model's largest feature importances.

    Parameters
    ----------
    model : object
        Fitted estimator; only its `feature_importances_` attribute is read.
    feature_names : sequence
        Names indexed like the importances. If it is shorter than the
        importance vector, the missing entries are labelled `feature_<i>`
        instead of raising IndexError.
    top_n : int
        Maximum number of bars.

    Returns
    -------
    matplotlib.figure.Figure, or None if the model has no
    `feature_importances_`.
    """
    if not hasattr(model, 'feature_importances_'):
        return None
    imp = np.asarray(model.feature_importances_)
    idx = np.argsort(imp)[::-1][:top_n]  # descending, truncated
    names = list(feature_names)
    labels = [str(names[i]) if i < len(names) else f'feature_{i}' for i in idx]

    fig, ax = plt.subplots(figsize=(10, 8))
    ax.barh(range(len(idx)), imp[idx])
    ax.set_yticks(range(len(idx)))
    ax.set_yticklabels(labels)
    ax.invert_yaxis()  # most important feature on top
    ax.set_xlabel('Importance')
    ax.set_title(f'Top {len(idx)} Feature Importances')
    plt.tight_layout()
    return fig
print('✓ plot_feature_importance defined')

In [None]:
# Cell 27: Helper - outlier detection
def detect_outliers_iqr(series, multiplier=1.5):
    """Flag values lying outside the IQR fences of `series`.

    A value is flagged when it is below Q1 - multiplier*IQR or above
    Q3 + multiplier*IQR. Returns a boolean Series aligned with the input.
    """
    lower_q, upper_q = series.quantile(0.25), series.quantile(0.75)
    reach = multiplier * (upper_q - lower_q)
    too_low = series < lower_q - reach
    too_high = series > upper_q + reach
    return too_low | too_high
print('✓ detect_outliers_iqr defined')

In [None]:
# Cell 28: Helper - inference
def predict_with_preprocessing(model, preprocessor, X_new):
    """Predict on `X_new`, transforming it with `preprocessor` first.

    Parameters
    ----------
    model : object with `predict`
    preprocessor : object with `transform`, or None
        Pass None to feed `X_new` to the model unchanged.
    X_new : input accepted by the preprocessor (or by the model when the
        preprocessor is None).

    Returns
    -------
    Whatever `model.predict` returns.
    """
    # Explicit None check: truthiness would skip a valid transformer whose
    # __len__ is 0 or whose __bool__ is falsy.
    X_proc = preprocessor.transform(X_new) if preprocessor is not None else X_new
    return model.predict(X_proc)
print('✓ predict_with_preprocessing defined')

In [None]:
# Cell 29: Test helpers
# Smoke test with a known answer: every prediction is off by exactly 0.1, so
# MAE = RMSE = 0.1 and R2 = 1 - 0.05/10 = 0.995. The assertions make a
# broken helper stop the run instead of just printing wrong numbers.
y_t = np.array([1, 2, 3, 4, 5])
y_p = np.array([1.1, 2.1, 3.1, 4.1, 5.1])
m = metrics_summary(y_t, y_p)
assert np.isclose(m['MAE'], 0.1) and np.isclose(m['RMSE'], 0.1), m
assert np.isclose(m['R2'], 0.995), m
print(f'Test: RMSE={m["RMSE"]:.3f}, R2={m["R2"]:.3f}')
print('✓ All helpers tested')

---
## End Helper Functions
All helper functions defined and tested.
---

In [None]:
# Cell 31: Time features
# Already created in Cell 16
# Nothing is added here; this only reports which of HOUR / DAY_OF_WEEK /
# MONTH exist on df.
print(f'✓ Time features: {[c for c in df.columns if c in ["HOUR", "DAY_OF_WEEK", "MONTH"]]}')

In [None]:
# Cell 32: Cyclical encoding
# Map hour-of-day (period 24) and day-of-week (period 7) onto the unit circle
# so that 23:00 and 00:00, or Sunday and Monday, end up close together.
# The message lists only the columns that were actually created.
cyclical_added = []
if 'HOUR' in df.columns:
    df['SIN_HOUR'] = np.sin(2 * np.pi * df['HOUR'] / 24)
    df['COS_HOUR'] = np.cos(2 * np.pi * df['HOUR'] / 24)
    cyclical_added += ['SIN_HOUR', 'COS_HOUR']
if 'DAY_OF_WEEK' in df.columns:
    df['SIN_DAY'] = np.sin(2 * np.pi * df['DAY_OF_WEEK'] / 7)
    df['COS_DAY'] = np.cos(2 * np.pi * df['DAY_OF_WEEK'] / 7)
    cyclical_added += ['SIN_DAY', 'COS_DAY']
if cyclical_added:
    print(f"✓ Cyclical features: {', '.join(cyclical_added)}")
else:
    print('⚠ No cyclical features created (HOUR / DAY_OF_WEEK missing)')

In [None]:
# Cell 33: Lag feature (previous delay)
# PREV_DELAY is the target of the preceding row (within the detected route
# column when there is one), so it depends on df having been sorted by time
# in Cell 18.
df = compute_prev_delay_safe(df, TARGET_COL)
print(f'✓ PREV_DELAY: mean={df["PREV_DELAY"].mean():.2f}')

In [None]:
# Cell 34: Rolling mean 7D (route-based)
# NOTE: window=7 reaches an integer `.rolling(window)` inside the helper,
# i.e. 7 rows, not 7 calendar days, despite the "7D" in the column name.
df = compute_rolling_features_safe(df, TARGET_COL, window=7)
print('✓ ROLLING_MEAN_7D created')

In [None]:
# Cell 35: Rolling mean global fallback
# Where the per-route feature is NaN, fall back to a rolling mean over the
# whole frame. The fallback is shifted by one row so it never includes the
# current row's target (no label leakage), and the filled Series is assigned
# back because chained `fillna(inplace=True)` is a no-op under pandas
# Copy-on-Write.
if 'ROLLING_MEAN_7D' in df.columns and TARGET_COL in df.columns:
    global_fallback = df[TARGET_COL].shift(1).rolling(7, min_periods=1).mean()
    df['ROLLING_MEAN_7D'] = df['ROLLING_MEAN_7D'].fillna(global_fallback)
    print('✓ Rolling mean fallback applied')

In [None]:
# Cell 36: Weather/external features (if available)
# Placeholder for external data joins: for now it only reports columns whose
# name mentions WEATHER.
weather_cols = [c for c in df.columns if 'WEATHER' in c.upper()]
weather_msg = f'✓ Weather features found: {weather_cols}' if weather_cols else '✓ No weather features'
print(weather_msg)

In [None]:
# Cell 37: Fill missing engineered features
# Remaining gaps in the engineered columns (e.g. rows with no history) become
# 0. Values are assigned back to the frame; the chained
# `df[col].fillna(0, inplace=True)` form acts on a temporary Series and does
# nothing under pandas Copy-on-Write.
eng_features = ['PREV_DELAY', 'ROLLING_MEAN_7D', 'SIN_HOUR', 'COS_HOUR', 'SIN_DAY', 'COS_DAY']
eng_present = [col for col in eng_features if col in df.columns]
if eng_present:
    df[eng_present] = df[eng_present].fillna(0)
print('✓ Engineered features filled')

In [None]:
# Cell 38: Feature distribution check
# Summary statistics for whichever of the listed engineered / time columns
# exist on df; skipped entirely when none of them do.
eng_cols = [c for c in ['PREV_DELAY', 'ROLLING_MEAN_7D', 'HOUR', 'DAY_OF_WEEK'] if c in df.columns]
if eng_cols:
    print('Feature distributions:')
    display(df[eng_cols].describe())

In [None]:
# Cell 39: Drop leakage columns
# Columns whose name mentions an actual / arrival / departure time are only
# known after the trip, so they are removed. The target itself is always kept.
leakage_keywords = ['ACTUAL', 'ARRIVAL_TIME', 'DEPARTURE_TIME']
leakage_cols = []
for c in df.columns:
    if c == TARGET_COL:
        continue
    if any(k in c.upper() for k in leakage_keywords):
        leakage_cols.append(c)

if not leakage_cols:
    print('✓ No leakage columns detected')
else:
    df.drop(columns=leakage_cols, inplace=True)
    print(f'⚠ Dropped {len(leakage_cols)} leakage columns')

In [None]:
# Cell 40: Feature list snapshot
# Informational only: X is built in Cell 46 by dropping TARGET_COL from df,
# not from this list.
feature_cols = [c for c in df.columns if c != TARGET_COL]
print(f'Total features: {len(feature_cols)}')
print(f'Sample: {feature_cols[:10]}')

In [None]:
# Cell 41: Feature sanity check
# The missing-value figure is the total count of null cells across all
# columns of df, raw columns included.
print(f'Shape after engineering: {df.shape}')
print(f'Target column present: {TARGET_COL in df.columns}')
print(f'Missing values: {df.isnull().sum().sum()}')

In [None]:
# Cell 42: Print engineered columns
# One True/False line per expected engineered column.
eng_added = ['PREV_DELAY', 'ROLLING_MEAN_7D', 'SIN_HOUR', 'COS_HOUR', 'SIN_DAY', 'COS_DAY']
print('Engineered columns present:')
print('\n'.join(f'  {name}: {name in df.columns}' for name in eng_added))

In [None]:
# Cell 43: Memory cleanup
# NOTE(review): `gc` is imported here rather than in the import cells at the
# top of the notebook.
import gc
gc.collect()
print(f'✓ Memory after cleanup: {df.memory_usage(deep=True).sum()/1024**2:.2f} MB')

In [None]:
# Cell 44: Save intermediate (optional)
# Persisting the engineered frame is switched off; uncomment the line below
# to write it out (presumably ../data/processed must already exist — verify).
# df.to_csv('../data/processed/engineered_features.csv', index=False)
print('✓ Feature engineering complete (save disabled)')

---
## End Feature Engineering
All features engineered and validated.
---

In [None]:
# Cell 46: Define X, y
# Fail fast when the target is missing: a warning alone would leave X and y
# undefined and every later cell would die with a NameError.
# Rows without a target value cannot be used for fitting or scoring, so they
# are dropped from X and y together (estimators reject NaN in y).
if TARGET_COL not in df.columns:
    raise KeyError(f'⚠ Target column {TARGET_COL} not found!')

has_target = df[TARGET_COL].notna()
n_missing_target = int((~has_target).sum())
if n_missing_target:
    print(f'⚠ Dropping {n_missing_target} rows with missing {TARGET_COL}')
X = df.loc[has_target].drop(columns=[TARGET_COL])
y = df.loc[has_target, TARGET_COL]
print(f'✓ X: {X.shape}, y: {y.shape}')

In [None]:
# Cell 47: Log-transform target (optional)
# y_log is log1p(y) when the flag is on, otherwise it is y itself.
# NOTE(review): the split and model cells below use y, not y_log, so the
# flag currently has no effect on training.
USE_LOG_TRANSFORM = False
y_log = np.log1p(y) if USE_LOG_TRANSFORM else y
if USE_LOG_TRANSFORM:
    print(f'✓ Log-transformed target: skew={y_log.skew():.3f}')
else:
    print('✓ No log transform')

In [None]:
# Cell 48: Save original target
# Independent copy of y. Cell 47 writes its result to y_log and leaves y
# untouched, so this is the untransformed target.
y_original = y.copy()
print(f'✓ Original target saved: {len(y_original)} values')

In [None]:
# Cell 49: Target stats
# Location and spread of the target, two decimals each.
print('Target Statistics:')
target_stats = (
    ('Mean', y.mean()),
    ('Median', y.median()),
    ('Std', y.std()),
    ('Min', y.min()),
    ('Max', y.max()),
)
for label, value in target_stats:
    print(f'  {label}: {value:.2f}')

---
## End Target & Feature Split
Target variable prepared for modeling.
---

In [None]:
# Cell 51: Detect numeric features
# Columns of X with a numeric dtype; Cell 57 routes these through the
# numeric (impute + scale) branch of the preprocessor.
numeric_features = X.select_dtypes(include=[np.number]).columns.tolist()
print(f'✓ Numeric features: {len(numeric_features)}')

In [None]:
# Cell 52: Detect categorical features
# Columns of X with object or category dtype; Cell 57 routes these through
# the categorical (impute + one-hot) branch of the preprocessor.
categorical_features = X.select_dtypes(include=['object', 'category']).columns.tolist()
print(f'✓ Categorical features: {len(categorical_features)}')

In [None]:
# Cell 53: Detect label-encoded features
# NOTE(review): despite the name, this only lists categorical columns with
# fewer than 50 distinct values. Nothing is encoded here and no later cell
# in this notebook reads `label_encoded`.
label_encoded = [c for c in categorical_features if X[c].nunique() < 50]
print(f'✓ Label-encoded candidates: {len(label_encoded)}')

In [None]:
# Cell 54: Leakage check
# Second pass over the feature matrix: warn about any column whose name
# suggests it carries post-hoc information. Nothing is dropped here.
leakage_check = ['ACTUAL', 'RESULT', 'OUTCOME']
potential_leakage = [c for c in X.columns if any(k in c.upper() for k in leakage_check)]
leakage_msg = f'⚠ Potential leakage: {potential_leakage}' if potential_leakage else '✓ No leakage detected'
print(leakage_msg)

In [None]:
# Cell 55: Numeric transformer
# Two steps applied to numeric columns: fill gaps with the column median,
# then standardize with StandardScaler.
numeric_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler())
])
print('✓ Numeric transformer created')

In [None]:
# Cell 56: Categorical transformer
# Missing categories become the literal 'Unknown', then columns are one-hot
# encoded. handle_unknown='ignore' keeps transform from raising on categories
# not seen during fit; sparse_output=False requests a dense array.
categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='constant', fill_value='Unknown')),
    ('onehot', OneHotEncoder(handle_unknown='ignore', sparse_output=False))
])
print('✓ Categorical transformer created')

In [None]:
# Cell 57: ColumnTransformer (preprocessor)
# Routes the numeric and categorical column lists to their transformers.
# NOTE(review): columns of X in neither list (e.g. a datetime64 column) are
# passed to no branch; no `remainder` is set, so presumably they are dropped
# by the default — confirm that is intended for the raw datetime column.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features)
    ])
print('✓ Preprocessor created')

In [None]:
# Cell 58: Preprocessor sanity check
# Counts the input columns handed to the preprocessor (before one-hot
# expansion), not the width of the transformed matrix.
print(f'Numeric features: {len(numeric_features)}')
print(f'Categorical features: {len(categorical_features)}')
print(f'Total input features: {len(numeric_features) + len(categorical_features)}')

In [None]:
# Cell 59: Feature count check
# NOTE(review): this counts every column of X, which can exceed the
# numeric + categorical total in Cell 58 when X holds columns of other
# dtypes that the preprocessor does not receive.
print(f'Total features going into model: {len(X.columns)}')
print(f'Sample features: {X.columns[:5].tolist()}')

---
## End Preprocessing
Preprocessing pipeline configured.
---

In [None]:
# Cell 61: Train/validation split
# shuffle=False keeps row order, so the last 20% of rows form the validation
# set. That is a chronological hold-out only because df was sorted by the
# datetime column in Cell 18. With shuffle=False, random_state plays no role
# in the split.
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, random_state=RANDOM_STATE, shuffle=False
)
print(f'✓ Train: {X_train.shape}, Val: {X_val.shape}')

In [None]:
# Cell 62: Print split sizes
# Comparing the two target means gives a quick signal of drift between the
# earlier (train) and later (validation) part of the data.
print(f'Training samples: {len(X_train)}')
print(f'Validation samples: {len(X_val)}')
print(f'Train target mean: {y_train.mean():.2f}')
print(f'Val target mean: {y_val.mean():.2f}')

In [None]:
# Cell 63: TimeSeriesSplit definition
# Five-split time-series splitter; its folds are inspected by the sanity
# check in Cell 64.
tscv = TimeSeriesSplit(n_splits=5)
print(f'✓ TimeSeriesSplit: {tscv.get_n_splits()} splits')

In [None]:
# Cell 64: CV sanity check
# Prints the train/validation sizes of the first three folds only; the loop
# breaks once i reaches 2.
for i, (train_idx, val_idx) in enumerate(tscv.split(X_train)):
    print(f'Fold {i+1}: Train={len(train_idx)}, Val={len(val_idx)}')
    if i >= 2:
        break

---
## End Split & CV
Data split and cross-validation configured.
---

In [None]:
# Cell 66: Linear regression baseline
# Simplest reference point: preprocessing + ordinary least squares, fit on
# the training part and scored on the validation part.
lr_model = LinearRegression()
lr_pipe = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('model', lr_model),
])
y_pred_lr = lr_pipe.fit(X_train, y_train).predict(X_val)
metrics_lr = metrics_summary(y_val, y_pred_lr)
print(f'Linear Regression: RMSE={metrics_lr["RMSE"]:.2f}, R2={metrics_lr["R2"]:.3f}')

In [None]:
# Cell 67: Random Forest baseline
# Untuned forest (50 trees, depth 10) as the non-linear reference point,
# fit on the training part and scored on the validation part.
rf_model = RandomForestRegressor(
    n_estimators=50,
    max_depth=10,
    random_state=RANDOM_STATE,
    n_jobs=-1,
)
rf_pipe = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('model', rf_model),
])
y_pred_rf = rf_pipe.fit(X_train, y_train).predict(X_val)
metrics_rf = metrics_summary(y_val, y_pred_rf)
print(f'Random Forest: RMSE={metrics_rf["RMSE"]:.2f}, R2={metrics_rf["R2"]:.3f}')

In [None]:
# Cell 68: Baseline evaluation
# One row per baseline: model name first, then its MAE / RMSE / R2.
baseline_results = pd.DataFrame([metrics_lr, metrics_rf])
baseline_results.insert(0, 'Model', ['LinearRegression', 'RandomForest'])
display(baseline_results)

In [None]:
# Cell 69: Save baseline results
# Written under MODEL_DIR (created in Cell 9) rather than a hard-coded
# 'models/' literal, so changing MODEL_DIR moves this file too.
baseline_results_path = os.path.join(MODEL_DIR, 'baseline_results.csv')
baseline_results.to_csv(baseline_results_path, index=False)
print('✓ Baseline results saved')

---
## End Baseline Models
Baseline models trained and evaluated.
---

In [None]:
# Cell 71: Define Optuna objective
def objective(trial):
    """Optuna objective: mean time-series CV RMSE of a random forest.

    Hyperparameters are scored with the `tscv` folds inside the training
    data only. The hold-out (X_val, y_val) is never touched here, so the
    validation metrics reported later are not biased by the search.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Supplies n_estimators (50-200), max_depth (5-20) and
        min_samples_split (2-20).

    Returns
    -------
    float
        RMSE averaged over the folds (lower is better).
    """
    # Imported locally so this cell carries its own dependency.
    from sklearn.base import clone

    params = {
        'n_estimators': trial.suggest_int('n_estimators', 50, 200),
        'max_depth': trial.suggest_int('max_depth', 5, 20),
        'min_samples_split': trial.suggest_int('min_samples_split', 2, 20),
    }

    fold_rmse = []
    for fit_idx, holdout_idx in tscv.split(X_train):
        # clone() keeps the shared `preprocessor` instance, which the other
        # pipelines also hold, from being refit on a fold subset.
        pipe = Pipeline([
            ('preprocessor', clone(preprocessor)),
            ('model', RandomForestRegressor(**params, random_state=RANDOM_STATE, n_jobs=-1)),
        ])
        pipe.fit(X_train.iloc[fit_idx], y_train.iloc[fit_idx])
        fold_pred = pipe.predict(X_train.iloc[holdout_idx])
        fold_rmse.append(np.sqrt(mean_squared_error(y_train.iloc[holdout_idx], fold_pred)))

    return float(np.mean(fold_rmse))

print('✓ Optuna objective defined')

In [None]:
# Cell 72: Define parameter space
# NOTE(review): these bounds mirror the literals passed to
# trial.suggest_int in objective() (Cell 71), but objective() does not read
# this dict, so the two must be kept in sync by hand.
param_space = {
    'n_estimators': (50, 200),
    'max_depth': (5, 20),
    'min_samples_split': (2, 20)
}
print(f'✓ Parameter space: {param_space}')

In [None]:
# Cell 73: Create Optuna study
# The sampler is seeded with RANDOM_STATE so a Restart & Run All reproduces
# the same trial sequence; the default sampler is unseeded.
if HAS_OPTUNA:
    study = optuna.create_study(
        direction='minimize',
        sampler=optuna.samplers.TPESampler(seed=RANDOM_STATE),
    )
    print('✓ Optuna study created')
else:
    study = None
    print('⚠ Optuna not available, skipping tuning')

In [None]:
# Cell 74: Run Optuna optimization
# Ten trials of the objective; skipped when Optuna is unavailable.
if not (HAS_OPTUNA and study):
    print('⚠ Skipping optimization')
else:
    study.optimize(objective, n_trials=10, show_progress_bar=True)
    print(f'✓ Optimization complete: Best RMSE={study.best_value:.2f}')

In [None]:
# Cell 75: Best params summary
# Use the study's best trial when there is one; otherwise fall back to a
# fixed default configuration so the following cells still run.
if not (HAS_OPTUNA and study):
    best_params = dict(n_estimators=100, max_depth=15, min_samples_split=5)
    print(f'Using default params: {best_params}')
else:
    best_params = study.best_params
    print('Best parameters:')
    for name, value in best_params.items():
        print(f'  {name}: {value}')

In [None]:
# Cell 76: Train tuned model
# best_params comes from Cell 75 (Optuna's best trial, or the fixed defaults
# when tuning was skipped). The pipeline is refit on the full training split.
tuned_model = RandomForestRegressor(**best_params, random_state=RANDOM_STATE, n_jobs=-1)
tuned_pipe = Pipeline([('preprocessor', preprocessor), ('model', tuned_model)])
tuned_pipe.fit(X_train, y_train)
print('✓ Tuned model trained')

In [None]:
# Cell 77: Validation prediction
# Predictions of the tuned pipeline on the validation split, scored in the
# next cell.
y_pred_tuned = tuned_pipe.predict(X_val)
print(f'✓ Predictions generated: {len(y_pred_tuned)} values')

In [None]:
# Cell 78: Metrics calculation
# Validation metrics of the tuned pipeline (RMSE/MAE to 2 decimals, R2 to 3).
metrics_tuned = metrics_summary(y_val, y_pred_tuned)
print('Tuned Model Performance:')
for metric_name, fmt in (('RMSE', '.2f'), ('MAE', '.2f'), ('R2', '.3f')):
    print(f'  {metric_name}: {metrics_tuned[metric_name]:{fmt}}')
In [None]:
# Cell 79: Model comparison table
# Leaderboard of the three fitted pipelines, best (lowest RMSE) first.
leaderboard_rows = [
    dict(Model='LinearRegression', **metrics_lr),
    dict(Model='RandomForest_Baseline', **metrics_rf),
    dict(Model='RandomForest_Tuned', **metrics_tuned),
]
all_results = pd.DataFrame(leaderboard_rows).sort_values('RMSE')
print('\n🏆 MODEL LEADERBOARD 🏆')
display(all_results)
top_row = all_results.iloc[0]
print(f'\nBest model: {top_row["Model"]} (RMSE={top_row["RMSE"]:.2f})')

In [None]:
# Cell 80: Feature importance / SHAP analysis
# Feature names are read from the fitted preprocessor so each importance is
# paired with the column the model actually saw. Guessing one-hot names from
# the first few unique values and truncating the list pairs importances with
# the wrong labels.
fitted_forest = tuned_pipe.named_steps['model']
if hasattr(fitted_forest, 'feature_importances_'):
    feature_names_out = list(tuned_pipe.named_steps['preprocessor'].get_feature_names_out())
    importances = fitted_forest.feature_importances_
    assert len(feature_names_out) == len(importances), (
        f'{len(feature_names_out)} names vs {len(importances)} importances'
    )

    imp_df = pd.DataFrame({
        'Feature': feature_names_out,
        'Importance': importances
    }).sort_values('Importance', ascending=False)

    print('\nTop 10 Features:')
    display(imp_df.head(10))
else:
    print('⚠ Feature importance not available')

In [None]:
# Cell 81: Save best model
# NOTE(review): tuned_pipe is saved unconditionally under the name
# 'best_model.pkl', whether or not it ranks first on the Cell 79 leaderboard.
# The whole pipeline (preprocessor + model) is persisted.
best_model_path = os.path.join(MODEL_DIR, 'best_model.pkl')
joblib.dump(tuned_pipe, best_model_path)
print(f'✓ Best model saved to {best_model_path}')

In [None]:
# Cell 82: Inference on test / end notebook
# Closing summary of the model that was saved (the tuned random forest).
# Its metrics are labelled as validation metrics, and a warning is printed
# when another model ranks higher on the leaderboard. No separate test set
# was scored in this notebook, so it makes no production-readiness claim.
print('='*70)
print('NOTEBOOK COMPLETE')
print('='*70)
print(f'Final Model: RandomForest (Tuned)')
print(f'Validation RMSE: {metrics_tuned["RMSE"]:.2f}')
print(f'Validation R²: {metrics_tuned["R2"]:.3f}')
leaderboard_top = all_results.iloc[0]
if leaderboard_top['Model'] != 'RandomForest_Tuned':
    print(f"⚠ Leaderboard best is {leaderboard_top['Model']} "
          f"(RMSE={leaderboard_top['RMSE']:.2f}), not the saved model")
print(f'Model saved: {best_model_path}')
print('\nEvaluate on a held-out test set before production deployment.')
print('='*70)