In [20]:
import pickle
import pandas as pd
import numpy as np
import os

def load_model(model_path):
    """Read a pickled model object from ``model_path`` and return it.

    Args:
        model_path: Path of the pickle file to open in binary mode.

    Returns:
        Whatever object was stored in the pickle file.

    Note:
        ``pickle.load`` can execute arbitrary code while deserializing,
        so only point this at files from a trusted source.
    """
    with open(model_path, 'rb') as model_file:
        loaded_model = pickle.load(model_file)
    return loaded_model

def load_feature_names(model_dir='data/models'):
    """Return the feature names pickled alongside the models, if present.

    Args:
        model_dir: Directory expected to contain ``feature_names.pkl``.

    Returns:
        The unpickled contents of ``feature_names.pkl``, or ``None`` when
        that file does not exist in ``model_dir``.
    """
    names_file = os.path.join(model_dir, 'feature_names.pkl')

    # Nothing to load: signal absence with None rather than raising.
    if not os.path.exists(names_file):
        return None

    with open(names_file, 'rb') as handle:
        return pickle.load(handle)

def show_feature_importance(model, feature_names=None, top_n=20):
    """Print the most important features of a trained model and return the ranking.

    Args:
        model: Object exposing a ``feature_importances_`` sequence.
        feature_names: Optional sequence of labels, one per importance value.
            When it is ``None`` or its length differs from the number of
            importances, generic ``feature_<i>`` labels are used instead.
            Labels do not have to be strings.
        top_n: Number of highest-importance rows to print.

    Returns:
        A DataFrame with columns ``feature`` and ``importance`` covering all
        features, sorted by importance in descending order.
    """
    importances = model.feature_importances_
    n_features = len(importances)

    if feature_names is None or len(feature_names) != n_features:
        print("⚠ Feature names not available or mismatch, using generic names")
        feature_names = [f'feature_{i}' for i in range(n_features)]

    importance_df = pd.DataFrame({
        'feature': list(feature_names),
        'importance': importances
    }).sort_values('importance', ascending=False)

    print("="*80)
    print(f"Top {top_n} Feature Importances")
    print("="*80)
    print(f"Total features: {n_features}")
    print()

    # Iterate over the columns rather than iterrows(): iterrows() builds a
    # Series per row, which upcasts numeric labels to float, and the ``s``
    # format code then rejects them. str() keeps any label type printable.
    top_rows = importance_df.head(top_n)
    for name, score in zip(top_rows['feature'], top_rows['importance']):
        print(f"{str(name):40s} {score:.6f}")

    return importance_df

# List available models
model_dir = 'data/models'
# isdir (not exists): a plain file at this path would make os.listdir raise.
if os.path.isdir(model_dir):
    # os.listdir returns entries in arbitrary order; sort so the printed
    # indices are stable from run to run.
    models = sorted(f for f in os.listdir(model_dir) if f.endswith('_model.pkl'))
    print(f"Available models ({len(models)}):")
    for i, model_file in enumerate(models):
        print(f"  [{i}] {model_file}")

    # Load shared feature names
    feature_names = load_feature_names(model_dir)
    # Explicit None/length check: truth-testing a numpy array or pandas Index
    # raises ValueError, and the pickle may hold either.
    if feature_names is not None and len(feature_names) > 0:
        print(f"\n✓ Feature names loaded: {len(feature_names)} features")
    else:
        print("\n⚠ Feature names file not found - will use generic names")
else:
    print(f"⚠ Model directory not found: {model_dir}")
    feature_names = None

Available models (2):
  [0] การเดินทาง_model.pkl
  [1] คลอง_model.pkl

✓ Feature names loaded: 50 features


In [21]:
# Pick the model to inspect and resolve its path inside the model directory
model_file = 'การเดินทาง_model.pkl'  # Available: การเดินทาง_model.pkl, คลอง_model.pkl
model_path = os.path.join(model_dir, model_file)

print(f"Loading model: {model_file}")
model = load_model(model_path)
print(f"Model type: {type(model).__name__}")

# Rank features using the shared feature names (generic names are used if unavailable)
importance_df = show_feature_importance(
    model,
    feature_names=feature_names,
    top_n=15,
)

Loading model: การเดินทาง_model.pkl
Model type: RandomForestClassifier
Model type: RandomForestClassifier
Top 15 Feature Importances
Total features: 50

day_cos                                  0.236952
grid_lat                                 0.138662
latitude_weather                         0.133204
day_sin                                  0.107108
longitude                                0.046067
latitude                                 0.029435
wind_direction_10m (°)                   0.025649
longitude_weather                        0.024709
day_of_week                              0.014053
cloud_cover (%)                          0.013700
district_ธนบุรี                          0.013429
month_sin                                0.012991
hour_sin                                 0.012263
rain (mm)                                0.011991
district_ปทุมวัน                         0.011694
Top 15 Feature Importances
Total features: 50

day_cos                                  0.236952


In [22]:
# Option: Load feature names if saved with model metadata
# If you retrain models with metadata, use this approach:

def load_model_with_metadata(model_path):
    """Unpickle ``model_path`` and split it into a model and its feature names.

    Two on-disk layouts are accepted:

    * a dict, from which the ``'model'`` entry and the optional
      ``'feature_names'`` entry are taken;
    * anything else, which is treated as the model itself.

    Args:
        model_path: Path of the pickle file to read.

    Returns:
        A ``(model, feature_names)`` tuple; ``feature_names`` is ``None``
        when the file holds no such entry.

    Raises:
        KeyError: If the pickle holds a dict without a ``'model'`` key.
    """
    with open(model_path, 'rb') as handle:
        payload = pickle.load(handle)

    # Old format: the pickle is the bare model, with no metadata attached.
    if not isinstance(payload, dict):
        return payload, None

    # New format: model and metadata bundled in one dict.
    return payload['model'], payload.get('feature_names')

# Example usage:
model_path = os.path.join(model_dir, 'การเดินทาง_model.pkl')
# Bind the per-model names to their own variable: reusing `feature_names`
# would overwrite the shared names loaded earlier in the notebook, so
# re-running the previous cell would silently fall back to generic names.
model, embedded_feature_names = load_model_with_metadata(model_path)

# Explicit None/length check: truth-testing a numpy array or pandas Index
# raises ValueError.
if embedded_feature_names is not None and len(embedded_feature_names) > 0:
    print(f"Found {len(embedded_feature_names)} feature names")
    importance_df = pd.DataFrame({
        'feature': embedded_feature_names,
        'importance': model.feature_importances_
    }).sort_values('importance', ascending=False)
    print(importance_df.head(15))
else:
    print("No feature names saved with model - showing generic names")
    show_feature_importance(model, top_n=15)

No feature names saved with model - showing generic names
⚠ Feature names not available or mismatch, using generic names
⚠ Feature names not available or mismatch, using generic names
Top 15 Feature Importances
Total features: 50

feature_21                               0.236952
feature_2                                0.138662
feature_13                               0.133204
feature_20                               0.107108
feature_0                                0.046067
feature_1                                0.029435
feature_10                               0.025649
feature_14                               0.024709
feature_16                               0.014053
feature_9                                0.013700
feature_31                               0.013429
feature_22                               0.012991
feature_18                               0.012263
feature_7                                0.011991
feature_40                               0.011694
Top 15 Feature Impo