In [24]:
import pickle
import pandas as pd
import numpy as np
import os

def load_model(model_path):
    """Unpickle and return the object stored at ``model_path``.

    Args:
        model_path: Path to a pickle file.

    Returns:
        Whatever object the pickle contains.

    Note:
        Unpickling can execute arbitrary code, so only point this at
        files you trust.
    """
    with open(model_path, 'rb') as handle:
        loaded = pickle.load(handle)
    return loaded

def load_feature_names(model_dir='data/models'):
    """Return the unpickled contents of ``feature_names.pkl`` in ``model_dir``.

    Args:
        model_dir: Directory expected to contain ``feature_names.pkl``.

    Returns:
        The unpickled object, or ``None`` when the file does not exist.
    """
    names_file = os.path.join(model_dir, 'feature_names.pkl')

    # Nothing to load: signal that with None instead of raising.
    if not os.path.exists(names_file):
        return None

    with open(names_file, 'rb') as handle:
        names = pickle.load(handle)
    return names

def show_feature_importance(model, feature_names=None, top_n=20):
    """Print and return a model's feature importances, highest first.

    Args:
        model: Object exposing a ``feature_importances_`` sequence.
        feature_names: Optional sequence with one label per importance
            value. When it is ``None`` or its length differs from the
            number of importances, generic ``feature_<i>`` labels are
            used instead.
        top_n: Number of rows to print.

    Returns:
        DataFrame with columns ``'feature'`` and ``'importance'`` sorted
        by descending importance. It holds every feature, not only the
        ``top_n`` rows that were printed.
    """
    importances = model.feature_importances_
    n_features = len(importances)

    if feature_names is None or len(feature_names) != n_features:
        print("⚠ Feature names not available or mismatch, using generic names")
        feature_names = [f'feature_{i}' for i in range(n_features)]

    importance_df = pd.DataFrame({
        'feature': feature_names,
        'importance': importances
    }).sort_values('importance', ascending=False)

    print("="*80)
    print(f"Top {top_n} Feature Importances")
    print("="*80)
    print(f"Total features: {n_features}")
    print()
    top_rows = importance_df.head(top_n)
    # Iterate the two columns directly instead of iterrows(): iterrows
    # upcasts each row to a single dtype, and the 's' format code only
    # accepts str, so non-string labels (e.g. integer column names) would
    # raise. str() makes any label printable.
    for name, score in zip(top_rows['feature'], top_rows['importance']):
        print(f"{str(name):40s} {score:.6f}")

    return importance_df

# List available models
model_dir = 'data/models'
if os.path.exists(model_dir):
    # os.listdir returns entries in arbitrary order; sort so the printed
    # indices are the same on every run and every machine.
    models = sorted(f for f in os.listdir(model_dir) if f.endswith('_model.pkl'))
    print(f"Available models ({len(models)}):")
    for i, model_file in enumerate(models):
        print(f"  [{i}] {model_file}")

    # Load shared feature names
    feature_names = load_feature_names(model_dir)
    # Explicit None/length check: plain truthiness raises for array-like
    # containers (e.g. a NumPy array or pandas Index of names).
    if feature_names is not None and len(feature_names) > 0:
        print(f"\n✓ Feature names loaded: {len(feature_names)} features")
    else:
        print("\n⚠ Feature names file not found - will use generic names")
else:
    print(f"⚠ Model directory not found: {model_dir}")
    # Define both names on this path too, so later cells never pick up
    # stale values left in the kernel from a previous run.
    models = []
    feature_names = None

Available models (25):
  [0] PM2.5_model.pkl
  [1] การเดินทาง_model.pkl
  [2] กีดขวาง_model.pkl
  [3] คนจรจัด_model.pkl
  [4] คลอง_model.pkl
  [5] ความปลอดภัย_model.pkl
  [6] ความสะอาด_model.pkl
  [7] จราจร_model.pkl
  [8] ต้นไม้_model.pkl
  [9] ถนน_model.pkl
  [10] ทางเท้า_model.pkl
  [11] ท่อระบายน้ำ_model.pkl
  [12] น้ำท่วม_model.pkl
  [13] ป้าย_model.pkl
  [14] ป้ายจราจร_model.pkl
  [15] ร้องเรียน_model.pkl
  [16] สอบถาม_model.pkl
  [17] สะพาน_model.pkl
  [18] สัตว์จรจัด_model.pkl
  [19] สายไฟ_model.pkl
  [20] ห้องน้ำ_model.pkl
  [21] เสนอแนะ_model.pkl
  [22] เสียงรบกวน_model.pkl
  [23] แสงสว่าง_model.pkl
  [24] ไม่ระบุ_model.pkl

✓ Feature names loaded: 72 features


In [25]:
# Load a specific model
# model_file can be any file name printed by the model listing above,
# e.g. การเดินทาง_model.pkl or คลอง_model.pkl.
model_file = 'แสงสว่าง_model.pkl'  # change this to inspect a different model
model_path = os.path.join(model_dir, model_file)

print(f"Loading model: {model_file}")
model = load_model(model_path)
print(f"Model type: {type(model).__name__}")

# Show feature importance with actual feature names
# (feature_names is the shared list loaded in the listing cell; it may be
# None, in which case show_feature_importance prints generic names).
importance_df = show_feature_importance(model, feature_names, top_n=15)

Loading model: แสงสว่าง_model.pkl
Model type: RandomForestClassifier
Top 15 Feature Importances
Total features: 72

hour                                     0.163605
hour_sin                                 0.138243
hour_cos                                 0.090001
temperature_2m (°C)                      0.032698
surface_pressure (hPa)                   0.032409
wind_direction_10m (°)                   0.031748
wind_speed_10m (km/h)                    0.031612
relative_humidity_2m (%)                 0.031314
dew_point_2m (°C)                        0.031029
vapour_pressure_deficit (kPa)            0.030779
cloud_cover (%)                          0.030255
pm25                                     0.028893
pm10                                     0.028086
rain (mm)                                0.027096
o3                                       0.025768


In [26]:
# Option: Load feature names if saved with model metadata
# If you retrain models with metadata, use this approach:

def load_model_with_metadata(model_path):
    """Unpickle ``model_path`` and split it into ``(model, feature_names)``.

    Two on-disk layouts are recognised:

    * a dict with a ``'model'`` entry and an optional ``'feature_names'``
      entry (metadata format);
    * any other object, which is treated as the model itself (old format).

    Args:
        model_path: Path to the pickle file.

    Returns:
        Tuple ``(model, feature_names)``; ``feature_names`` is ``None``
        when the pickle carries no names.

    Raises:
        KeyError: If the pickle is a dict without a ``'model'`` key.
    """
    with open(model_path, 'rb') as handle:
        payload = pickle.load(handle)

    # Old format: the pickle is the bare model, no names attached.
    if not isinstance(payload, dict):
        return payload, None

    # Metadata format: 'model' is required, 'feature_names' is optional.
    return payload['model'], payload.get('feature_names')

# Example usage:
model_path = os.path.join(model_dir, 'การเดินทาง_model.pkl')
# Keep the per-model names in their own variable: unpacking into
# `feature_names` would replace the shared list loaded in the first cell
# with None whenever the pickle is in the old, model-only format, and
# re-running earlier cells would then silently print generic names.
model, embedded_feature_names = load_model_with_metadata(model_path)

if embedded_feature_names is not None and len(embedded_feature_names) > 0:
    print(f"Found {len(embedded_feature_names)} feature names")
else:
    print("No feature names saved with model - showing generic names")

# show_feature_importance handles missing or mismatched names itself by
# falling back to generic labels. Assigning its return value on every path
# keeps importance_df in sync with the model loaded in this cell.
importance_df = show_feature_importance(model, embedded_feature_names, top_n=15)

No feature names saved with model - showing generic names
⚠ Feature names not available or mismatch, using generic names
Top 15 Feature Importances
Total features: 72

feature_19                               0.119638
feature_18                               0.115401
feature_17                               0.080729
feature_16                               0.076082
feature_14                               0.057286
feature_21                               0.039796
feature_20                               0.034196
feature_13                               0.029135
feature_9                                0.027539
feature_2                                0.025595
feature_10                               0.025186
feature_0                                0.023698
feature_68                               0.022070
feature_4                                0.021627
feature_11                               0.021490
