In [6]:
import pandas as pd
import json
import base64
import tempfile
import os
from catboost import CatBoostRegressor

def load_model_from_json(filepath='model.json'):
    """
    Load a CatBoost model that is stored inside a JSON wrapper file.

    The JSON document must contain the key ``'model_data_b64'`` holding the
    base64-encoded model bytes. The bytes are written to a temporary ``.cbm``
    file, loaded through ``CatBoostRegressor.load_model``, and the temporary
    file is removed again -- also when writing or loading fails.

    Parameters
    ----------
    filepath : str
        Path of the JSON wrapper file.

    Returns
    -------
    CatBoostRegressor
        The regressor instance after ``load_model`` has been called on it.

    Raises
    ------
    FileNotFoundError
        If ``filepath`` cannot be opened because it does not exist.
    KeyError
        If the JSON document has no ``'model_data_b64'`` entry.
    """
    # Read JSON file
    with open(filepath, 'r') as f:
        model_json = json.load(f)

    # Decode base64 model data
    model_data = base64.b64decode(model_json['model_data_b64'])

    # delete=False because the model is loaded by *path* after the handle has
    # been closed; removal is therefore done by hand in the `finally` below.
    tmp = tempfile.NamedTemporaryFile(suffix='.cbm', delete=False)
    tmp_path = tmp.name
    try:
        # Close the handle before loading so the file is fully flushed.
        with tmp:
            tmp.write(model_data)

        # Load model from temporary file
        loaded_model = CatBoostRegressor()
        loaded_model.load_model(tmp_path)
    finally:
        # Always clean up, even if the write or the load raised.
        os.unlink(tmp_path)

    print(f"✅ Model loaded from {filepath}")

    # Optional: Print available parameters
    if 'parameters' in model_json:
        print(f"   Model parameters: {list(model_json['parameters'].keys())}")

    return loaded_model

def load_and_predict(model_path='model/trip_duration_model_v1.json'):
    """
    Load the model from ``model_path`` and predict on one hand-built sample row.

    Loading and prediction are guarded separately so that a failure in the
    prediction step is not reported as a model-loading / JSON-structure error.
    No exception escapes: every failure is printed and signalled through the
    return value.

    Parameters
    ----------
    model_path : str
        Path of the JSON model file handed to ``load_model_from_json``.

    Returns
    -------
    tuple
        ``(prediction, loaded_model)`` on success, ``(None, None)`` if loading
        or prediction failed.
    """
    # --- Stage 1: load the model -------------------------------------------
    try:
        loaded_model = load_model_from_json(model_path)
    except FileNotFoundError as e:
        print(f"❌ Error: File not found: {e}")
        print(f"Make sure the model file exists at '{model_path}'")
        return None, None
    except KeyError as e:
        print(f"❌ Error: Missing key in JSON file: {e}")
        print("The JSON file might not have the expected structure")
        return None, None
    except Exception as e:
        print(f"❌ Error loading model: {e}")
        return None, None

    # --- Stage 2: build the sample row and predict -------------------------
    try:
        # Create some test data (must match original feature structure)
        test_data = pd.DataFrame({
            'vendorid': [1],           # Categorical
            'passenger_count': [1],    # Numerical
            'trip_distance': [2.5],    # Numerical
            'fare_amount': [10.0],     # Numerical
            'extra': [0.5],            # Numerical
            'mta_tax': [0.5],          # Numerical
            'tip_amount': [1.5],       # Numerical
            'tolls_amount': [0.0],     # Numerical
            'improvement_surcharge': [0.3],  # Numerical
            'total_amount': [12.3],    # Numerical
            'hour': [14],              # Categorical
            'dayofweek': [2],          # Categorical
            'is_weekend': [0],         # Categorical
            'pu_do': ['123_456'],      # Categorical
        })

        # Make prediction
        prediction = loaded_model.predict(test_data)
        print(f"Predicted duration: {prediction[0]:.2f} minutes")
    except Exception as e:
        # Kept broad on purpose: this helper reports problems instead of
        # raising, matching the load stage above.
        print(f"❌ Error making prediction: {e}")
        return None, None

    return prediction, loaded_model

In [7]:
# Run the load-and-predict smoke test. The call is the last expression of the
# cell, so its returned (prediction, model) tuple is echoed as the cell output.
load_and_predict()

✅ Model loaded from model/trip_duration_model_v1.json
   Model parameters: ['iterations', 'learning_rate', 'depth', 'loss_function', 'border_count', 'random_seed', 'verbose', 'task_type', 'devices']
Predicted duration: 11.36 minutes


(array([11.36024721]), <catboost.core.CatBoostRegressor at 0x7580d81d9090>)