### An MLflow model or artifact can be downloaded using either of the following:
- client = MlflowClient(tracking_uri=MLFLOW_TRACKING_URI)
- mlflow.artifacts.download_artifacts()

*The run ID and artifact path are taken from the details in the 01 intro modelling notebook.*





import mlflow
import mlflow.xgboost

# Point the MLflow client at the tracking server used throughout this notebook.
mlflow.set_tracking_uri("http://127.0.0.1:5000")

# Method 1: Load directly from a model-registry URI (the simplest option).
# The trailing "latest" can be replaced by a specific version such as "/1".
model_uri = "models:/nyc-taxi-duration-model-January/latest"  # or specific version like "/1"
model = mlflow.xgboost.load_model(model_uri)
print("Method 1 - Model loaded from registry:", type(model))

# Method 2: Load from a specific run ID.
# The URI has the form "runs:/<run_id>/<artifact path of the logged model>".
run_id = "c2cbe18632664c65b30f263292544ade"  # Your run ID
model_uri = f"runs:/{run_id}/nyc-duration-model"
model = mlflow.xgboost.load_model(model_uri)
print("Method 2 - Model loaded from run:", type(model))

# Method 3: Download the model artifacts first, then load them from the
# returned local path (`dst`) with the XGBoost flavor.
dst = mlflow.artifacts.download_artifacts(
    artifact_uri="mlflow-artifacts:/3/models/m-f7fa83f11f764032b16af5490676a2b1/artifacts"
)
model = mlflow.xgboost.load_model(dst)  # load with mlflow.xgboost.load_model
print("Method 3 - Model loaded from downloaded artifacts:", type(model))

# Method 4: Load the same downloaded artifacts with the generic Python-function
# flavor (universal but less optimal). Reuses `dst` from Method 3.
model_pyfunc = mlflow.pyfunc.load_model(dst)
print("Method 4 - Model loaded as PyFunc:", type(model_pyfunc))

# Method 5: Complete example with error handling
def load_xgboost_model_safely(model_name: str, version: str = "latest"):
    """Load an XGBoost model from the MLflow registry without raising.

    Args:
        model_name: Name of the registered model.
        version: Version segment of the registry URI; defaults to "latest".

    Returns:
        The object returned by ``mlflow.xgboost.load_model`` on success, or
        ``None`` if anything goes wrong (the error is printed, not raised).
    """
    try:
        # Registry URIs take the form "models:/<name>/<version>".
        loaded_model = mlflow.xgboost.load_model(f"models:/{model_name}/{version}")
        print(f"✅ Successfully loaded {model_name} version {version}")
        return loaded_model
    except Exception as load_error:
        # Best-effort by design: report the failure and signal it with None.
        print(f"❌ Error loading model: {load_error}")
        return None

In [1]:
import joblib
import pandas as pd
from mlflow.tracking import MlflowClient


import mlflow
mlflow.set_tracking_uri("http://127.0.0.1:5000")


In [2]:
MLFLOW_TRACKING_URI = 'http://127.0.0.1:5000'
RUN_ID = '4073f16458d44ecb8602bb8040aff8ed'
artifact_path = "preprocessor/preprocessing.pkl"

client = MlflowClient(tracking_uri=MLFLOW_TRACKING_URI)

In [3]:
# Download the fitted preprocessor from an MLflow run and apply it to one ride.
# The client, run ID and artifact path carry the same values as the cell above.
client = MlflowClient(tracking_uri=MLFLOW_TRACKING_URI)
RUN_ID = "4073f16458d44ecb8602bb8040aff8ed"
artifact_path = "preprocessor/preprocessing.pkl"

# Step 1: Download the preprocessor file (returns local path as string)
preprocessor_path = client.download_artifacts(run_id=RUN_ID, path=artifact_path)
print(f"File downloaded to: {preprocessor_path}")

# Step 2: Load the actual preprocessor object
# NOTE: joblib.load unpickles the file, so only load artifacts from a
# tracking server you trust.
preprocessor = joblib.load(preprocessor_path)

# Step 3: Transform new input data
# A single-row DataFrame describing one example ride.
df = pd.DataFrame([{
    "passenger_count":1.0,
    "trip_distance": 5.93,
    "fare_amount":24.70,
    "total_amount":34.00,
    "PULocationID": 75,
    "DOLocationID": 235,
    
}])

# The recorded output below shows a 1x6 array for this input.
X_processed = preprocessor.transform(df)
# Bare expression so the notebook displays the transformed array.
X_processed


Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

File downloaded to: /var/folders/pk/nk0t185511z8g6hmxr_bhkfw0000gn/T/tmp5vlsonta/preprocessing.pkl


array([[ 1.71015764, -0.277915  ,  0.82854128,  0.96872232, 12.75550494,
        12.75550494]])

In [4]:
# Load the registered model directly from the MLflow model registry.
# NOTE(review): this cell uses the sklearn flavor, while the other cells load
# models with mlflow.xgboost — confirm which flavor this model was logged with.

model = mlflow.sklearn.load_model("models:/XGBoostdurationModel/1")  # version 1
# Predict on the preprocessed row produced by the previous cell.
# Despite its name, `feature` holds the model's predictions.
feature = model.predict(X_processed)
print(feature[0])

Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

14.025499999999997


In [4]:
# Download the model artifacts to a local directory, then load the model
# from that directory.

import  os
import mlflow.xgboost
mlflow.set_tracking_uri("http://127.0.0.1:5000")

# `dst` is the local path that the artifacts were downloaded to.
dst = mlflow.artifacts.download_artifacts(
    artifact_uri="mlflow-artifacts:/3/models/m-f7fa83f11f764032b16af5490676a2b1/artifacts"
)

# Show where the artifacts landed and which files were downloaded.
print("Downloaded:", dst, os.listdir(dst))

# Load the downloaded directory with the XGBoost flavor.
model = mlflow.xgboost.load_model(dst)  # use mlflow.xgboost.load_model
print("Model loaded from downloaded artifacts:", type(model))


Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

Downloaded: /var/folders/pk/nk0t185511z8g6hmxr_bhkfw0000gn/T/tmpulk8odxt/artifacts ['python_env.yaml', 'requirements.txt', 'MLmodel', 'serving_input_example.json', 'model.ubj', 'input_example.json', 'conda.yaml']
Model loaded from downloaded artifacts: <class 'xgboost.sklearn.XGBRegressor'>


### MAKE IT A SCRIPT

In [6]:
%%writefile predict.py
import os
import joblib
import pandas as pd

# ======== LOCAL PATHS ========
# NOTE(review): both values look like OS temp-directory paths from a one-off
# artifact download; they will probably not exist on another machine or after
# the temp directory is cleaned — confirm before reusing this script.
PREPROCESSOR_PATH = "/var/folders/pk/nk0t185511z8g6hmxr_bhkfw0000gn/T/tmp5vlsonta/preprocessing.pkl"
MODEL_DIR = "/var/folders/pk/nk0t185511z8g6hmxr_bhkfw0000gn/T/tmpulk8odxt/artifacts"  # contains MLmodel, model.ubj, etc.
# =============================================================================

def load_preprocessor_local(preprocessor_path: str):
    """Load the pickled preprocessor stored at a local file path.

    Args:
        preprocessor_path: Path of the preprocessor file on local disk.

    Returns:
        The object returned by ``joblib.load`` for that file.

    Raises:
        FileNotFoundError: If ``preprocessor_path`` is not an existing file.
        Exception: Any other load error; it is printed and then re-raised.
    """
    try:
        if os.path.isfile(preprocessor_path):
            loaded = joblib.load(preprocessor_path)
            print("[INFO] Preprocessor loaded successfully.")
            return loaded
        # Raised inside the try so the missing-file case is reported like
        # every other failure before propagating.
        raise FileNotFoundError(f"Preprocessor not found at: {preprocessor_path}")
    except Exception as load_error:
        print(f"[ERROR] Failed to load preprocessor: {load_error}")
        raise

def load_model_local(model_dir: str):
    """Load the XGBoost model stored in a local MLflow model directory.

    Args:
        model_dir: Directory holding the downloaded MLflow model files
            (the folder that contains the ``MLmodel`` file).

    Returns:
        The model object returned by ``mlflow.xgboost.load_model``.

    Raises:
        FileNotFoundError: If ``model_dir`` is not an existing directory.
        Exception: Any error raised while importing MLflow or loading the
            model; it is printed and then re-raised unchanged.
    """
    try:
        if not os.path.isdir(model_dir):
            raise FileNotFoundError(f"Model directory not found: {model_dir}")
        # This script has no module-level MLflow import, so bring the flavor
        # into scope here, only once the directory is known to exist.
        import mlflow.xgboost

        # Load from the directory passed in by the caller, not from a
        # notebook-only global that is undefined when run as a script.
        model = mlflow.xgboost.load_model(model_dir)
        print("[INFO] Model loaded successfully:")
        return model
    except Exception as e:
        print(f"[ERROR] Failed to load model: {e}")
        raise

def predict_duration(preprocessor, model, ride_df: pd.DataFrame) -> float:
    """Predict a trip duration for the ride(s) in ``ride_df``.

    The raw input is passed through ``preprocessor.transform`` and the result
    is fed to ``model.predict``.

    Args:
        preprocessor: Object exposing ``transform``.
        model: Object exposing ``predict``.
        ride_df: Raw ride input.

    Returns:
        The first prediction, converted to ``float``.
    """
    predictions = model.predict(preprocessor.transform(ride_df))
    first_prediction = predictions[0]
    return float(first_prediction)

def predict_from_dict(ride: dict):
    """Predict the duration of a single ride described by a dict.

    Wraps the dict in a one-row DataFrame, loads the preprocessor and model
    from ``PREPROCESSOR_PATH`` and ``MODEL_DIR`` on every call, and returns the
    result of ``predict_duration``.

    Args:
        ride: Mapping of column name to value for one ride.

    Returns:
        The value returned by ``predict_duration`` for that ride.
    """
    ride_frame = pd.DataFrame([ride])
    fitted_preprocessor = load_preprocessor_local(PREPROCESSOR_PATH)
    duration_model = load_model_local(MODEL_DIR)
    return predict_duration(fitted_preprocessor, duration_model, ride_frame)

if __name__ == "__main__":
    # Demo ride used when the script is executed directly (edit as needed).
    example_ride = {
        "passenger_count": 1.0,
        "trip_distance": 5.93,
        "fare_amount": 24.70,
        "total_amount": 34.00,
        "PULocationID": 75,
        "DOLocationID": 235,
    }

    # Any failure in the pipeline is reported as a single line instead of a
    # traceback.
    try:
        predicted_minutes = predict_from_dict(example_ride)
        print(f"[RESULT] Predicted trip duration: {predicted_minutes:.2f} minutes")
    except Exception as pipeline_error:
        print(f"[FAILED] Prediction pipeline could not complete: {pipeline_error}")


Overwriting predict.py


In [7]:
%%writefile test.py
import predict

# Example ride passed to the prediction script.
ride = {
    "passenger_count": 1.0,
    "trip_distance": 5.93,
    "fare_amount": 24.70,
    "total_amount": 34.00,
    "PULocationID": 75,
    "DOLocationID": 235,
}

# predict_from_dict signals failure by raising, not by returning None, so the
# failure branch must catch the exception to be reachable.
try:
    duration = predict.predict_from_dict(ride)
except Exception:
    duration = None

if duration is None:
    print("Prediction failed.")
    # Keep a non-zero exit status so callers can still detect the failure.
    raise SystemExit(1)

print(f"Predicted duration: {duration:.2f} minutes")


Overwriting test.py


In [8]:
!python test.py

[INFO] Preprocessor loaded successfully.
[INFO] Model loaded successfully:
Predicted duration: 14.03 minutes
