### MLflow model or artifact can be downloaded by
- client = MlflowClient(tracking_uri=MLFLOW_TRACKING_URI)
- mlflow.artifacts.download_artifacts()

In [1]:
import joblib
import pandas as pd
from mlflow.tracking import MlflowClient


import mlflow
mlflow.set_tracking_uri("http://127.0.0.1:5000")


In [2]:
MLFLOW_TRACKING_URI = 'http://127.0.0.1:5000'
RUN_ID = 'ab7ac267f5e945cba9566b7213e58524'
artifact_path = "preprocessor/preprocessing.pkl"

client = MlflowClient(tracking_uri=MLFLOW_TRACKING_URI)

In [3]:
# This will download the artifact locally and return the full local file path
preprocessor = client.download_artifacts(run_id=RUN_ID, path=artifact_path)

print(f"File downloaded to: {preprocessor}")


Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

File downloaded to: /var/folders/pk/nk0t185511z8g6hmxr_bhkfw0000gn/T/tmphme9c66_/preprocessor/preprocessing.pkl


In [4]:
from mlflow.tracking import MlflowClient
import joblib
import pandas as pd

# Download artifact path
client = MlflowClient(tracking_uri=MLFLOW_TRACKING_URI)
RUN_ID = "ab7ac267f5e945cba9566b7213e58524"
artifact_path = "preprocessor/preprocessing.pkl"

# Step 1: Download the file (returns local path as string)
preprocessor_path = client.download_artifacts(run_id=RUN_ID, path=artifact_path)
print(f"File downloaded to: {preprocessor_path}")

# Step 2: Load the actual preprocessor object
preprocessor = joblib.load(preprocessor_path)

# Step 3: Transform new input data
df = pd.DataFrame([{
    "PULocationID": 75,
    "DOLocationID": 235,
    "trip_distance": 5.93
}])

X_processed = preprocessor.transform(df)
X_processed

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

File downloaded to: /var/folders/pk/nk0t185511z8g6hmxr_bhkfw0000gn/T/tmphyo13plb/preprocessor/preprocessing.pkl


array([[ 1.18426239, 14.71795397, 14.71795397]])

In [5]:
import mlflow
import os
from xgboost import XGBRegressor

# Step 1: Download the folder
model_dir = mlflow.artifacts.download_artifacts(
    artifact_uri="mlflow-artifacts:/1/ab7ac267f5e945cba9566b7213e58524/artifacts/nyc-duration-model"
)

print(f"Downloaded to: {model_dir}")
print("Files inside the folder:", os.listdir(model_dir))


Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

Downloaded to: /var/folders/pk/nk0t185511z8g6hmxr_bhkfw0000gn/T/tmpmlafu0cr/nyc-duration-model
Files inside the folder: ['python_env.yaml', 'requirements.txt', 'MLmodel', 'model.xgb', 'serving_input_example.json', 'input_example.json', 'conda.yaml']


In [6]:

# Step 2: Load the XGBoost model from model.xgb
model_path = os.path.join(model_dir, "model.xgb")

model = XGBRegressor()
model.load_model(model_path)

# Step 3: Use the model for prediction
feature = model.predict(X_processed)
print(feature[0])


19.054037


In [10]:
%%writefile predict.py

import os
import joblib
import pandas as pd
from xgboost import XGBRegressor
import mlflow

# === Configuration ===
PREPROCESSOR_URI = "mlflow-artifacts:/1/ab7ac267f5e945cba9566b7213e58524/artifacts/preprocessor/preprocessing.pkl"
MODEL_URI = "mlflow-artifacts:/1/ab7ac267f5e945cba9566b7213e58524/artifacts/nyc-duration-model/model.xgb"

# === Load Preprocessor ===
def load_preprocessor():
    try:
        local_path = mlflow.artifacts.download_artifacts(artifact_uri=PREPROCESSOR_URI)
        preprocessor = joblib.load(local_path)
        print("[INFO] Preprocessor loaded successfully.")
        return preprocessor
    except Exception as e:
        print(f"[ERROR] Failed to load preprocessor: {e}")
        raise

# === Load XGBoost Model ===
def load_model():
    try:
        model_path = mlflow.artifacts.download_artifacts(artifact_uri=MODEL_URI)
        model = XGBRegressor()
        model.load_model(model_path)
        print("[INFO] Model loaded successfully.")
        return model
    except Exception as e:
        print(f"[ERROR] Failed to load model: {e}")
        raise

# === Make Prediction ===
def predict_duration(preprocessor, model, ride_df):
    try:
        X_processed = preprocessor.transform(ride_df)
        prediction = model.predict(X_processed)
        return prediction[0]
    except Exception as e:
        print(f"[ERROR] Prediction failed: {e}")
        raise

# === Predict from Dictionary ===
def predict_from_dict(ride: dict):
    try:
        preprocessor = load_preprocessor()
        model = load_model()
        df = pd.DataFrame([ride])
        return predict_duration(preprocessor, model, df)
    except Exception as e:
        print(f"[ERROR] Failed to predict from dict: {e}")
        return None


if __name__ == "__main__":
    
    try:
        preprocessor = load_preprocessor()
        model = load_model()
        predicted_duration = predict_duration(preprocessor, model, pd.DataFrame([sample_ride]))
        print(f"[RESULT] Predicted trip duration: {predicted_duration:.2f} minutes")
    except Exception:
        print("[FAILED] Prediction pipeline could not complete.")


Overwriting predict.py


In [11]:
%%writefile test.py


import predict

ride = {
    "PULocationID": 75,
    "DOLocationID": 40,
    "trip_distance": 5
}

time = predict.predict_from_dict(ride)

if time is not None:
    print(f"Predicted duration: {time:.2f} minutes")
else:
    print("Prediction failed.")



Overwriting test.py


In [12]:
!python test.py

Downloading artifacts: 100%|█████████████████████| 1/1 [00:00<00:00, 572.76it/s]
[INFO] Preprocessor loaded successfully.
Downloading artifacts: 100%|█████████████████████| 1/1 [00:00<00:00, 109.14it/s]
[INFO] Model loaded successfully.
Predicted duration: 18.37 minutes


In [15]:
### LOAD MODEL directly from   mlflow


# Define input
ride = {
    "PULocationID": 75,
    "DOLocationID": 40,
    "trip_distance": 5
}

# Make sure all features are float
df = pd.DataFrame([ride]).astype("float64")

# Load the model
logged_model = 'runs:/ab7ac267f5e945cba9566b7213e58524/nyc-duration-model'
loaded_model = mlflow.pyfunc.load_model(logged_model)

# Predict
prediction = loaded_model.predict(df)
print(f"Predicted duration: {prediction[0]:.2f} minutes")



Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

Predicted duration: 40.15 minutes
