In [43]:
import pandas as pd
import numpy as np
import mlflow
import mlflow.sklearn
from mlflow import MlflowClient
from sklearn.preprocessing import MinMaxScaler

def get_best_model_by_metric(experiment_name, metric_key="recall"):
    """
    Search runs in a given MLflow experiment, order by the specified metric in DESC order,
    and return the model from the top run (best metric).
    """
    client = MlflowClient()

    # Get the experiment
    experiment = client.get_experiment_by_name(experiment_name)
    if experiment is None:
        raise ValueError(f"No experiment found with name: {experiment_name}")

    # Search runs, sorted by descending metric (e.g., 'recall')
    # -> This returns a DataFrame of runs
    runs_df = mlflow.search_runs(
        experiment_ids=[experiment.experiment_id],
        order_by=[f"metrics.{metric_key} DESC"],  # sort runs by 'recall' descending
        max_results=1  # just get the best run
    )

    if runs_df.empty:
        raise ValueError(f"No runs found in experiment {experiment_name}.")

    best_run_id = runs_df.iloc[0].run_id
    print(f"Best run found: {best_run_id} (metric={metric_key})")

    # Construct the model URI.
    # If in your training script you did:
    #    mlflow.sklearn.log_model(sk_model=..., artifact_path="best_svc_model")
    #
    # The artifact URI is: runs:/<run_id>/best_svc_model
    # Adjust to match exactly how you logged the model artifact_path.
    model_uri = f"runs:/{best_run_id}/best_svc_model"
    # or "best_lr_model" if you want the best LR model.
    # This depends on how you named the artifact_path in your training code.

    # Load the model
    best_model = mlflow.sklearn.load_model(model_uri)
    return best_model


def run_inference(model, input_data):
    """
    Given a loaded model and a DataFrame (or NumPy array) of input data,
    return the predicted classes.
    """
    scaler = MinMaxScaler()
    input_data[['Amount']] = scaler.fit_transform(input_data[['Amount']])

    predictions = model.predict(input_data)
    return predictions


def load_dataset(path='data/creditcard_2023.csv', n=100):
    df = pd.read_csv(path, index_col=0)
    return df.sample(n).drop('Class', axis=1)


experiment_name = "Creditcard_fraud_detection"  # adjust as needed
best_model = get_best_model_by_metric(experiment_name, metric_key="recall")

inference_df = load_dataset()

predictions = run_inference(best_model, inference_df)

inference_df['Class'] = predictions

inference_df


Best run found: 79a1498b8cf848ed9dd1e7e688384115 (metric=recall)


Unnamed: 0_level_0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
218479,1.861772,-1.234230,0.514520,-1.345492,-0.358378,0.722337,-0.104177,-0.098576,0.520978,1.285519,...,-0.189833,-0.067844,0.268390,1.187114,-0.791320,-0.501227,-0.158947,-0.143213,0.337471,0
453633,-0.280873,-0.521281,-0.208474,0.325014,-0.312927,0.197116,0.092684,-0.034572,-0.410969,-0.388381,...,0.251896,0.116471,0.373899,-0.282014,0.104502,1.750249,0.154110,0.453085,0.191373,1
204690,0.127803,-0.206600,0.147488,-0.981006,0.628847,0.049234,0.836153,-0.141909,0.513140,0.394875,...,-0.156125,-0.402957,0.098209,1.185661,-1.236680,0.692824,-0.056876,0.312488,0.960606,0
557136,0.084814,0.215052,-0.702341,0.490214,-0.432201,-0.365147,-0.289873,0.148522,-0.514406,-0.440852,...,0.284043,0.895766,0.292342,0.480652,-0.410234,0.672282,-0.008933,-0.405015,0.483571,1
373684,-0.591175,0.236107,-0.500463,-0.046175,-0.180777,-0.263922,-0.117182,-0.077028,-0.436866,-0.683597,...,0.217134,1.205222,-0.258565,-0.811547,0.147634,0.244264,-0.135409,0.033111,0.738059,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
91069,0.832606,-0.481446,1.291332,-0.155129,-0.025056,0.616705,0.236159,-0.084340,0.614806,0.646703,...,-0.016201,0.511433,-0.021514,0.457393,0.234715,-0.880731,-0.159344,-0.013761,0.901376,0
417297,-1.063151,-2.121775,0.314257,0.016281,1.877024,-0.960144,-0.073691,-0.655543,0.965692,0.481483,...,0.300144,-0.847274,-0.863041,0.234335,-0.422259,-1.137896,2.170846,-0.510029,0.605200,1
464517,-0.727603,0.385219,-0.385247,-0.137451,-0.210879,0.929200,-0.178028,-1.139932,0.463647,-0.410753,...,1.794178,-1.574427,0.940914,-0.308157,0.368691,-1.012972,0.742802,0.959426,0.895328,1
150042,1.807028,-0.823326,0.397699,-0.654671,-0.205457,0.088424,0.088009,-0.174672,1.748543,0.837959,...,-0.215052,-0.048895,0.200041,-0.020850,-0.787461,1.483310,-0.293703,-0.225238,0.693766,0


In [47]:
import requests

url = "http://127.0.0.1:6666/invocations"
payload = {
    "dataframe_split": {
        "columns": [
            "V1", "V2", "V3", "V4", "V5", "V6", "V7", "V8",
            "V9", "V10", "V11", "V12", "V13", "V14", "V15",
            "V16", "V17", "V18", "V19", "V20", "V21", "V22",
            "V23", "V24", "V25", "V26", "V27", "V28", "Amount"
        ],
        "data": [
            [
               1.832434,-0.467714,-0.018437,-0.503370,0.305604,-0.174488,0.500547,-0.222747,0.981410,0.609973,-1.461178,0.511663,-0.478403,1.087197,1.031771,0.392294,0.276567,0.357314,-0.341280,-0.455480,-0.017274,0.700740,-0.059692,-0.680290,0.385283,-0.167231,-0.262128,-0.256745,0.619890

            ]
        ]
    }
}

headers = {
    "Content-Type": "application/json"  # no "format=pandas-split" anymore
}

response = requests.post(url, json=payload, headers=headers)

print("Response status code:", response.status_code)
print("Server response:", response.text)


Response status code: 200
Server response: {"predictions": [0]}
