In [53]:
import pandas as pd
import numpy as np
import mlflow
import mlflow.sklearn
from mlflow import MlflowClient
from sklearn.preprocessing import MinMaxScaler

def get_best_model_by_metric(experiment_name, metric_key="recall"):
    """
    Search runs in a given MLflow experiment, order by the specified metric in DESC order,
    and return the model from the top run (best metric).
    """
    client = MlflowClient()

    # Get the experiment
    experiment = client.get_experiment_by_name(experiment_name)
    if experiment is None:
        raise ValueError(f"No experiment found with name: {experiment_name}")

    # Search runs, sorted by descending metric (e.g., 'recall')
    # -> This returns a DataFrame of runs
    runs_df = mlflow.search_runs(
        experiment_ids=[experiment.experiment_id],
        order_by=[f"metrics.{metric_key} DESC"],  # sort runs by 'recall' descending
        max_results=1  # just get the best run
    )

    if runs_df.empty:
        raise ValueError(f"No runs found in experiment {experiment_name}.")

    best_run_id = runs_df.iloc[0].run_id
    print(f"Best run found: {best_run_id} (metric={metric_key})")

    model_uri = f"runs:/{best_run_id}/best_svc_model"

    model_version = client.create_registered_model("SVC_Model")
    registered_model = mlflow.register_model(model_uri, "SVC_Model")

    print(f"Модель зарегистрирована с именем {registered_model.name} и версией {registered_model.version}")

    # Load the model
    best_model = mlflow.sklearn.load_model(model_uri)
    return best_model


def run_inference(model, input_data):
    """
    Given a loaded model and a DataFrame (or NumPy array) of input data,
    return the predicted classes.
    """
    scaler = MinMaxScaler()
    input_data[['Amount']] = scaler.fit_transform(input_data[['Amount']])

    predictions = model.predict(input_data)
    return predictions


def load_dataset(path='data/creditcard_2023.csv', n=100):
    df = pd.read_csv(path, index_col=0)
    return df.sample(n).drop('Class', axis=1)


In [55]:
experiment_name = "Creditcard_fraud_detection"  # adjust as needed
best_model = get_best_model_by_metric(experiment_name, metric_key="recall")

inference_df = load_dataset()

predictions = run_inference(best_model, inference_df)

inference_df['Class'] = predictions

inference_df

Best run found: 79a1498b8cf848ed9dd1e7e688384115 (metric=recall)
Модель зарегистрирована с именем SVC_Model и версией 1


Registered model 'SVC_Model' already exists. Creating a new version of this model...
Created version '1' of model 'SVC_Model'.


Unnamed: 0_level_0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
365096,0.037620,-0.479049,-0.242159,-0.086278,0.497940,0.267831,1.747483,-0.261125,0.073346,0.207086,...,0.019163,0.479508,0.502120,-0.201705,0.453322,-0.530314,-0.251576,-0.135809,0.661714,1
115864,-1.190092,-3.037064,0.720965,-0.513321,1.631700,-0.777294,0.579301,-0.272286,1.441938,1.221433,...,-0.870439,0.709355,5.222807,0.289151,1.760912,2.130997,3.560734,-2.023153,0.668788,0
119275,-0.570226,0.202563,-0.152124,-0.291536,0.243848,0.718539,0.100733,0.389103,-0.203493,0.137419,...,0.001742,0.073080,-0.135489,-2.766939,-0.175128,-0.583091,-1.053792,-1.018216,0.682606,0
556182,0.128037,-0.056536,0.956830,-0.172372,0.475574,0.032070,0.752223,-0.190993,0.249082,0.310453,...,-0.075731,0.268057,-0.260421,0.554179,0.106223,-0.631925,-0.406857,-0.318796,0.000000,1
267251,0.247787,-0.175208,0.255522,-1.011081,0.771958,0.313102,0.720508,-0.117235,0.431931,0.358828,...,-0.233179,-0.793312,0.041069,0.202805,-0.804178,0.297166,0.015364,0.064830,0.749614,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
391324,-0.053555,0.077174,0.010812,-0.145217,1.631496,-0.786578,0.984502,-0.253847,0.725469,-0.270631,...,-0.287004,-0.805551,-0.406617,0.317946,0.777395,-1.588285,-0.480139,-0.369440,0.153879,1
128374,0.013974,-0.153416,1.496251,-0.659853,0.257513,0.209104,0.643243,-0.121559,0.361920,0.469400,...,-0.189661,-0.549036,-0.000920,0.082854,-0.569821,0.249818,0.092385,0.178151,0.456623,0
433042,-0.897784,-1.532169,-0.607145,0.273807,-0.353225,0.814039,-0.388642,0.309515,-0.087213,-0.711723,...,0.330873,0.967955,-0.797521,-2.364908,-0.388534,0.196914,1.452617,-2.342227,0.627622,1
330266,0.861457,0.471047,-0.797441,1.061251,1.112886,-0.690811,0.162268,-0.082992,-0.922316,-0.477429,...,-0.047404,-0.737681,-0.103898,-0.016141,0.420755,0.153000,0.368487,0.680454,0.840540,1


In [47]:
import requests

url = "http://127.0.0.1:6666/invocations"
payload = {
    "dataframe_split": {
        "columns": [
            "V1", "V2", "V3", "V4", "V5", "V6", "V7", "V8",
            "V9", "V10", "V11", "V12", "V13", "V14", "V15",
            "V16", "V17", "V18", "V19", "V20", "V21", "V22",
            "V23", "V24", "V25", "V26", "V27", "V28", "Amount"
        ],
        "data": [
            [
               1.832434,-0.467714,-0.018437,-0.503370,0.305604,-0.174488,0.500547,-0.222747,0.981410,0.609973,-1.461178,0.511663,-0.478403,1.087197,1.031771,0.392294,0.276567,0.357314,-0.341280,-0.455480,-0.017274,0.700740,-0.059692,-0.680290,0.385283,-0.167231,-0.262128,-0.256745,0.619890

            ]
        ]
    }
}

headers = {
    "Content-Type": "application/json"  # no "format=pandas-split" anymore
}

response = requests.post(url, json=payload, headers=headers)

print("Response status code:", response.status_code)
print("Server response:", response.text)


Response status code: 200
Server response: {"predictions": [0]}
