-> Load model, preprocessing artifacts

-> Load metrics and show them

-> Load plots and show them (ROC, confusion matrix)

-> Make inference


### Setup

In [3]:
# Complete MLflow loader for best run
import mlflow
import pickle
import os
import json
import mlflow.tensorflow
from mlflow.entities import ViewType


In [14]:
"""
def get_best_model(results, recall_threshold=0.8, metric_priority="f1_score"):
    # Step 1: filter candidates by recall
    candidates = [res for res in results if res["metrics"].get("recall", 0) >= recall_threshold]

    # Step 2: among candidates, pick the one with best priority metric
    if candidates:
        return max(candidates, key=lambda r: r["metrics"].get(metric_priority, float("-inf")))
    
    # Step 3: fallback – best recall overall
    return max(results, key=lambda r: r["metrics"].get("recall", float("-inf")))
"""


def _metric_or_default(run, name, default=float("-inf")):
    """Return ``run["metrics"][name]`` if it is a usable number, else ``default``.

    A metric is unusable when the key is absent, the value is ``None``, the
    value is NaN, or the value is not numeric at all. NaN matters because
    every comparison against NaN is False, which makes ``max()`` return
    whichever element happens to come first.
    """
    import math  # local import so this cell does not depend on the import cell

    value = run["metrics"].get(name)
    if value is None:
        return default
    try:
        if math.isnan(value):
            return default
    except TypeError:
        # Non-numeric value (e.g. a string): treat it as missing.
        return default
    return value


def get_best_run(runs, recall_threshold=0.8, metric_priority="f1_score"):
    """
    Select the best run based on a recall threshold and a priority metric.

    Selection rule:
        1. Keep the runs whose ``recall`` is >= ``recall_threshold``
           (a missing or NaN recall counts as 0).
        2. If any run qualifies, return the one with the highest
           ``metric_priority`` value.
        3. Otherwise fall back to the run with the highest ``recall``.

    Missing or NaN metrics rank lowest in steps 2 and 3. Ties go to the run
    that appears first in ``runs``.

    Args:
        runs (iterable of dict): Each dict should contain:
            - "metrics" (dict)
            - "params" (dict)
            - "tags" (dict)
            - "run_id" (str)
            - "artifact_uri" (str)
            Only "metrics" is read here.
        recall_threshold (float): Minimum recall to be considered a candidate.
        metric_priority (str): Metric to use when multiple candidates meet threshold.

    Returns:
        dict: The best run dictionary (the same object that was passed in).

    Raises:
        ValueError: If ``runs`` is empty.
    """
    # Materialise once so a generator can be scanned by both the filter and the fallback.
    runs = list(runs)
    if not runs:
        raise ValueError("get_best_run() received no runs to choose from.")

    # Filter candidates by recall
    candidates = [
        r for r in runs
        if _metric_or_default(r, "recall", 0) >= recall_threshold
    ]

    # Pick best among candidates
    if candidates:
        return max(candidates, key=lambda r: _metric_or_default(r, metric_priority))

    # Fallback: best recall overall
    return max(runs, key=lambda r: _metric_or_default(r, "recall"))



In [17]:

# --------------------------
# CONFIG
# --------------------------
tracking_uri = "http://127.0.0.1:5000"        # your MLflow server
experiment_name = "Tuning"                    # experiment to read runs from
metric_to_sort = "f1"                        # metric key for server-side ordering; not used by get_best_run
artifact_paths = {
    "preprocessing": "preprocessing",       # folder under artifacts
    "model": "model"                         # model artifact path
}

# --------------------------
# SET TRACKING URI
# --------------------------
# This has to run before any other MLflow call: otherwise the experiment
# lookup below is resolved against the default tracking store instead of
# the server configured above.
mlflow.set_tracking_uri(tracking_uri)

# --------------------------
# RESOLVE EXPERIMENT
# --------------------------
experiment = mlflow.get_experiment_by_name(experiment_name)

if experiment is None:
    raise ValueError(f"Experiment '{experiment_name}' not found.")

experiment_id = experiment.experiment_id

# --------------------------
# SEARCH BEST RUN
# --------------------------
runs_df = mlflow.search_runs(
    experiment_ids=[experiment_id],
    filter_string="tags.mlflow.runName = 'best_overall'",  # only search among runs named 'best_overall'
)

# Fail early with a message that names the filter, rather than letting the
# selection step fail on an empty list.
if runs_df.empty:
    raise ValueError(
        f"No runs named 'best_overall' found in experiment '{experiment_name}'."
    )

# Convert the MLflow DataFrame to a list of dicts, one per run.
# Columns are named "metrics.<key>", "params.<key>" and "tags.<key>"; the
# prefix is sliced off (not str.replace'd) so that only the leading
# occurrence is removed.
runs = []
for _, row in runs_df.iterrows():
    runs.append({
        "run_id": row["run_id"],
        "metrics": {k[len("metrics."):]: row[k] for k in row.index if k.startswith("metrics.")},
        "params": {k[len("params."):]: row[k] for k in row.index if k.startswith("params.")},
        "tags": {k[len("tags."):]: row[k] for k in row.index if k.startswith("tags.")},
        "artifact_uri": row["artifact_uri"]
    })

best_run = get_best_run(runs, recall_threshold=0.8, metric_priority="f1_score")
print("Best run ID:", best_run["run_id"])

# Used by the artifact and model loading cells.
run_id = best_run["run_id"]


Best run ID: 0106566d6efd453a8b905ba3690ecd2a


In [18]:
# Display the selected run's record: run id, metrics, params, tags and artifact URI.
best_run

{'run_id': '0106566d6efd453a8b905ba3690ecd2a',
 'metrics': {'loss': 0.08819971233606339,
  'accuracy': 1.0,
  'f1_score': 0.999999995,
  'test_precision': 0.9756097793579102,
  'precision': 1.0,
  'test_f1': 0.9638554294926511,
  'test_recall': 0.9523809552192688,
  'test_accuracy': 0.9736841917037964,
  'recall': 1.0,
  'test_loss': 0.11360521614551544},
 'params': {'dropout_rate': '0.4',
  'model_name': 'model1',
  'epochs': '2',
  'learning_rate': '0.00544853401907757'},
 'tags': {'mlflow.source.git.commit': '5c4bd9d8d45fdc2f13e460ddf330262a85a581d2',
  'mlflow.user': 'marcos',
  'mlflow.runName': 'best_overall',
  'mlflow.source.name': '/home/marcos/Escritorio/AI-prod/ML-Complete-Project/scripts/pipeline.py',
  'mlflow.source.type': 'LOCAL'},
 'artifact_uri': 'file:///home/marcos/Escritorio/AI-prod/ML-Complete-Project/mlruns/694254336158470969/0106566d6efd453a8b905ba3690ecd2a/artifacts'}

In [20]:
# --------------------------
# PARAMS AND METRICS OF THE SELECTED RUN
# --------------------------
# Both dicts come straight from the best_run record built when the run was
# selected; nothing is fetched from MLflow again here.
params, metrics = best_run["params"], best_run["metrics"]

print("Params:", params)
print("Metrics:", metrics)


Params: {'dropout_rate': '0.4', 'model_name': 'model1', 'epochs': '2', 'learning_rate': '0.00544853401907757'}
Metrics: {'loss': 0.08819971233606339, 'accuracy': 1.0, 'f1_score': 0.999999995, 'test_precision': 0.9756097793579102, 'precision': 1.0, 'test_f1': 0.9638554294926511, 'test_recall': 0.9523809552192688, 'test_accuracy': 0.9736841917037964, 'recall': 1.0, 'test_loss': 0.11360521614551544}


### Preprocessing artifacts

In [21]:

# --------------------------
# DOWNLOAD PREPROCESSING ARTIFACTS
# --------------------------
# Downloads the run's preprocessing folder to a local directory and returns
# the local path.
preproc_dir = mlflow.artifacts.download_artifacts(run_id=run_id, artifact_path=artifact_paths["preprocessing"])
print("Preprocessing artifacts downloaded to:", preproc_dir)

# SECURITY NOTE: pickle.load can execute arbitrary code embedded in the file.
# Only load scaler.pkl / encoder.pkl from a tracking server and run you trust.

# Load scaler
scaler_file = os.path.join(preproc_dir, "scaler.pkl")
with open(scaler_file, "rb") as f:
    scaler = pickle.load(f)

# Load encoder
encoder_file = os.path.join(preproc_dir, "encoder.pkl")
with open(encoder_file, "rb") as f:
    encoder = pickle.load(f)

# Load features
# Explicit UTF-8 so decoding does not depend on the platform's default locale encoding.
features_file = os.path.join(preproc_dir, "features.json")
with open(features_file, "r", encoding="utf-8") as f:
    features = json.load(f)

print("Scaler, encoder, and features loaded successfully!")

# --------------------------
# LOAD MODEL
# --------------------------
# "runs:/<run_id>/<artifact_path>" addresses the model logged under the selected run.
model_uri = f"runs:/{run_id}/{artifact_paths['model']}"
model = mlflow.tensorflow.load_model(model_uri)
print("TensorFlow model loaded successfully!")


Preprocessing artifacts downloaded to: /tmp/tmpbgyajal5/preprocessing
Scaler, encoder, and features loaded successfully!
TensorFlow model loaded successfully!


In [22]:
# Display the contents loaded from features.json.
features

{'features': ['radius_mean',
  'texture_mean',
  'perimeter_mean',
  'area_mean',
  'smoothness_mean',
  'compactness_mean',
  'concavity_mean',
  'concave points_mean',
  'symmetry_mean',
  'fractal_dimension_mean',
  'radius_se',
  'texture_se',
  'perimeter_se',
  'area_se',
  'smoothness_se',
  'compactness_se',
  'concavity_se',
  'concave points_se',
  'symmetry_se',
  'fractal_dimension_se',
  'radius_worst',
  'texture_worst',
  'perimeter_worst',
  'area_worst',
  'smoothness_worst',
  'compactness_worst',
  'concavity_worst',
  'concave points_worst',
  'symmetry_worst',
  'fractal_dimension_worst']}

In [23]:
# Display the object loaded from encoder.pkl.
encoder

{'M': 1, 'B': 0}

In [30]:
# Inspect the loaded scaler's mean_ attribute
# (presumably the per-feature training means of a fitted scikit-learn scaler — confirm).
scaler.mean_

array([1.41559238e+01, 1.93511328e+01, 9.21518750e+01, 6.58153516e+02,
       9.61988672e-02, 1.03554531e-01, 8.85161713e-02, 4.88897402e-02,
       1.81255273e-01, 6.27087305e-02, 4.09529102e-01, 1.21794902e+00,
       2.90134512e+00, 4.10547617e+01, 6.94725781e-03, 2.51113359e-02,
       3.16497336e-02, 1.17416348e-02, 2.04345078e-02, 3.75897129e-03,
       1.63169453e+01, 2.57480273e+01, 1.07621934e+02, 8.86556445e+02,
       1.32138906e-01, 2.53280762e-01, 2.71695561e-01, 1.14682229e-01,
       2.90017188e-01, 8.38891016e-02])

**TODO:** if plots (ROC curve, confusion matrix, …) are saved as run artifacts, load and display them here.

## Inference

In [43]:
import numpy as np
import pandas as pd

# Synthetic smoke-test sample: one value per expected feature, so the length
# always matches the feature list loaded from features.json. A seeded
# generator makes the cell reproduce the same sample on Restart & Run All.
# NOTE: uniform [0, 1) values are far from realistic for most features
# (e.g. the scaler's stored mean for area_mean is ~658), so the resulting
# prediction only checks that the pipeline runs end to end.
new_data = np.random.default_rng(42).random(len(features["features"]))


In [45]:
# Wrap the single sample in a one-row DataFrame whose columns carry the
# feature names from features.json, then pass it through the loaded scaler.
new_data_df = pd.DataFrame(
    data=[new_data],
    columns=features['features'],
)

scaled_data = scaler.transform(new_data_df)

scaled_data

array([[-3.81011976e+00, -4.47525609e+00, -3.74545648e+00,
        -1.84687726e+00,  4.19432661e+01,  8.41945848e+00,
         2.83769982e+00, -1.22895558e+00,  7.98952583e+00,
         7.86905115e+01,  1.29262186e+00, -1.98880820e+00,
        -1.35052227e+00, -8.56502669e-01,  2.65027253e+02,
         5.14739301e+01,  2.56959617e+01,  2.48886110e+01,
         6.08471218e+01,  1.07568650e+02, -3.32748876e+00,
        -4.05974255e+00, -3.15387939e+00, -1.53667235e+00,
         1.88609715e+01,  4.28283368e+00,  3.30790906e+00,
         8.97685839e+00, -1.22624044e-01,  1.22801038e+01]])

In [46]:
# Run the loaded model on the scaled sample.
y_pred = model.predict(scaled_data)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step


In [47]:
# Display the raw model output for the sample
# (presumably the predicted probability of class 1, i.e. 'M' in the encoder — confirm against the model's output layer).
y_pred

array([[2.4497193e-22]], dtype=float32)

In [48]:
# Display the raw (unscaled) sample that was passed to the scaler.
new_data

array([6.49124866e-01, 6.13761888e-02, 4.30378660e-01, 2.93356330e-01,
       6.91678048e-01, 5.50111639e-01, 3.17028242e-01, 7.01355829e-04,
       4.00447261e-01, 6.04196771e-01, 7.79130481e-01, 1.05996769e-01,
       7.42612669e-02, 6.79775500e-01, 7.71580382e-01, 9.14887461e-01,
       8.19836300e-01, 1.67821391e-01, 5.23858283e-01, 2.80088752e-01,
       1.32612830e-01, 8.39537527e-01, 5.76754949e-01, 5.93051626e-01,
       5.68597943e-01, 9.24603929e-01, 9.63341499e-01, 7.07447424e-01,
       2.82497004e-01, 3.05025527e-01])

In [49]:
# Show the loaded model's summary.
model.summary()