In [1]:
import mlflow
import re
import pandas as pd
import os
from mlflow.tracking import MlflowClient
import re, pandas as pd, os

In [None]:
# 1. Configure the experiment name and the MLflow tracking connection
EXPERIMENT_NAME = "Prompt_Comparison-rhyno-cyt-img"
mlflow.set_tracking_uri("http://127.0.0.1:5000")
# The client is built with no arguments, after the tracking URI has been set.
client = MlflowClient()
# get_experiment_by_name yields None for an unknown name (handled just below).
exp = mlflow.get_experiment_by_name(EXPERIMENT_NAME)
# Fail fast: the following cells read exp.experiment_id.
if exp is None:
    raise ValueError(f"Esperimento '{EXPERIMENT_NAME}' non trovato.")


In [None]:
# 2. Retrieve all runs of the experiment as a DataFrame.
# `experiment_ids` is documented as a list of ids, so the single id is wrapped
# in a list and passed by keyword instead of as a bare positional string.
runs_df = mlflow.search_runs(experiment_ids=[exp.experiment_id])

In [None]:
# 3. Collect one record per task bullet found in each run's output_<n>.txt artifacts.

# Everything that follows the "**Tasks**" heading, up to the end of the file.
TASKS_SECTION_RE = re.compile(r"\*\*Tasks\*\*([\s\S]*)")
# A bullet must start its line (optionally indented); anchoring avoids treating
# a " - " that appears in the middle of a sentence as a separate task.
TASK_BULLET_RE = re.compile(r"^[ \t]*- (.+)$", re.MULTILINE)

rows = []  # rebuilt from scratch, so re-running this cell never duplicates records
for _, run_info in runs_df.iterrows():
    run_id = run_info.run_id
    run = client.get_run(run_id)
    prompt_type = run.data.tags.get("prompt_type")  # None when the tag is absent
    temperature = float(run.data.params.get("temperature", 0))  # 0 when not logged

    # Download the run's artifacts (path="") to a local directory and walk it.
    local_root = client.download_artifacts(run_id, path="")

    for dirpath, _dirnames, filenames in os.walk(local_root):
        for fname in filenames:
            if not (fname.startswith("output_") and fname.endswith(".txt")):
                continue

            # Generation index: the text between the last "_" and the next ".".
            try:
                gen = int(fname.split("_")[-1].split(".")[0])
            except ValueError:
                # e.g. "output_notes.txt": report and skip instead of aborting
                # the whole collection on one unexpected file name.
                print(f"Skipping '{fname}': no numeric generation suffix")
                continue

            with open(os.path.join(dirpath, fname), encoding="utf-8") as f:
                text = f.read()

            section = TASKS_SECTION_RE.search(text)
            if section is None:
                # File has no "**Tasks**" heading; nothing to extract.
                continue

            for task in TASK_BULLET_RE.findall(section.group(1)):
                rows.append({
                    "prompt_type": prompt_type,
                    "temperature": temperature,
                    "generation": gen,
                    "task": task.strip()
                })


In [5]:
# Build the task table. Naming the columns keeps the schema (and column order)
# stable even when no task was collected and `rows` is empty.
df = pd.DataFrame(rows, columns=["prompt_type", "temperature", "generation", "task"])

In [8]:
# Preview the first five task records.
df.head(n=5)

Unnamed: 0,prompt_type,temperature,generation,task
0,few_shot,1.0,2,Check the classifications of cells in the curr...
1,few_shot,1.0,2,Examine the reference range for a specific cel...
2,few_shot,1.0,2,Review the classified images of a particular c...
3,few_shot,1.0,2,"Mark a batch of cells as ""Correct"" or ""Incorre..."
4,few_shot,1.0,2,Browse the dashboard to understand the overall...


In [None]:
# 6. Export to Excel with a formatted header row and auto-sized columns
excel_path = "xlsx/tasks_comparison_rhyno.xlsx"
MAX_EXCEL_COL_WIDTH = 255  # upper limit Excel accepts for a column width

# Create the output folder first so the export also works on a fresh checkout.
os.makedirs(os.path.dirname(excel_path) or ".", exist_ok=True)

with pd.ExcelWriter(excel_path, engine="xlsxwriter") as writer:
    df.to_excel(writer, sheet_name="Tasks", index=False)

    workbook = writer.book
    worksheet = writer.sheets["Tasks"]

    header_format = workbook.add_format({
        "bold": True,
        "text_wrap": True,
        "align": "center",
        "valign": "vcenter"
    })

    for col_idx, col_name in enumerate(df.columns):
        # Rewrite the header cell so it carries the custom format.
        worksheet.write(0, col_idx, col_name, header_format)

        # Width = longest rendered value (or the header itself) plus padding.
        # An empty column has no lengths to take a max of, so fall back to 0
        # rather than passing NaN to set_column; long text is capped at the
        # Excel limit.
        cell_lengths = df[col_name].astype(str).map(len)
        longest_cell = int(cell_lengths.max()) if len(cell_lengths) else 0
        width = min(max(longest_cell, len(str(col_name))) + 2, MAX_EXCEL_COL_WIDTH)
        worksheet.set_column(col_idx, col_idx, width)

print(f"Excel generated: {excel_path}")


Excel generato: tasks_comparison_rhyno.xlsx
