In [14]:
import ray.tune
from pathlib import Path
from dataclasses import dataclass
from functools import lru_cache
import pandas as pd
from typing import List
from functools import partial

In [13]:
# pd.set_option('display.max_colwidth', None)
# pd.set_option('display.max_rows', None)

In [3]:
from etr_fr_expes import metric

2025-04-06 13:46:38,616	INFO util.py:154 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.
[nltk_data] Downloading package punkt_tab to
[nltk_data]     /home/ledoyen202/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


In [4]:
# Root directory that holds one sub-directory per experiment. The relative
# path is resolved against the kernel's working directory when this cell runs,
# so the notebook must be started from its own folder for it to point correctly.
EXPE_DIR = Path("../../experimentations/").resolve()

In [5]:
@dataclass
class Expe:
    """One hyper-parameter search, identified by model, method and task.

    Attributes:
        model: Model identifier; first component of ``expe_name``.
        method: Fine-tuning method identifier; second component of ``expe_name``.
        task: Task identifier; third component of ``expe_name``.
        metric: Result column used to rank trials.
        mode: ``"min"`` ranks the smallest ``metric`` first; any other value
            ranks the largest first in ``dataframe``.
        adapter_name: Substring replaced by ``"adapter"`` in result column names.
        expe_dir: Directory containing one sub-directory per experiment.
            Strings are accepted and converted to ``Path``.
    """
    model: str
    method: str
    task: str
    metric: str
    mode: str
    adapter_name: str
    expe_dir: Path = EXPE_DIR
    # Deliberately un-annotated so that @dataclass does not treat it as a field
    # (it stays out of __init__, __repr__ and __eq__). Instances shadow this
    # class-level None with their own loaded analysis on first access.
    _expe_analysis = None

    def __post_init__(self):
        # The path is later combined with the "/" operator, which a plain str
        # does not support; normalising here lets callers pass either type.
        self.expe_dir = Path(self.expe_dir)

    @property
    def expe_name(self):
        """Experiment name in the form ``<model>.<method>.<task>``."""
        return f"{self.model}.{self.method}.{self.task}"

    @property
    def expe_analysis(self):
        """Ray Tune analysis of this experiment's search, loaded once per instance.

        The results are read from
        ``<expe_dir>/<expe_name>/results/<expe_name>_hp_search``.
        """
        if self._expe_analysis is None:
            # Only build the path when something actually has to be loaded.
            hp_search_dir = self.expe_dir / self.expe_name / "results" / f"{self.expe_name}_hp_search"
            self._expe_analysis = ray.tune.ExperimentAnalysis(hp_search_dir)
        return self._expe_analysis

    @property
    def dataframe(self):
        """All trial results in a single frame, best ``metric`` value first.

        The frame is rebuilt on every access. It gains an ``expe`` column
        holding ``expe_name`` and an ``index`` column holding each row's
        position in the concatenated (pre-sort) data, and every occurrence of
        ``adapter_name`` in a column name is replaced by ``"adapter"``.
        """
        # Per the Ray Tune docs this is a mapping from trial to DataFrame.
        dfs = self.expe_analysis.trial_dataframes
        # NOTE(review): ignore_index=True drops the mapping keys, so rows are
        # not tagged with their trial here - confirm that this is intended.
        res = (pd.concat(dfs, ignore_index=True)
            .reset_index(level=0)
            .assign(expe=self.expe_name)
            .sort_values(by=self.metric, ascending=self.mode == "min")
            .rename(columns=lambda x: x.replace(self.adapter_name, 'adapter'))
        )
        return res

    def get_texts_df(self, row_idx, _type="test"):
        """Return the text entries of one result row in long format.

        Args:
            row_idx: Position of the row in ``dataframe`` (0 is the first row
                after sorting by ``metric``).
            _type: Prefix used in the column regex ``"<_type>.*texts"``.

        Returns:
            A one-column DataFrame obtained by exploding the selected cells
            into rows and stacking the columns.
        """
        # Assumes the selected cells hold list-likes of equal length, as
        # required by a multi-column explode - TODO confirm.
        row_df = self.dataframe.iloc[row_idx].filter(regex=f"{_type}.*texts").to_frame().T
        row_df = row_df.explode(list(row_df.columns)).reset_index(drop=True).stack().to_frame()
        return row_df

    @property
    def best_trial(self):
        """Best trial according to ``metric`` and ``mode``, using scope ``"all"``."""
        return self.expe_analysis.get_best_trial(metric=self.metric, mode=self.mode, scope="all")
    
# Task-specific constructors: each one pre-binds the ranking metric and its
# direction so that callers only supply model, method and task.
ETRFrExpe = partial(
    Expe,
    metric="eval_etr_fr_srb",
    mode="max",
    adapter_name="lora_etr_fr",
)
OrangesumExpe = partial(
    Expe,
    metric="eval_orangesum_rougeL",
    mode="max",
    adapter_name="lora_orangesum",
)
# No adapter name is pre-bound here, so every call must pass adapter_name.
WikilargeExpe = partial(
    Expe,
    metric="eval_wikilarge_fr_sari",
    mode="max",
)

In [6]:
@dataclass
class Analysis:
    """A group of experiments whose results are viewed as one table."""
    # Experiments to aggregate; their order is the row-block order of `dataframe`.
    # The forward reference keeps this class definable before Expe is in scope.
    expes: List["Expe"]

    @property
    def dataframe(self):
        """Concatenate every experiment's ``dataframe`` under a fresh 0..n-1 index.

        Returns an empty DataFrame when ``expes`` is empty.
        """
        frames = [expe.dataframe for expe in self.expes]
        if not frames:
            # pd.concat raises ValueError when given nothing to concatenate.
            return pd.DataFrame()
        return pd.concat(frames, ignore_index=True)

In [7]:
# Model identifiers (first component of an experiment name).
MBARTHEZ = "mbarthez"
MISTRAL = "mistral"
LLAMA3 = "llama3"

# Fine-tuning method identifiers (second component).
LORA = "lora"
MTLLORA = "mtllora"

# Task identifiers (third component); ALL names the combined multi-task run.
ETR_FR = "etrfr"
ORANGESUM = "orangesum"
WIKILARGE = "wikilarge"
ALL = "etrfr+orangesum+wikilarge"

In [15]:
# ETR-fr: for each model, the single-task LoRA run followed by the multi-task
# run. The order matters because later cells index into `expes` by position.
etr_fr_analysis = Analysis(
    expes=[
        ETRFrExpe(model=model, method=method, task=task)
        for model in (MBARTHEZ, LLAMA3, MISTRAL)
        for method, task in ((LORA, ETR_FR), (MTLLORA, ALL))
    ]
)

# WikiLarge: the mBARThez run uses its own task and adapter names, while the
# other two models share the same ones.
wikilarge_analysis = Analysis(
    expes=[
        WikilargeExpe(model=MBARTHEZ, method=LORA, task="wikilarge-fr", adapter_name="lora_wikilarge_fr"),
        *(
            WikilargeExpe(model=model, method=LORA, task=WIKILARGE, adapter_name="lora_wikilarge")
            for model in (LLAMA3, MISTRAL)
        ),
    ]
)

# OrangeSum: one single-task LoRA run per model.
orangesum_analysis = Analysis(
    expes=[
        OrangesumExpe(model=model, method=LORA, task=ORANGESUM)
        for model in (MBARTHEZ, LLAMA3, MISTRAL)
    ]
)





In [28]:
# Show the fourth ETR-fr experiment (index 3: LLAMA3 with MTLLORA on ALL),
# keeping only the columns whose name contains "test_etr_fr_" not directly
# followed by "texts".
etr_fr_analysis.expes[3].dataframe.filter(regex="test_etr_fr_(?!texts)")

Unnamed: 0,test_etr_fr_loss,test_etr_fr_rouge1,test_etr_fr_rouge2,test_etr_fr_rougeL,test_etr_fr_rougeLsum,test_etr_fr_sari,test_etr_fr_bleu,test_etr_fr_bertscore_f1_rescaled,test_etr_fr_bertscore_recall_rescaled,test_etr_fr_bertscore_precision_rescaled,...,test_etr_fr_bertscore_precision,test_etr_fr_kmre,test_etr_fr_lix,test_etr_fr_compression_ratio,test_etr_fr_novelty,test_etr_fr_srb,test_etr_fr_n_samples,test_etr_fr_runtime,test_etr_fr_samples_per_second,test_etr_fr_steps_per_second
67,1.413924,33.0898,14.3247,25.705,32.2739,44.322,11.6347,30.5307,31.7495,29.4166,...,73.5828,101.9018,25.7013,56.5829,24.6053,40.0081,53,108.5054,0.488,0.065
68,1.413924,33.0898,14.3247,25.705,32.2739,44.322,11.6347,30.5307,31.7495,29.4166,...,73.5828,101.9018,25.7013,56.5829,24.6053,40.0081,53,108.5054,0.488,0.065
76,1.45692,33.0389,13.2541,25.1095,32.2543,43.8155,11.2989,29.8646,30.4411,29.3478,...,73.5571,103.1468,24.9479,56.4766,25.6148,39.3629,53,109.713,0.483,0.064
69,1.451185,34.9835,14.6733,27.196,34.0026,44.9754,12.3156,31.9309,31.9837,31.9379,...,74.5265,101.998,26.1002,60.8376,24.6058,41.4199,53,108.1609,0.49,0.065
62,1.795096,31.9554,11.9986,23.4239,31.0361,42.6939,9.5138,27.4018,27.8918,26.9028,...,72.642,104.4187,23.5384,58.6628,34.9135,37.5698,53,113.738,0.466,0.062
60,1.675498,31.3699,11.2658,22.3552,30.4777,42.8711,8.6282,26.5051,27.8693,25.107,...,71.9699,102.6967,25.4953,54.4211,30.4337,36.6483,53,113.3699,0.467,0.062
61,1.675498,31.3699,11.2658,22.3552,30.4777,42.8711,8.6282,26.5051,27.8693,25.107,...,71.9699,102.6967,25.4953,54.4211,30.4337,36.6483,53,113.3699,0.467,0.062
74,1.433093,32.1037,12.6519,24.3314,31.5538,42.3859,10.6166,29.3446,30.4253,28.3726,...,73.1921,102.5792,25.2308,55.5174,24.7687,38.3175,53,110.0576,0.482,0.064
75,1.433093,32.1037,12.6519,24.3314,31.5538,42.3859,10.6166,29.3446,30.4253,28.3726,...,73.1921,102.5792,25.2308,55.5174,24.7687,38.3175,53,110.0576,0.482,0.064
73,1.402335,31.4538,12.2082,24.0479,30.7935,41.696,9.8026,26.9917,27.7501,26.3735,...,72.4439,102.3778,25.7,55.8246,22.7764,37.8152,53,113.5913,0.467,0.062
