In [317]:
import ray.tune
from pathlib import Path
from dataclasses import dataclass, field
from functools import lru_cache
import pandas as pd
from typing import List
from functools import partial

  from .autonotebook import tqdm as notebook_tqdm
2025-04-07 18:23:17,308	INFO util.py:154 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.
2025-04-07 18:23:17,886	INFO util.py:154 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.


In [318]:
# pd.set_option('display.max_colwidth', None)
# pd.set_option('display.max_rows', None)

In [319]:
from etr_fr_expes import metric

2025-04-07 18:23:23,825	INFO util.py:154 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.
[nltk_data] Downloading package punkt_tab to
[nltk_data]     /home/ledoyen202/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


In [320]:
EXPE_DIR = Path("../../experimentations/").resolve()

In [345]:
@dataclass
class Expe:
    model: str
    method: str
    task: str
    train_tasks: str
    metric: str
    mode: str
    adapter_name: str
    expe_dir: str = EXPE_DIR
    _expe_analysis = None
    
    @property
    def base_columns(self):
        return [
            # "expe",
            "trial_id",
            "model", 
            "method", 
            "task", 
            "train_tasks", 
            # "metric",
        ]
    
    @property
    def expe_name(self):
        return f"{self.model}.{self.method}.{self.train_tasks}"
    
    @property
    def expe_analysis(self):
        hp_search_dir = self.expe_dir / self.expe_name / "results" / f"{self.expe_name}_hp_search"
        if self._expe_analysis is None:
            self._expe_analysis = ray.tune.ExperimentAnalysis(hp_search_dir)
        return self._expe_analysis

    @property
    def dataframe(self):
        dfs = self.expe_analysis.trial_dataframes
        res = (pd.concat(dfs, ignore_index=True)
            .reset_index(level=0)
            .assign(
                expe=self.expe_name, 
                model=self.model, 
                metric=self.metric,
                method=self.method,
                task=self.task,
                train_tasks=self.train_tasks,
            )
            .sort_values(by=f"eval_{self.metric}", ascending=self.mode == "min")
            .rename(columns=lambda x: x.replace(self.adapter_name, 'adapter'))
        )
        res = res.set_index(self.base_columns)
        return res
    
    def metric_columns(self, _type="test"):
        return self.dataframe.filter(
            regex=f"{_type}_{self.task}_(?!texts)"
        ).columns
        
    def text_columns(self, _type="test"):
        return self.dataframe.filter(
            regex=f"{_type}.*texts"
        ).columns
        
    @property
    def test_metric_df(self):
        return self.dataframe[self.metric_columns(_type="test")]
    
    @property
    def eval_metric_df(self):
        return self.dataframe[self.metric_columns(_type="eval")]
    
    @property
    def best_model(self):
        return self.dataframe.iloc[0]
    
    def get_texts_df(self, row_idx, _type="test"):
        row_df = self.dataframe.iloc[row_idx].filter(regex=f"{_type}.*texts").to_frame().T
        row_df = row_df.explode(list(row_df.columns)).reset_index(drop=True).stack().to_frame()
        return row_df
    
    @property
    def best_trial(self):
        return self.expe_analysis.get_best_trial(metric=self.metric, mode=self.mode, scope="all")
    
ETRFrExpe = partial(Expe, metric="etr_fr_srb", mode="max", task="etr_fr", adapter_name="lora_etr_fr")
OrangesumExpe = partial(Expe, metric="orangesum_rougeL", mode="max", task="orangesum", adapter_name="lora_orangesum")
WikilargeExpe = partial(Expe, metric="wikilarge_fr_sari", mode="max", task="wikilarge_fr")

In [369]:
@dataclass
class Analysis:
    expes: List[Expe]
    metrics: List[str] = field(default_factory=lambda :[
        "rouge1",
        "rouge2",
        "rougeL",
        "sari",
        "bertscore_f1",
        "srb",
        "compression_ratio",
        "novelty",
        "kmre",
        "lix"
    ])

    @property
    def dataframe(self):
        dfs = [expe.dataframe for expe in self.expes]
        return pd.concat(dfs)
    
    def best_models(self, _type=None, texts=False):
        series = [expe.best_model for expe in self.expes]
        res = pd.DataFrame(series)
        
        return_metrics = _type in ["test", "eval"]
        if not texts and return_metrics:
            res = res.filter(regex=f"({'|'.join(self.metrics)})")
        elif return_metrics:
            cols = self.expes[0].text_columns(_type=_type)
            res = res[cols]
        
        
        return res
    
    @property
    def test_metrics(self):
        return self.best_models(_type="test")
    
    @property
    def test_texts(self):
        return self.best_models(_type="test", texts=True)

In [370]:
MBARTHEZ, MISTRAL, LLAMA3 = "mbarthez", "mistral", "llama3"
LORA, MTLLORA = "lora", "mtllora"
ETR_FR, ORANGESUM, WIKILARGE, ETR_FR_ORANGESUM, ALL = "etrfr", "orangesum", "wikilarge", "etrfr+orangesum", "etrfr+orangesum+wikilarge"

In [371]:
etr_fr_analysis = Analysis(
    expes=[
        ETRFrExpe(model=MBARTHEZ, method=LORA, train_tasks=ETR_FR),
        ETRFrExpe(model=MBARTHEZ, method=MTLLORA, train_tasks=ALL),
        ETRFrExpe(model=LLAMA3, method=LORA, train_tasks=ETR_FR),
        ETRFrExpe(model=LLAMA3, method=MTLLORA, train_tasks=ALL),
        ETRFrExpe(model=LLAMA3, method=MTLLORA, train_tasks=ETR_FR_ORANGESUM),
        ETRFrExpe(model=MISTRAL, method=LORA, train_tasks=ETR_FR),
        ETRFrExpe(model=MISTRAL, method=MTLLORA, train_tasks=ALL),
    ]
)

wikilarge_analysis = Analysis(
    expes=[
        WikilargeExpe(model=MBARTHEZ, method=LORA, train_tasks="wikilarge-fr", adapter_name="lora_wikilarge_fr"),
        WikilargeExpe(model=LLAMA3, method=LORA, train_tasks=WIKILARGE, adapter_name="lora_wikilarge"),
        WikilargeExpe(model=MISTRAL, method=LORA, train_tasks=WIKILARGE, adapter_name="lora_wikilarge"),
    ]
)

orangesum_analysis = Analysis(
    expes=[
        OrangesumExpe(model=MBARTHEZ, method=LORA, train_tasks=ORANGESUM),
        OrangesumExpe(model=LLAMA3, method=LORA, train_tasks=ORANGESUM),
        OrangesumExpe(model=MISTRAL, method=LORA, train_tasks=ORANGESUM),
    ]
)

In [34]:
etr_fr_analysis.test_metrics

Unnamed: 0,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,test_etr_fr_rouge1,test_etr_fr_rouge2,test_etr_fr_rougeL,test_etr_fr_rougeLsum,test_etr_fr_sari,test_etr_fr_bertscore_f1_rescaled,test_etr_fr_bertscore_f1,test_etr_fr_kmre,test_etr_fr_lix,test_etr_fr_compression_ratio,...,eval_etr_fr_rougeL,eval_etr_fr_rougeLsum,eval_etr_fr_sari,eval_etr_fr_bertscore_f1_rescaled,eval_etr_fr_bertscore_f1,eval_etr_fr_kmre,eval_etr_fr_lix,eval_etr_fr_compression_ratio,eval_etr_fr_novelty,eval_etr_fr_srb
b1a51_00003,mbarthez,lora,etr_fr,etrfr,27.7492,9.2272,21.532,26.9325,39.8308,21.1803,70.4623,,,69.3044,...,29.2315,33.8361,42.7233,30.5028,73.9559,96.4316,28.4553,57.9343,13.4685,42.1717
7de10_00011,mbarthez,mtllora,etr_fr,etrfr+orangesum+wikilarge,28.0493,9.233,20.4604,27.2166,38.7414,22.5912,70.991,103.0705,24.6935,58.7936,...,28.6006,35.0402,42.58,29.6772,73.6465,96.3077,28.351,46.0451,15.6279,41.6505
8a9fd_00003,llama3,lora,etr_fr,etrfr,32.574,13.3147,25.4039,31.679,42.358,27.8567,72.9643,101.2611,26.6041,55.9325,...,32.7097,40.527,45.9699,36.7264,76.2882,95.3582,29.0824,45.7141,22.3861,45.8481
a6a20_00009,llama3,mtllora,etr_fr,etrfr+orangesum+wikilarge,33.0898,14.3247,25.705,32.2739,44.322,30.5307,73.9664,101.9018,25.7013,56.5829,...,33.9438,41.9159,49.4661,39.8132,77.445,97.4047,27.8156,49.011,35.2361,47.932
e160e_00010,llama3,mtllora,etr_fr,etrfr+orangesum,33.0561,13.1307,25.2967,32.2475,43.8882,29.9372,73.744,102.6062,25.6946,58.3828,...,34.6758,42.8946,50.6565,39.9974,77.514,98.3807,26.6145,50.0887,36.4972,48.7962
142d8_00003,mistral,lora,etr_fr,etrfr,33.0512,12.4423,23.9149,31.8074,42.3321,29.2774,73.4967,102.5652,24.7447,56.1678,...,32.1511,40.1984,47.4291,37.1022,76.429,97.4827,27.5085,47.3911,28.1271,45.962
fc28b_00009,mistral,mtllora,etr_fr,etrfr+orangesum+wikilarge,35.0767,13.1091,25.482,33.7238,42.5381,30.532,73.9669,102.0032,25.7134,57.1174,...,33.9146,42.3962,48.8829,39.1615,77.2008,97.6679,28.3413,48.0869,30.127,47.6977


In [35]:
etr_fr_analysis.expes[0].dataframe.reset_index().sort_values(by="eval_etr_fr_srb").filter(regex="(eval_etr_fr.*(rouge|sari|bert|srb)|epoch|learning_rate)")

Unnamed: 0,epoch,learning_rate,eval_etr_fr_rouge1,eval_etr_fr_rouge2,eval_etr_fr_rougeL,eval_etr_fr_rougeLsum,eval_etr_fr_sari,eval_etr_fr_bertscore_f1_rescaled,eval_etr_fr_bertscore_recall_rescaled,eval_etr_fr_bertscore_precision_rescaled,eval_etr_fr_bertscore_f1,eval_etr_fr_bertscore_recall,eval_etr_fr_bertscore_precision,eval_etr_fr_srb,config/train_loop_config/training_kwargs/learning_rate,config/train_loop_config/training_kwargs/num_train_epochs
103,19.0,0.000027,4.9555,2.2965,4.3193,4.8489,35.9218,-126.0436,-125.8979,-126.6723,15.2901,15.4547,15.1636,9.2376,0.0001,25
102,20.0,0.000022,5.2364,2.2969,4.6128,5.2434,35.6565,-126.4570,-126.4829,-126.8894,15.1352,15.2357,15.0824,9.6493,0.0001,25
101,21.0,0.000018,7.9306,3.4423,6.7010,7.5650,37.0567,-106.0035,-105.8676,-106.4848,22.8002,22.9513,22.7192,13.6316,0.0001,25
100,18.0,0.000031,10.6857,5.0294,8.8122,10.4615,37.7417,-94.0918,-93.8827,-94.6148,27.2641,27.4368,27.1617,16.9824,0.0001,25
99,22.0,0.000013,14.9816,6.8174,12.6421,14.4765,39.8663,-47.1751,-46.7390,-47.7324,44.8461,45.0810,44.7084,23.7186,0.0001,25
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4,16.0,0.000040,33.9930,16.1742,27.3281,32.6486,44.5209,24.6731,22.4652,27.2604,71.7712,70.9816,72.7759,41.1033,0.0001,25
3,11.0,0.000062,35.1285,15.2656,28.3031,32.8051,42.3141,28.1696,25.7649,30.7201,73.0816,72.2165,74.0707,41.2950,0.0001,25
2,17.0,0.000036,35.4140,15.9750,28.1085,33.7719,43.7231,24.1320,23.0828,25.3167,71.5684,71.2127,72.0484,41.4249,0.0001,25
1,12.0,0.000058,35.5721,15.9903,28.3790,33.4360,43.2040,29.6234,26.5534,32.8820,73.6264,72.5116,74.8798,41.6867,0.0001,25


In [378]:
etr_fr_analysis.expes[4]

Unnamed: 0,Unnamed: 1,0
0,test_etr_fr_texts/inputs,"La cloche sonne. C'est la fin de l'année, tant..."
0,test_etr_fr_texts/labels,La cloche sonne. L'école est finie. Super! Ce ...
0,test_etr_fr_texts/predictions,La cloche sonne. C'est la fin de l'année scola...
1,test_etr_fr_texts/inputs,"En arrivant chez lui, Lucas jette son cartable..."
1,test_etr_fr_texts/labels,Lucas rentre dans sa maison. Il goûte. Soudain...
...,...,...
51,test_etr_fr_texts/labels,"Soudain, Bou arrive. Bou attrape la souris. « ..."
51,test_etr_fr_texts/predictions,Bou attrape la souris. « Bravo Bou! » Jules em...
52,test_etr_fr_texts/inputs,C'est un des plus beaux jours de sa vie! D'aut...
52,test_etr_fr_texts/labels,"« Allez, à vélo tout le monde! En route pour u..."
