# auto_experiment
> Automatically study the relationship between performance and meta parameters (a.k.a. hyperparameters or config) through search (a.k.a. sweep) experiments.

See https://github.com/google-research/tuning_playbook for scientific research principles on meta-parameter tuning.

In addition to that guide, we also follow the paper "Statistical Comparisons of Classifiers over Multiple Data Sets" and use statistical hypothesis testing to compare the performance of different models (produced by different meta-parameter settings).


In [None]:
#| default_exp auto.experiment

In [7]:
#| hide
%load_ext autoreload
%autoreload 2
from nbdev.showdoc import *

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [9]:
#| export
from namable_classify.core import ClassificationTask, ClassificationTaskConfig
from boguan_yuequ.auto import AutoYueQuAlgorithm
import lightning as L
from namable_classify.utils import runs_path
from lightning.pytorch.callbacks.early_stopping import EarlyStopping
from lightning.pytorch.callbacks import ModelSummary, StochasticWeightAveraging, DeviceStatsMonitor, LearningRateMonitor, LearningRateFinder
from lightning.pytorch.loggers import TensorBoardLogger, CSVLogger, WandbLogger



In [10]:
AutoYueQuAlgorithm?

[0;31mInit signature:[0m
[0mAutoYueQuAlgorithm[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0morg_module[0m[0;34m:[0m [0mtorch[0m[0;34m.[0m[0mnn[0m[0;34m.[0m[0mmodules[0m[0;34m.[0m[0mmodule[0m[0;34m.[0m[0mModule[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0malgorithm_name_or_path[0m[0;34m:[0m [0mOptional[0m[0;34m[[0m[0mstr[0m[0;34m][0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mtry_download_finetuned[0m[0;34m:[0m [0mbool[0m [0;34m=[0m [0;32mFalse[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mparameter_efficiency_budget[0m[0;34m:[0m [0mfloat[0m [0;34m=[0m [0mnan[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0;34m*[0m[0margs[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0;34m**[0m[0mkwargs[0m[0;34m,[0m[0;34m[0m
[0;34m[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m      <no docstring>
[0;31mFile:[0m           ~/repos/research/cv/peft_frameworks/BoGuan_YueQu/boguan_yuequ/auto.py
[0;31mType:[0m           type
[0;31m

In [11]:
import optuna
optuna.Trial?
L.Trainer?

[0;31mInit signature:[0m
[0mL[0m[0;34m.[0m[0mTrainer[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0;34m*[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0maccelerator[0m[0;34m:[0m [0mUnion[0m[0;34m[[0m[0mstr[0m[0;34m,[0m [0mlightning[0m[0;34m.[0m[0mpytorch[0m[0;34m.[0m[0maccelerators[0m[0;34m.[0m[0maccelerator[0m[0;34m.[0m[0mAccelerator[0m[0;34m][0m [0;34m=[0m [0;34m'auto'[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mstrategy[0m[0;34m:[0m [0mUnion[0m[0;34m[[0m[0mstr[0m[0;34m,[0m [0mlightning[0m[0;34m.[0m[0mpytorch[0m[0;34m.[0m[0mstrategies[0m[0;34m.[0m[0mstrategy[0m[0;34m.[0m[0mStrategy[0m[0;34m][0m [0;34m=[0m [0;34m'auto'[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mdevices[0m[0;34m:[0m [0mUnion[0m[0;34m[[0m[0mList[0m[0;34m[[0m[0mint[0m[0;34m][0m[0;34m,[0m [0mstr[0m[0;34m,[0m [0mint[0m[0;34m][0m [0;34m=[0m [0;34m'auto'[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mnum_nodes[0m[0;34m:

In [12]:
# | export
from clearml import Task
import optuna
from optuna.integration import PyTorchLightningPruningCallback

# Sub-folder of `runs_path` used by `run_with_config` as the Lightning
# `default_root_dir` and as the save directory of the TensorBoard/CSV loggers.
auto_exp_runs_path = runs_path / "auto_experiment"


def run_with_config(
    config: ClassificationTaskConfig,
    trial: optuna.Trial = None,
    tuning_metric="val_acc1",  # Seriously: for academic integrity, never tune on "test_acc1".
    tuning_mode="max",
):
    """Train, validate and test one classification task described by ``config``.

    The run is seeded with ``config.experiment_index``, tracked as a ClearML
    task named by ``config.experiment_project`` / ``config.experiment_task``,
    and logged to TensorBoard, CSV and Weights & Biases.

    Args:
        config: Task configuration; ``experiment_index``, ``yuequ``,
            ``experiment_project`` and ``experiment_task`` are read here.
        trial: Optional Optuna trial. When given, a
            ``PyTorchLightningPruningCallback`` monitoring ``tuning_metric``
            is attached to the trainer.
        tuning_metric: Name of the logged metric watched by early stopping
            (and by the pruning callback).
        tuning_mode: ``"max"`` or ``"min"``; passed to ``EarlyStopping``.

    Returns:
        ``(val_result, test_result)`` exactly as returned by
        ``trainer.validate`` and ``trainer.test``.
    """
    L.seed_everything(config.experiment_index)
    cls_task = ClassificationTask(config)
    cls_task.print_model_pretty()
    # The constructed object is discarded, as before; presumably the
    # constructor adapts `cls_task` in place -- TODO confirm.
    AutoYueQuAlgorithm(cls_task, config.yuequ)
    # https://clear.ml/docs/latest/docs/guides/frameworks/pytorch_lightning/pytorch_lightning_example/
    clearml_task = Task.init(
        project_name=config.experiment_project,
        task_name=config.experiment_task,
    )
    try:
        callbacks = [
            # Alternative: EarlyStopping(monitor="val_loss", mode="min")
            EarlyStopping(
                monitor=tuning_metric,
                mode=tuning_mode,
                check_finite=True,
                patience=10,  # 5 and 6 were also tried
                check_on_train_epoch_end=False,  # check on validation end
                verbose=True,
            ),
            ModelSummary(max_depth=3),
            # https://pytorch.org/blog/pytorch-1.6-now-includes-stochastic-weight-averaging/
            # StochasticWeightAveraging(swa_lrs=1e-2),
            # DeviceStatsMonitor(cpu_stats=True)
            LearningRateMonitor(),
            # LearningRateFinder()  # disabled: shows a strange bug
        ]
        if trial is not None:
            callbacks.append(
                PyTorchLightningPruningCallback(trial, monitor=tuning_metric)
            )

        logger = [
            TensorBoardLogger(save_dir=auto_exp_runs_path),
            CSVLogger(save_dir=auto_exp_runs_path),
            WandbLogger(
                project=config.experiment_project, name=config.experiment_task
            ),
        ]

        trainer = L.Trainer(
            default_root_dir=auto_exp_runs_path,
            enable_checkpointing=True,
            enable_model_summary=True,
            # Surface validation crashes before a long training epoch is spent.
            num_sanity_val_steps=2,
            callbacks=callbacks,
            logger=logger,
            # Optional knobs kept for reference:
            # max_epochs=15,
            # gradient_clip_val=1.0, gradient_clip_algorithm="value",
            # profiler="simple",
            # fast_dev_run=True,
            # limit_train_batches=10, limit_val_batches=5,
            # strategy="ddp", accelerator="gpu", devices=4,
        )

        trainer.fit(cls_task, datamodule=cls_task.lit_data)
        val_result = trainer.validate(cls_task, datamodule=cls_task.lit_data)
        test_result = trainer.test(cls_task, datamodule=cls_task.lit_data)
        # Scalar metrics can be read as val_result[0]["val_acc1"] and
        # test_result[0]["test_acc1"]; the raw results are returned instead.
        return val_result, test_result
    finally:
        # This function is called repeatedly within one process; close the
        # ClearML task so the next Task.init() does not find this one still
        # open (also runs when training raises or the trial is pruned).
        clearml_task.close()

In [13]:
#| export
from namable_classify.core import ClassificationModelConfig, ClassificationTaskConfig, ClassificationDataConfig
# Baseline task configuration for the meta parameters that are not swept.
# NOTE(review): `objective` in this file builds its own ClassificationTaskConfig
# and does not reference this object -- confirm how it is meant to be applied.
fixed_meta_parameters = ClassificationTaskConfig(
    experiment_project = "", 
    label_smoothing=0.1,  # not necessarily fixed; may become a tuned parameter
    cls_model_config=ClassificationModelConfig(
        checkpoint = "google/vit-base-patch16-224-in21k"
    ), 
    dataset_config = ClassificationDataConfig(
        batch_size=64, # from earlier experience this runs comfortably on server 61 with roughly 10 GB of GPU memory; all tuning is done with this value fixed
    )
)


我们想要得到一个 dataframe：对于这一次 Study 中的每一次实验，记录 lightning 的输出存在哪个目录、最后得到的 val 和 test 指标是什么、optuna 建议的超参数是什么，以及其他超参数是什么。

In [14]:
#| export
# Module-level list, initially empty; nothing visible in this file appends to it.
# Presumably meant to collect per-run records for a results dataframe -- TODO confirm.
study_results = []

In [19]:
#| export
from boguan_yuequ.auto import huggingface_peft_budget_config_key
# `.name` of every key in `huggingface_peft_budget_config_key`; `objective`
# iterates over this list, passing each entry as the `yuequ` config value.
yuequ_tried_algs = [k.name for k in huggingface_peft_budget_config_key.keys()]

In [20]:
yuequ_tried_algs

['LORA', 'ADALORA', 'LOHA', 'LOKR', 'OFT', 'VERA', 'FOURIERFT', 'VBLORA']

In [None]:
#| export
import optuna
def objective(trial):
    """Run every algorithm in ``yuequ_tried_algs`` with repeated seeds.

    For each algorithm one learning rate is suggested by ``trial`` (log-uniform
    in [1e-5, 1e-1]) and the experiment is repeated with ``experiment_index``
    0..4 at identical settings. Per-run and mean ``val_acc1`` / ``test_acc1``
    are collected.

    Args:
        trial: Object providing ``suggest_float`` (an Optuna trial).

    Returns:
        dict mapping ``"{alg}-{index}-val_acc1"``, ``"{alg}-{index}-test_acc1"``,
        ``"{alg}-mean-val_acc1"`` and ``"{alg}-mean-test_acc1"`` to the
        corresponding metric values.

    NOTE(review): Optuna's ``Study.optimize`` expects an objective to return a
    float (or a sequence of floats). The dict return is kept here for
    backward compatibility; wrap this function before handing it to a study.
    """
    num_repeats = 5  # repeated runs (seeds) per algorithm

    # parameter_efficiency_budget = trial.suggest_float("parameter_efficiency_budget", 1e-7, 1, log=True)
    # Grid over every value of the target meta parameter (the algorithm).
    result_dict = dict()
    for yuequ in yuequ_tried_algs:
        # Each algorithm gets its own learning-rate hyperparameter.
        learning_rate = trial.suggest_float(
            f"{yuequ}-learning_rate", 1e-5, 1e-1, log=True
        )
        val_scores = []
        test_scores = []
        for experiment_index in range(num_repeats):
            # Inside this loop no new parameters are suggested: the settings
            # stay identical and only the seed changes between repeats.
            config = ClassificationTaskConfig(
                yuequ=yuequ,
                experiment_index=experiment_index,
                learning_rate=learning_rate,
            )
            val_result, test_result = run_with_config(config)
            # run_with_config returns the raw trainer.validate()/test()
            # results; pull out the scalar metrics so they can be averaged.
            val_acc1 = val_result[0]["val_acc1"]
            test_acc1 = test_result[0]["test_acc1"]
            # Never tune on test_acc1. Principle: tune each target meta
            # parameter to its best validation score, then evaluate the model
            # obtained with the best setting (strictly, it should be
            # retrained) on the test set.
            # Tuning belongs to the first stage of the research; if time is
            # short ... (original note ends here).
            result_dict[f"{yuequ}-{experiment_index}-val_acc1"] = val_acc1
            result_dict[f"{yuequ}-{experiment_index}-test_acc1"] = test_acc1
            val_scores.append(val_acc1)
            test_scores.append(test_acc1)
        result_dict[f"{yuequ}-mean-val_acc1"] = sum(val_scores) / len(val_scores)
        result_dict[f"{yuequ}-mean-test_acc1"] = sum(test_scores) / len(test_scores)
    return result_dict
    

In [None]:
#| hide
import nbdev; nbdev.nbdev_export()