# auto_experiment
> Automatically study the relationship between performance and meta parameters (a.k.a. hyperparameters or config) via search (a.k.a. sweep) experiments.

See https://github.com/google-research/tuning_playbook for scientific principles of meta-parameter tuning.

In addition to that guide, we also follow the paper "Statistical Comparisons of Classifiers over Multiple Data Sets", using statistical hypothesis testing to compare the performance of different models (produced by different meta parameters).


In [1]:
#| default_exp auto.experiment

In [2]:
#| hide
%load_ext autoreload
%autoreload 2
from nbdev.showdoc import *

In [3]:
#| export
from namable_classify.core import ClassificationTask, ClassificationTaskConfig
from boguan_yuequ.auto import AutoYueQuAlgorithm
import lightning as L
from namable_classify.utils import runs_path
from lightning.pytorch.callbacks.early_stopping import EarlyStopping
from lightning.pytorch.callbacks import ModelSummary, StochasticWeightAveraging, DeviceStatsMonitor, LearningRateMonitor, LearningRateFinder
from lightning.pytorch.loggers import TensorBoardLogger, CSVLogger, WandbLogger



In [4]:
AutoYueQuAlgorithm?

[0;31mInit signature:[0m
[0mAutoYueQuAlgorithm[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0morg_module[0m[0;34m:[0m [0mtorch[0m[0;34m.[0m[0mnn[0m[0;34m.[0m[0mmodules[0m[0;34m.[0m[0mmodule[0m[0;34m.[0m[0mModule[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0malgorithm_name_or_path[0m[0;34m:[0m [0mOptional[0m[0;34m[[0m[0mstr[0m[0;34m][0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mtry_download_finetuned[0m[0;34m:[0m [0mbool[0m [0;34m=[0m [0;32mFalse[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mparameter_efficiency_budget[0m[0;34m:[0m [0mfloat[0m [0;34m=[0m [0mnan[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0;34m*[0m[0margs[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0;34m**[0m[0mkwargs[0m[0;34m,[0m[0;34m[0m
[0;34m[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m      <no docstring>
[0;31mFile:[0m           ~/repos/research/cv/peft_frameworks/BoGuan_YueQu/boguan_yuequ/auto.py
[0;31mType:[0m           type
[0;31m

In [5]:
#| export
def run_with_config(config:ClassificationTaskConfig):
    """Train, validate and test one classification task built from `config`.

    The task is created from `config`, passed to `AutoYueQuAlgorithm` together
    with `config.yuequ`, and then fitted with a Lightning trainer that early-stops
    on "val_acc1" and logs to TensorBoard, CSV and Weights & Biases.

    Returns:
        A `(val_result, test_result)` pair: the unmodified return values of
        `trainer.validate` and `trainer.test`.
    """
    task = ClassificationTask(config)
    task.print_model_pretty()
    # The constructed object is not kept.
    # NOTE(review): presumably this applies the selected tuning method to `task`
    # in place -- confirm against boguan_yuequ.
    AutoYueQuAlgorithm(task, config.yuequ)

    # Stop when validation top-1 accuracy stops improving.
    # Disabled alternatives kept for reference: monitor="val_loss" with mode="min";
    # patience=5 or patience=6.
    early_stopping = EarlyStopping(
        monitor="val_acc1",
        mode="max",
        check_finite=True,
        patience=10,
        check_on_train_epoch_end=False,  # check at the end of validation instead
        verbose=True,
    )
    # Other callbacks that are currently switched off:
    #   StochasticWeightAveraging(swa_lrs=1e-2)
    #     (https://pytorch.org/blog/pytorch-1.6-now-includes-stochastic-weight-averaging/)
    #   DeviceStatsMonitor(cpu_stats=True)
    #   LearningRateFinder()  -- hit a strange bug
    active_callbacks = [
        early_stopping,
        ModelSummary(max_depth=3),
        LearningRateMonitor(),
    ]

    # All loggers write below `runs_path`
    # (a separate runs_path/"tensorboard" directory was tried and dropped).
    active_loggers = [
        TensorBoardLogger(save_dir=runs_path),
        CSVLogger(save_dir=runs_path),
        WandbLogger(project="namable_classify", name="test"),
    ]

    # Trainer options that are currently switched off: max_epochs=15;
    # gradient_clip_val=1.0 with gradient_clip_algorithm="value"; profiler="simple";
    # fast_dev_run=True; limit_train_batches=10 / limit_val_batches=5;
    # strategy="ddp", accelerator="gpu", devices=4.
    trainer = L.Trainer(
        default_root_dir=runs_path,
        enable_checkpointing=True,
        enable_model_summary=True,
        # Run a couple of validation batches first so that a crash in validation
        # is not discovered only after a long stretch of training.
        num_sanity_val_steps=2,
        callbacks=active_callbacks,
        logger=active_loggers,
    )

    trainer.fit(task, datamodule=task.lit_data)
    val_result = trainer.validate(task, datamodule=task.lit_data)
    test_result = trainer.test(task, datamodule=task.lit_data)
    # Raw results are returned; the scalar form would be
    # val_result[0]["val_acc1"] / test_result[0]["test_acc1"].
    return val_result, test_result
    

In [None]:
#| export
from namable_classify.core import ClassificationModelConfig, ClassificationTaskConfig, ClassificationDataConfig
# Base configuration shared by the learning-rate sweep: `learning_rate_exec`
# copies it and overrides only the fields that vary per run.
fixed_meta_parameters = ClassificationTaskConfig(
    label_smoothing=0.1,  # not necessarily fixed.
    cls_model_config=ClassificationModelConfig(
        checkpoint = "google/vit-base-patch16-224-in21k"
    ), 
    dataset_config = ClassificationDataConfig(
        batch_size=64, # from earlier experience this is convenient to run on server 61 (about 10 GB of GPU memory); tuning is done with this value held fixed
    )
)


In [None]:
#| export
# For now just launch a couple of runs directly; there was no time to write more.

import numpy as np

# Sweep bounds in natural-log space: learning rates from 1e-5 up to 1e-1.
start, end = np.log(1e-5), np.log(1e-1)
# 30 log-spaced candidates, put into a random order (global NumPy RNG, unseeded)
# and converted to a plain list of Python floats.
learning_rates = np.random.permutation(
    np.logspace(start, end, num=30, base=np.e)
).tolist()

In [None]:
learning_rates

In [None]:
#| export
# Active seed for this sweep; 0 and 1 are the alternatives left over from earlier edits.
seed = 2


def learning_rate_exec(learning_rate):
    """Run one full-finetune experiment at the given learning rate.

    A copy of `fixed_meta_parameters` gets `yuequ`, `experiment_index` (set to the
    module-level `seed`) and `learning_rate` assigned, in that order, and is then
    passed to `run_with_config`, whose result is returned unchanged.
    """
    run_config = fixed_meta_parameters.copy()
    overrides = {
        "yuequ": 'full_finetune',
        "experiment_index": seed,
        "learning_rate": learning_rate,
    }
    # dicts keep insertion order, so the fields are assigned in the order listed.
    for field_name, field_value in overrides.items():
        setattr(run_config, field_name, field_value)
    return run_with_config(run_config)

In [9]:
#| export
# One run name per learning rate, in the same order: the rate in scientific
# notation with two decimals (e.g. 1.37e-05).
run_names = [format(lr, ".2e") for lr in learning_rates]

In [10]:
run_names

['1.37e-05',
 '1.17e-03',
 '1.27e-04',
 '1.61e-03',
 '7.88e-03',
 '1.89e-05',
 '2.59e-05',
 '8.53e-04',
 '2.21e-03',
 '1.08e-02',
 '3.86e-02',
 '1.00e-01',
 '5.74e-03',
 '3.04e-03',
 '2.81e-02',
 '6.72e-05',
 '1.00e-05',
 '4.18e-03',
 '6.21e-04',
 '2.40e-04',
 '2.04e-02',
 '9.24e-05',
 '1.49e-02',
 '4.52e-04',
 '1.74e-04',
 '7.28e-02',
 '4.89e-05',
 '3.56e-05',
 '5.30e-02',
 '3.29e-04']

In [None]:
#| export
#| eval: false
# Launch the full-finetune learning-rate sweep: `auto_run` receives the per-run
# callable, the learning rates, the matching run names and a sweep name that
# embeds the current `seed`.
# NOTE(review): this cell carries `#| export`, so importing the generated module
# would presumably start the sweep as an import side effect -- confirm that this
# is intended.
from namable_classify.auto.run import auto_run
auto_run(learning_rate_exec, learning_rates, run_names, f"sweep_lr_full_finetune-{seed}")

Seed set to 1


new running round 0
executing run_name 6.723357536499339e-05




Trainer will use only 1 of 8 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=8)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
Trainer already configured with model summary callbacks: [<class 'lightning.pytorch.callbacks.model_summary.ModelSummary'>]. Skipping setting a default `ModelSummary` callback.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 3090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision


Using Full Finetuning Algorithm, not changing the model structure. 
Files already downloaded and verified


/home/ycm/program_files/managers/conda/envs/hf_ai/lib/python3.10/site-packages/lightning/pytorch/trainer/call.py:54: Detected KeyboardInterrupt, attempting graceful shutdown...


Files already downloaded and verified
Files already downloaded and verified


[34m[1mwandb[0m: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33m2603119857[0m ([33mhandicraft-computing[0m). Use [1m`wandb login --relogin`[0m to force relogin


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011134987755212933, max=1.0…

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]


Validation: |          | 0/? [00:00<?, ?it/s]

/home/ycm/program_files/managers/conda/envs/hf_ai/lib/python3.10/site-packages/lightning/pytorch/trainer/call.py:54: Detected KeyboardInterrupt, attempting graceful shutdown...


Files already downloaded and verified


In [9]:
#| export
import optuna
def objective(trial):
    """Run every tuning method over several repetitions and collect accuracies.

    For each method in ("full_finetune", "adapter", "LORA") one learning rate is
    sampled log-uniformly from [1e-5, 1e-1] under the parameter name
    "{method}-learning_rate", and `run_with_config` is executed for experiment
    indices 0..4 with that learning rate.

    Args:
        trial: object providing `suggest_float(name, low, high, log=...)`
            (an Optuna trial is expected).

    Returns:
        dict holding, per method, the entries "{method}-{i}-val_acc1" and
        "{method}_{i}-test_acc1" for every repetition `i`, plus the means
        "{method}-mean-val_acc1" and "{method}-mean-test_acc1".

    NOTE(review): a dict is returned, as before; if this function is passed
    directly to `study.optimize`, Optuna expects a float (or a sequence of
    floats) instead -- confirm how the result is consumed.
    """
    # parameter_efficiency_budget = trial.suggest_float("parameter_efficiency_budget", 1e-7, 1, log=True)
    # Grid search over each target hyperparameter.
    num_repeats = 5
    result_dict = dict()
    for yuequ in ["full_finetune", "adapter", "LORA"]:
        # Every method has its own hyperparameters. The parameter name depends only
        # on the method, so it is sampled once per method rather than once per
        # repetition.
        learning_rate = trial.suggest_float(f"{yuequ}-learning_rate", 1e-5, 1e-1, log=True)
        for experiment_index in range(num_repeats):
            config = ClassificationTaskConfig(
                yuequ=yuequ,
                experiment_index=experiment_index,
                learning_rate=learning_rate,
            )
            val_result, test_result = run_with_config(config)
            # `run_with_config` returns the raw `trainer.validate` / `trainer.test`
            # results, so the scalar metric has to be pulled out of the first entry
            # before it can be averaged.
            # NOTE(review): the "test_acc1" key is assumed to mirror "val_acc1" --
            # confirm against the metrics logged by ClassificationTask.
            val_acc1 = val_result[0]["val_acc1"]
            test_acc1 = test_result[0]["test_acc1"]
            # Do not tune on test_acc1.
            # Principle: bring each target hyperparameter to its optimum on the
            # validation set, then evaluate the model obtained with the best
            # hyperparameters (strictly speaking it should be retrained) on the
            # test set.
            # Tuning belongs to the first stage of the study, if time allows.
            result_dict[f"{yuequ}-{experiment_index}-val_acc1"] = val_acc1
            result_dict[f"{yuequ}_{experiment_index}-test_acc1"] = test_acc1
        val_scores = [result_dict[f"{yuequ}-{i}-val_acc1"] for i in range(num_repeats)]
        result_dict[f"{yuequ}-mean-val_acc1"] = sum(val_scores) / len(val_scores)
        test_scores = [result_dict[f"{yuequ}_{i}-test_acc1"] for i in range(num_repeats)]
        result_dict[f"{yuequ}-mean-test_acc1"] = sum(test_scores) / len(test_scores)
    return result_dict
    

In [1]:
#| hide
import nbdev; nbdev.nbdev_export()