diff --git a/bert_squeeze/assistants/__init__.py b/bert_squeeze/assistants/__init__.py new file mode 100644 index 0000000..4038508 --- /dev/null +++ b/bert_squeeze/assistants/__init__.py @@ -0,0 +1 @@ +from .train_assistant import TrainAssistant diff --git a/bert_squeeze/assistants/configs/train_bert.yaml b/bert_squeeze/assistants/configs/train_bert.yaml new file mode 100644 index 0000000..5eaf467 --- /dev/null +++ b/bert_squeeze/assistants/configs/train_bert.yaml @@ -0,0 +1,47 @@ +general: + debug: false + do_train: true + do_eval: false + get_mismatched: true + evaluate_during_training: true + labels: [ 0, 1 ] + output_dir: outputs + save_steps: 500 + validation_every_n_epoch: 1 + +train: + adam_eps: 1e-8 + accumulation_steps: 1 + auto_lr: false + discriminative_learning: true + dropout: 0.2 + layer_lr_decay: 0.95 + learning_rates: [ 2e-5 ] + logging_steps: 50 + lr_scheduler: true + max_grad_norm: 1.0 + num_epochs: 10 + optimizer: bertadam + objective: lsl + smoothing: 0.1 + warmup_ratio: 0.06 + warmup_steps: true + weight_decay: 0.01 + +model: + _target_: bert_squeeze.models.lt_bert.LtCustomBert + num_labels: 2 + pretrained_model: "bert-base-cased" + training_config: ${train} + +data: + _target_: bert_squeeze.data.modules.transformer_module.TransformerDataModule + dataset_config: + is_local: false + label_col: label + path: + split: + text_col: text + truncate_mode: head + max_length: 256 + tokenizer_name: ${model.pretrained_model} diff --git a/bert_squeeze/assistants/configs/train_deebert.yaml b/bert_squeeze/assistants/configs/train_deebert.yaml new file mode 100644 index 0000000..c6eed11 --- /dev/null +++ b/bert_squeeze/assistants/configs/train_deebert.yaml @@ -0,0 +1,51 @@ +general: + debug: false + do_train: true + do_eval: false + get_mismatched: true + evaluate_during_training: true + labels: [ 0,1 ] + num_labels: 2 + output_dir: outputs + save_steps: 500 + validation_every_n_epoch: 1 + +train: + adam_eps: 1e-8 + accumulation_steps: 1 + auto_lr: false + discriminative_learning: true + dropout: 0.2 + layer_lr_decay: 0.95 + learning_rates: [ 2e-5 ] + logging_steps: 100 + lr_scheduler: true + max_grad_norm: 1.0 + num_epochs: 10 + optimizer: bertadam + objective: lsl + smoothing: 0.1 + warmup_ratio: 0.06 + warmup_steps: true + weight_decay: 0.01 + + train_highway: true + early_exit_entropy: -1 + +model: + _target_: bert_squeeze.models.lt_deebert.LtDeeBert + training_config: ${train} + pretrained_model: "bert-base-cased" + num_labels: ${general.num_labels} + +data: + _target_: bert_squeeze.data.modules.transformer_module.TransformerDataModule + dataset_config: + is_local: false + path: emotion + split: + text_col: text + label_col: label + truncate_mode: head + tokenizer_name: ${model.pretrained_model} + max_length: 256 \ No newline at end of file diff --git a/examples/configs/train_labse.yaml b/bert_squeeze/assistants/configs/train_fastbert.yaml similarity index 50% rename from examples/configs/train_labse.yaml rename to bert_squeeze/assistants/configs/train_fastbert.yaml index fd3ceff..bb1474b 100644 --- a/examples/configs/train_labse.yaml +++ b/bert_squeeze/assistants/configs/train_fastbert.yaml @@ -1,53 +1,45 @@ -task: - name: train - general: + debug: false do_train: true do_eval: false - debug: false - labels: - - sadness - - joy - - love - - anger - - fear - - surprise + get_mismatched: true + evaluate_during_training: true + labels: [ 0, 1 ] output_dir: outputs save_steps: 500 validation_every_n_epoch: 1 - evaluate_during_training: true - get_mismated: true - model_path: 
../checkpoints/BER-511/ + +callbacks: + - _target_: bert_squeeze.utils.callbacks.fastbert_logic.FastBertLogic train: - training_batch_size: 8 - num_epochs: 10 - dropout: 0.2 - objective: lsl - smoothing: 0.1 - optimizer: bertadam - weight_decay: 0.01 - discriminative_learning: true - learning_rates: [ 2e-5 ] + adam_eps: 1e-8 + accumulation_steps: 1 auto_lr: false + discriminative_learning: true + dropout: 0.2 layer_lr_decay: 0.95 + learning_rates: [ 2e-5 ] + logging_steps: 50 lr_scheduler: true - adam_eps: 1e-8 + max_grad_norm: 1.0 + num_epochs: 10 + optimizer: bertadam + objective: lsl + smoothing: 0.1 warmup_ratio: 0.06 warmup_steps: true - max_grad_norm: 1.0 - accumulation_steps: 1 - eval_batch_size: 16 - logging_steps: 50 + weight_decay: 0.01 model: - _target_: bert-squeeze.models.lt_labse.LtCustomLabse + _target_: bert_squeeze.models.lt_fastbert.LtFastBert training_config: ${train} - pretrained_model: "sentence-transformers/LaBSE" - num_labels: 6 + pretrained_model: "bert-base-cased" + num_labels: 2 + scorer_type: "fast" data: - _target_: bert-squeeze.data.modules.transformer_module.TransformerDataModule + _target_: bert_squeeze.data.modules.transformer_module.TransformerDataModule dataset_config: is_local: false path: emotion @@ -56,16 +48,4 @@ data: label_col: label truncate_mode: head tokenizer_name: ${model.pretrained_model} - max_length: 256 - train_batch_size: ${train.training_batch_size} - eval_batch_size: ${train.eval_batch_size} - -neptune: - user_name: julesbelveze - project: bert-tricks - tags: [ ] - logger: - _target_: neptune.new.integrations.pytorch_lightning.NeptuneLogger - project: ${neptune.user_name}/${neptune.project} - name: ${task.name} - + max_length: 256 \ No newline at end of file diff --git a/bert_squeeze/assistants/configs/train_lr.yaml b/bert_squeeze/assistants/configs/train_lr.yaml new file mode 100644 index 0000000..d926660 --- /dev/null +++ b/bert_squeeze/assistants/configs/train_lr.yaml @@ -0,0 +1,42 @@ +general: + debug: false + do_train: true + do_eval: false + get_mismatched: true + evaluate_during_training: true + labels: [ 0,1 ] + num_labels: 2 + output_dir: outputs + save_steps: 500 + validation_every_n_epoch: 1 + +train: + accumulation_steps: 1 + auto_lr: false + discriminative_learning: false + eval_batch_size: 16 + learning_rates: [ 2e-1 ] + logging_steps: 100 + max_grad_norm: 1.0 + num_epochs: 10 + objective: ce + smoothing: 0.1 + training_batch_size: 16 + +model: + _target_: bert_squeeze.models.lr.BowLogisticRegression + training_config: ${train} + vocab_size: 5000 + embed_dim: 256 + num_labels: ${general.num_labels} + name: "bow_lr" + +data: + _target_: bert_squeeze.data.modules.lr_module.LrDataModule + dataset_config: + is_local: false + label_col: label + path: + split: + text_col: text + max_features: ${model.vocab_size} diff --git a/bert_squeeze/assistants/configs/train_lstm.yaml b/bert_squeeze/assistants/configs/train_lstm.yaml new file mode 100644 index 0000000..033af78 --- /dev/null +++ b/bert_squeeze/assistants/configs/train_lstm.yaml @@ -0,0 +1,40 @@ +general: + debug: false + do_train: true + do_eval: false + get_mismatched: true + evaluate_during_training: true + labels: [ 0,1 ] + num_labels: 2 + output_dir: outputs + save_steps: 500 + validation_every_n_epoch: 1 + +train: + accumulation_steps: 1 + auto_lr: false + dropout: 0.2 + learning_rates: [ 2e-3 ] + logging_steps: 100 + max_grad_norm: 1.0 + num_epochs: 10 + objective: ce + smoothing: 0.1 + +model: + _target_: bert_squeeze.models.lstm.LtLSTM + hidden_dim: 128 + name: lstm + 
num_labels: ${general.num_labels} + training_config: ${train} + vocab_size: 20000 + +data: + _target_: bert_squeeze.data.modules.lstm_module.LSTMDataModule + dataset_config: + is_local: false + path: emotion + split: + text_col: text + label_col: label + max_features: ${model.vocab_size} \ No newline at end of file diff --git a/examples/configs/train_theseus_labse.yaml b/bert_squeeze/assistants/configs/train_theseus_bert.yaml similarity index 54% rename from examples/configs/train_theseus_labse.yaml rename to bert_squeeze/assistants/configs/train_theseus_bert.yaml index 2f174a5..b3f883e 100644 --- a/examples/configs/train_theseus_labse.yaml +++ b/bert_squeeze/assistants/configs/train_theseus_bert.yaml @@ -1,57 +1,45 @@ -task: - name: train - general: + debug: false do_train: true do_eval: false - debug: false - labels: - - sadness - - joy - - love - - anger - - fear - - surprise + get_mismatched: true + evaluate_during_training: true + labels: [ 0, 1 ] output_dir: outputs save_steps: 500 validation_every_n_epoch: 1 - evaluate_during_training: true - get_mismated: true - model_path: ../checkpoints/BER-511/ train: - training_batch_size: 8 - num_epochs: 10 - dropout: 0.2 - objective: lsl - smoothing: 0.1 - optimizer: bertadam - weight_decay: 0.01 - discriminative_learning: true - learning_rates: [ 2e-5 ] + adam_eps: 1e-8 + accumulation_steps: 1 auto_lr: false + discriminative_learning: true + dropout: 0.2 layer_lr_decay: 0.95 + learning_rates: [ 2e-5 ] + logging_steps: 50 lr_scheduler: true - adam_eps: 1e-8 + max_grad_norm: 1.0 + num_epochs: 10 + optimizer: bertadam + objective: lsl + smoothing: 0.1 warmup_ratio: 0.06 warmup_steps: true - max_grad_norm: 1.0 - accumulation_steps: 1 - eval_batch_size: 16 - logging_steps: 50 + weight_decay: 0.01 model: - _target_: bert-squeeze.models.lt_theseus_bert.LtTheseusBert + _target_: bert_squeeze.models.lt_theseus_bert.LtTheseusBert training_config: ${train} - pretrained_model: "sentence-transformers/LaBSE" - num_labels: 6 + pretrained_model: "bert-base-cased" + num_labels: 2 replacement_scheduler: type: "linear" base_replacing_rate: 0.3 coefficient: 0.0006 data: - _target_: bert-squeeze.data.modules.transformer_module.TransformerDataModule + _target_: bert_squeeze.data.modules.transformer_module.TransformerDataModule dataset_config: is_local: false path: emotion @@ -61,15 +49,3 @@ data: truncate_mode: head tokenizer_name: ${model.pretrained_model} max_length: 256 - train_batch_size: ${train.training_batch_size} - eval_batch_size: ${train.eval_batch_size} - -neptune: - user_name: julesbelveze - project: bert-tricks - tags: [ ] - logger: - _target_: neptune.new.integrations.pytorch_lightning.NeptuneLogger - project: ${neptune.user_name}/${neptune.project} - name: ${task.name} - diff --git a/bert_squeeze/assistants/train_assistant.py b/bert_squeeze/assistants/train_assistant.py new file mode 100644 index 0000000..ffca42e --- /dev/null +++ b/bert_squeeze/assistants/train_assistant.py @@ -0,0 +1,152 @@ +import logging +import os +from typing import Any, Dict, List, Optional + +from hydra.utils import instantiate +from omegaconf import OmegaConf +from pkg_resources import resource_filename +from pydantic.utils import deep_update +from pytorch_lightning.callbacks.callback import Callback +from pytorch_lightning.loggers import TensorBoardLogger +from pytorch_lightning.loggers.logger import Logger + +CONFIG_MAPPER = { + "lr": "train_lr.yaml", + "bert": "train_bert.yaml", + "lstm": "train_lstm.yaml", + "deebert": "train_deebert.yaml", + "fastbert": 
"train_fastbert.yaml", + "theseus-bert": "train_theseus_bert.yaml" +} + + +class TrainAssistant(object): + """ + Helper object that holds and instantiate the needed for training. + + For every available model for fine-tuning it will load a default configuration that + can be overwritten by passing some keyword arguments. + It contains four main sub-configurations: + - general: various high level parameters unrelated to the training procedure + - train: training related parameters + - model: parameters necessary to build and define the model + - data: parameters necessary to define the dataset and featurize it + + Args: + name (str): + name of the base model to fine-tune + dataset_path (str): + path of the dataset to use + general_kwargs (Dict[str, Any]): + keyword arguments that can be added or overwrite the default 'general' configuration + train_kwargs (Dict[str, Any]): + keyword arguments that can be added or overwrite the default 'train' configuration + model_kwargs (Dict[str, Any]): + keyword arguments that can be added or overwrite the default 'model' configuration + data_kwargs (Dict[str, Any]): + keyword arguments that can be added or overwrite the default 'data' configuration + logger_kwargs (Dict[str, Any]): + keyword arguments that can be added or overwrite the default 'logger' configuration + callbacks (List[Callback]): + list of callbacks to use during training + """ + + def __init__( + self, + name: str, + dataset_path: str, + general_kwargs: Dict[str, Any] = None, + train_kwargs: Dict[str, Any] = None, + model_kwargs: Dict[str, Any] = None, + data_kwargs: Dict[str, Any] = None, + logger_kwargs: Dict[str, Any] = None, + callbacks: List[Callback] = None + ): + conf = OmegaConf.load( + resource_filename("bert_squeeze", os.path.join("assistants/configs", CONFIG_MAPPER[name])) + ) + if data_kwargs is not None and data_kwargs.get("dataset_config", {}).get("path") is not None: + logging.warning("Found value for `dataset_config.path` which conflicts with parameter `dataset_path`, using" + "value from the later.") + conf["data"]["dataset_config"]["path"] = dataset_path + + for name, kws in zip(["general", "train", "model", "data", "logger", "callbacks"], + [general_kwargs, train_kwargs, model_kwargs, data_kwargs, logger_kwargs, callbacks]): + if kws is not None: + conf[name] = deep_update(conf[name], kws) + + self.name = name + self.general = conf["general"] + self.train = conf["train"] + self._model_conf = conf["model"] + self._data_conf = conf["data"] + self._logger_conf = conf.get("logger") + self._callbacks_conf = conf.get("callbacks", []) + + self._model = None + self._data = None + self._logger = None + self._callbacks = None + + @property + def model(self) -> Any: + """""" + if self._model is None: + self.model = instantiate(self._model_conf) + return self._model + + @model.setter + def model(self, value: Any) -> None: + """""" + self._model = value + + @property + def data(self) -> Any: + """""" + if self._data is None: + data = instantiate(self._data_conf) + data.prepare_data() + data.setup() + self.data = data + return self._data + + @data.setter + def data(self, value: Any) -> None: + """""" + self._data = value + + @property + def logger(self) -> Logger: + """""" + if self._logger is None: + if self._logger_conf is not None: + self.logger = instantiate(self._logger_conf) + else: + self.logger = TensorBoardLogger(self.general["output_dir"]) + return self._logger + + @logger.setter + def logger(self, value: Logger) -> None: + """""" + self._logger = value + + @property + 
def callbacks(self) -> List[Callback]: + """""" + if self._callbacks is None: + if self._callbacks_conf is not None: + self.callbacks = [instantiate(callback) for callback in self._callbacks_conf] + else: + self.callbacks = [] + return self._callbacks + + @callbacks.setter + def callbacks(self, value: List[Callback]) -> None: + """""" + self._callbacks = value + + def __repr__(self): + return f"" + + def __str__(self): + return f"Assistant_{self.name}" diff --git a/examples/configs/distil_config.yaml b/examples/configs/distil_config.yaml deleted file mode 100644 index 1fd0088..0000000 --- a/examples/configs/distil_config.yaml +++ /dev/null @@ -1,109 +0,0 @@ -task: - name: distil - strategy: t2lr - -general: - do_train: true - do_eval: false - debug: false - labels: - - sadness - - joy - - love - - anger - - fear - - surprise - output_dir: outputs - save_steps: 1000 - validation_every_n_epoch: 1 - evaluate_during_training: true - get_mismated: true - - -train: - num_epochs: 10 - dropout: 0.2 - objective: lsl - smoothing: 0.1 - optimizer: bertadam - weight_decay: 0.01 - discriminative_learning: false - learning_rates: [ 2e-5 ] - auto_lr: false - layer_lr_decay: 0.95 - lr_scheduler: true - adam_eps: 1e-8 - warmup_ratio: 0.06 - warmup_steps: true - max_grad_norm: 1.0 - accumulation_steps: 1 - eval_batch_size: 2 - training_batch_size: 2 - num_labels: 6 - alpha: 0.5 - logging_steps: 100 - - -model: - _target_: bert-squeeze.distillation.distiller.Distiller - teacher_config: - _target_: bert-squeeze.models.lt_labse.LtCustomLabse - training_config: ${train} # for the sake of compatibility but useless as model won't be finetuned - pretrained_model: "sentence-transformers/LaBSE" - num_labels: ${train.num_labels} - checkpoint_path: ../checkpoints/BER-769/N-Step-Checkpoint_1_2000.ckpt - name: labse - student_config: - _target_: bert-squeeze.models.lr.BowLogisticRegression - architecture: "lr" - vocab_size: 10000 - num_labels: ${train.num_labels} - name: bow_lr - training_config: ${train} - training_config: ${train} - - -data: - train_batch_size: ${train.training_batch_size} - eval_batch_size: ${train.eval_batch_size} - _target_: bert-squeeze.data.modules.distillation_module.DistillationDataModule - teacher_module: - _target_: bert-squeeze.data.modules.transformer_module.TransformerDataModule - dataset_config: - is_local: false - name: emotion - split: - text_col: text - label_col: label - truncate_mode: head - tokenizer_name: ${model.teacher_config.pretrained_model} - max_length: 256 - train_batch_size: ${train.training_batch_size} - eval_batch_size: ${train.eval_batch_size} - student_module: - _target_: bert-squeeze.data.modules.lr_module.LrDataModule - dataset_config: - is_local: false - path: emotion - split: - text_col: text - label_col: label - truncate_mode: head - max_features: 30000 - train_batch_size: ${train.training_batch_size} - eval_batch_size: ${train.eval_batch_size} - - -neptune: - user_name: julesbelveze - project: bert-tricks - tags: [ ] - logger: - _target_: neptune.new.integrations.pytorch_lightning.NeptuneLogger - project: ${neptune.user_name}/${neptune.project} - name: ${task.name} - - -hydra: - run: - dir: ./outputs/${task.name}/${task.strategy}/${now:%Y-%m-%d_%H-%M-%S} \ No newline at end of file diff --git a/examples/configs/distil_t2t_config.yaml b/examples/configs/distil_t2t_config.yaml deleted file mode 100644 index a04db0f..0000000 --- a/examples/configs/distil_t2t_config.yaml +++ /dev/null @@ -1,110 +0,0 @@ -task: - name: distil - strategy: t2t - -general: - do_train: true 
- do_eval: false - debug: false - labels: - - sadness - - joy - - love - - anger - - fear - - surprise - output_dir: outputs - save_steps: 1000 - validation_every_n_epoch: 1 - evaluate_during_training: true - get_mismated: true - - -train: - num_epochs: 10 - dropout: 0.2 - objective: lsl - smoothing: 0.1 - optimizer: adamw - weight_decay: 0.01 - discriminative_learning: false - learning_rates: [ 2e-5 ] - auto_lr: false - layer_lr_decay: 0.95 - lr_scheduler: true - adam_eps: 1e-8 - warmup_ratio: 0.06 - warmup_steps: true - max_grad_norm: 1.0 - accumulation_steps: 1 - eval_batch_size: 2 - training_batch_size: 2 - num_labels: 6 - alpha: 0.5 - logging_steps: 100 - - -model: - _target_: bert-squeeze.distillation.distiller.Distiller - teacher_config: - _target_: bert-squeeze.models.lt_labse.LtCustomLabse - training_config: ${train} # for the sake of compatibility but useless as model won't be finetuned - pretrained_model: "sentence-transformers/LaBSE" - num_labels: ${train.num_labels} - checkpoint_path: ../checkpoints/BER-511/checkpoints/N-Step-Checkpoint_9_4500.ckpt - name: labse - student_config: - _target_: bert-squeeze.models.lt_bert.LtCustomBert - architecture: "transformer" - training_config: ${train} # for the sake of compatibility but useless as model won't be finetuned - pretrained_model: "bert-base-cased" - num_labels: ${train.num_labels} - name: bert - training_config: ${train} - - -data: - train_batch_size: ${train.training_batch_size} - eval_batch_size: ${train.eval_batch_size} - _target_: bert-squeeze.data.modules.distillation_module.DistillationDataModule - teacher_module: - _target_: bert-squeeze.data.modules.transformer_module.TransformerDataModule - dataset_config: - is_local: false - name: emotion - split: - text_col: text - label_col: label - truncate_mode: head - tokenizer_name: ${model.teacher_config.pretrained_model} - max_length: 256 - train_batch_size: ${train.training_batch_size} - eval_batch_size: ${train.eval_batch_size} - student_module: - _target_: bert-squeeze.data.modules.transformer_module.TransformerDataModule - dataset_config: - is_local: false - path: emotion - split: - text_col: text - label_col: label - truncate_mode: head - tokenizer_name: ${model.student_config.pretrained_model} - max_length: 256 - train_batch_size: ${train.training_batch_size} - eval_batch_size: ${train.eval_batch_size} - - -neptune: - user_name: julesbelveze - project: bert-tricks - tags: [ ] - logger: - _target_: neptune.new.integrations.pytorch_lightning.NeptuneLogger - project: ${neptune.user_name}/${neptune.project} - name: ${task.name} - - -hydra: - run: - dir: ./outputs/${task.name}/${task.strategy}/${now:%Y-%m-%d_%H-%M-%S} \ No newline at end of file diff --git a/examples/configs/hard_distil_t2t_config.yaml b/examples/configs/hard_distil_t2t_config.yaml deleted file mode 100644 index e8bc69e..0000000 --- a/examples/configs/hard_distil_t2t_config.yaml +++ /dev/null @@ -1,124 +0,0 @@ -task: - name: distil - strategy: t2t - -general: - do_train: true - do_eval: false - debug: false - labels: - - sadness - - joy - - love - - anger - - fear - - surprise - output_dir: outputs - save_steps: 1000 - validation_every_n_epoch: 1 - evaluate_during_training: true - get_mismated: true - - -train: - num_epochs: 10 - dropout: 0.2 - objective: lsl - smoothing: 0.1 - optimizer: adamw - weight_decay: 0.01 - discriminative_learning: false - learning_rates: [ 2e-5 ] - auto_lr: false - layer_lr_decay: 0.95 - lr_scheduler: true - adam_eps: 1e-8 - warmup_ratio: 0.06 - warmup_steps: true - max_grad_norm: 
1.0 - accumulation_steps: 1 - eval_batch_size: 2 - training_batch_size: 2 - num_labels: 6 - alpha: 0.5 - logging_steps: 100 - - -model: - _target_: bert-squeeze.distillation.distiller.Distiller - teacher_config: - _target_: bert-squeeze.models.lt_labse.LtCustomLabse - training_config: ${train} # for the sake of compatibility but useless as model won't be finetuned - pretrained_model: "sentence-transformers/LaBSE" - num_labels: ${train.num_labels} - checkpoint_path: ../checkpoints/BER-511/checkpoints/N-Step-Checkpoint_9_4500.ckpt - name: labse - student_config: - _target_: bert-squeeze.models.lt_bert.LtCustomBert - architecture: "transformer" - training_config: ${train} # for the sake of compatibility but useless as model won't be finetuned - pretrained_model: "bert-base-cased" - num_labels: ${train.num_labels} - name: bert - training_config: ${train} - - -data: - train_batch_size: ${train.training_batch_size} - eval_batch_size: ${train.eval_batch_size} - _target_: bert-squeeze.data.modules.distillation_module.DistillationDataModule - teacher_module: - _target_: bert-squeeze.data.modules.transformer_module.TransformerDataModule - dataset_config: - is_local: false - path: emotion - split: - text_col: text - label_col: label - truncate_mode: head - tokenizer_name: ${model.teacher_config.pretrained_model} - max_length: 256 - train_batch_size: ${train.training_batch_size} - eval_batch_size: ${train.eval_batch_size} - student_module: - _target_: bert-squeeze.data.modules.transformer_module.TransformerDataModule - dataset_config: - is_local: false - path: emotion - split: - text_col: text - label_col: label - truncate_mode: head - tokenizer_name: ${model.student_config.pretrained_model} - max_length: 256 - train_batch_size: ${train.training_batch_size} - eval_batch_size: ${train.eval_batch_size} - hard_labeler: - _target_: bert-squeeze.distillation.utils.labeler.HardLabeler - labeler_config: - teacher: ${model.teacher_config} - pretrained_model: ${model.teacher_config.pretrained_model} - num_labels: ${model.teacher_config.num_labels} - checkpoint_path: ${model.teacher_config.checkpoint_path} - max_length: ${data.teacher_module.max_length} - dataset_config: - is_local: false - name: go_emotions - split: raw - text_col: text - max_samples: 10000 - max_length: ${data.teacher_module.max_length} - -neptune: - user_name: julesbelveze - project: bert-tricks - tags: [ ] - logger: - _target_: neptune.new.integrations.pytorch_lightning.NeptuneLogger - project: ${neptune.user_name}/${neptune.project} - name: ${task.name} - - -hydra: - run: - dir: ./outputs/${task.name}/${task.strategy}/${now:%Y-%m-%d_%H-%M-%S} \ No newline at end of file diff --git a/examples/configs/soft_distil_t2t_config.yaml b/examples/configs/soft_distil_t2t_config.yaml deleted file mode 100644 index a5aa088..0000000 --- a/examples/configs/soft_distil_t2t_config.yaml +++ /dev/null @@ -1,116 +0,0 @@ -task: - name: distil - strategy: t2t - -general: - do_train: true - do_eval: false - debug: false - labels: - - sadness - - joy - - love - - anger - - fear - - surprise - output_dir: outputs - save_steps: 1000 - validation_every_n_epoch: 1 - evaluate_during_training: true - get_mismated: true - - -train: - num_epochs: 10 - dropout: 0.2 - objective: lsl - smoothing: 0.1 - optimizer: adamw - weight_decay: 0.01 - discriminative_learning: false - learning_rates: [ 2e-5 ] - auto_lr: false - layer_lr_decay: 0.95 - lr_scheduler: true - adam_eps: 1e-8 - warmup_ratio: 0.06 - warmup_steps: true - max_grad_norm: 1.0 - accumulation_steps: 1 - 
eval_batch_size: 2 - training_batch_size: 2 - num_labels: 6 - alpha: 0.5 - logging_steps: 100 - - -model: - _target_: bert-squeeze.distillation.distiller.Distiller - teacher_config: - _target_: bert-squeeze.models.lt_labse.LtCustomLabse - training_config: ${train} # for the sake of compatibility but useless as model won't be finetuned - pretrained_model: "sentence-transformers/LaBSE" - num_labels: ${train.num_labels} - checkpoint_path: ../checkpoints/BER-511/checkpoints/N-Step-Checkpoint_9_4500.ckpt - name: labse - student_config: - _target_: bert-squeeze.models.lt_bert.LtCustomBert - architecture: "transformer" - training_config: ${train} # for the sake of compatibility but useless as model won't be finetuned - pretrained_model: "bert-base-cased" - num_labels: ${train.num_labels} - name: bert - training_config: ${train} - - -data: - train_batch_size: ${train.training_batch_size} - eval_batch_size: ${train.eval_batch_size} - _target_: bert-squeeze.data.modules.distillation_module.DistillationDataModule - teacher_module: - _target_: bert-squeeze.data.modules.transformer_module.TransformerDataModule - dataset_config: - is_local: false - path: emotion - split: - text_col: text - label_col: label - truncate_mode: head - tokenizer_name: ${model.teacher_config.pretrained_model} - max_length: 256 - train_batch_size: ${train.training_batch_size} - eval_batch_size: ${train.eval_batch_size} - student_module: - _target_: bert-squeeze.data.modules.transformer_module.TransformerDataModule - dataset_config: - is_local: false - path: emotion - split: - text_col: text - label_col: label - truncate_mode: head - tokenizer_name: ${model.student_config.pretrained_model} - max_length: 256 - train_batch_size: ${train.training_batch_size} - eval_batch_size: ${train.eval_batch_size} - soft_data_config: - is_local: false - name: go_emotions - split: raw - text_col: text - max_samples: 10000 - - -neptune: - user_name: julesbelveze - project: bert-tricks - tags: [ ] - logger: - _target_: neptune.new.integrations.pytorch_lightning.NeptuneLogger - project: ${neptune.user_name}/${neptune.project} - name: ${task.name} - - -hydra: - run: - dir: ./outputs/${task.name}/${task.strategy}/${now:%Y-%m-%d_%H-%M-%S} \ No newline at end of file diff --git a/examples/configs/train_deebert.yaml b/examples/configs/train_deebert.yaml deleted file mode 100644 index f5cda78..0000000 --- a/examples/configs/train_deebert.yaml +++ /dev/null @@ -1,75 +0,0 @@ -task: - name: train - -general: - do_train: true - do_eval: false - debug: false - labels: - - sadness - - joy - - love - - anger - - fear - - surprise - output_dir: outputs - save_steps: 500 - logging_steps: 50 - validation_every_n_epoch: 1 - evaluate_during_training: true - get_mismated: true - -train: - train_highway: true - early_exit_entropy: -1 - - dropout: 0.2 - optimizer: bertadam - weight_decay: 0.01 - discriminative_learning: true - learning_rates: [ 2e-5 ] - auto_lr: false - layer_lr_decay: 0.95 - lr_scheduler: true - adam_eps: 1e-8 - warmup_ratio: 0.06 - warmup_steps: true - max_grad_norm: 1.0 - accumulation_steps: 1 - eval_batch_size: 1 - training_batch_size: 1 - num_epochs: 10 - logging_steps: 50 - -model: - _target_: bert-squeeze.models.exit_berts.deebert.DeeBert - training_config: ${train} - pretrained_model: "sentence-transformers/LaBSE" - num_labels: 6 - -data: - _target_: bert-squeeze.data.modules.transformer_module.TransformerDataModule - dataset_config: - is_local: false - path: emotion - split: - text_col: text - label_col: label - truncate_mode: head - 
tokenizer_name: ${model.pretrained_model} - max_length: 256 - train_batch_size: ${train.training_batch_size} - eval_batch_size: ${train.eval_batch_size} - -neptune: - user_name: julesbelveze - project: bert-tricks - tags: [ ] - logger: - _target_: neptune.new.integrations.pytorch_lightning.NeptuneLogger - project: ${neptune.user_name}/${neptune.project} - name: ${task.name} - -hydra: - run: - dir: ./outputs/${task.name}/${now:%Y-%m-%d_%H-%M-%S} \ No newline at end of file diff --git a/examples/configs/train_fastlabse.yaml b/examples/configs/train_fastlabse.yaml deleted file mode 100644 index 1544d4f..0000000 --- a/examples/configs/train_fastlabse.yaml +++ /dev/null @@ -1,74 +0,0 @@ -task: - name: train - -general: - do_train: true - do_eval: false - debug: false - labels: - - sadness - - joy - - love - - anger - - fear - - surprise - output_dir: outputs - save_steps: 500 - validation_every_n_epoch: 1 - evaluate_during_training: true - get_mismated: true - -finetuning_callback: - _target_: bert-squeeze.utils.callbacks.fastbert_logic.FastBertLogic - -train: - training_batch_size: 8 - num_epochs: 10 - dropout: 0.2 - objective: lsl - smoothing: 0.1 - optimizer: bertadam - weight_decay: 0.01 - discriminative_learning: true - learning_rates: [ 2e-5 ] - auto_lr: false - layer_lr_decay: 0.95 - lr_scheduler: true - adam_eps: 1e-8 - warmup_ratio: 0.06 - warmup_steps: true - max_grad_norm: 1.0 - accumulation_steps: 1 - eval_batch_size: 16 - logging_steps: 50 - -model: - _target_: bert-squeeze.models.lt_fastbert.LtFastBert - training_config: ${train} - pretrained_model: "sentence-transformers/LaBSE" - num_labels: 6 - scorer_type: "fast" - -data: - _target_: bert-squeeze.data.modules.transformer_module.TransformerDataModule - dataset_config: - is_local: false - path: emotion - split: - text_col: text - label_col: label - truncate_mode: head - tokenizer_name: ${model.pretrained_model} - max_length: 256 - train_batch_size: ${train.training_batch_size} - eval_batch_size: ${train.eval_batch_size} - -neptune: - user_name: julesbelveze - project: bert-tricks - tags: [ ] - logger: - _target_: neptune.new.integrations.pytorch_lightning.NeptuneLogger - project: ${neptune.user_name}/${neptune.project} - name: ${task.name} - diff --git a/examples/configs/train_labse_pruning.yaml b/examples/configs/train_labse_pruning.yaml deleted file mode 100644 index 0f249c8..0000000 --- a/examples/configs/train_labse_pruning.yaml +++ /dev/null @@ -1,73 +0,0 @@ -task: - name: train - -general: - do_train: true - do_eval: false - debug: false - pruning: - _target_: bert-squeeze.utils.callbacks.lottery_ticket.LotteryTicket - labels: - - sadness - - joy - - love - - anger - - fear - - surprise - output_dir: outputs - save_steps: 500 - validation_every_n_epoch: 1 - evaluate_during_training: true - get_mismated: true - model_path: ../checkpoints/BER-511/ - -train: - training_batch_size: 2 - num_epochs: 10 - dropout: 0.2 - objective: lsl - smoothing: 0.1 - optimizer: bertadam - weight_decay: 0.01 - discriminative_learning: true - learning_rates: [ 2e-5 ] - auto_lr: false - layer_lr_decay: 0.95 - lr_scheduler: true - adam_eps: 1e-8 - warmup_ratio: 0.06 - warmup_steps: true - max_grad_norm: 1.0 - accumulation_steps: 1 - eval_batch_size: 2 - logging_steps: 50 - -model: - _target_: bert-squeeze.models.lt_labse.LtCustomLabse - training_config: ${train} - pretrained_model: "sentence-transformers/LaBSE" - num_labels: 6 - -data: - _target_: bert-squeeze.data.modules.transformer_module.TransformerDataModule - dataset_config: - is_local: false 
- path: emotion - split: - text_col: text - label_col: label - truncate_mode: head - tokenizer_name: ${model.pretrained_model} - max_length: 256 - train_batch_size: ${train.training_batch_size} - eval_batch_size: ${train.eval_batch_size} - -neptune: - user_name: julesbelveze - project: bert-tricks - tags: [ ] - logger: - _target_: neptune.new.integrations.pytorch_lightning.NeptuneLogger - project: ${neptune.user_name}/${neptune.project} - name: ${task.name} - diff --git a/examples/configs/train_labse_quantize.yaml b/examples/configs/train_labse_quantize.yaml deleted file mode 100644 index 1e4bafc..0000000 --- a/examples/configs/train_labse_quantize.yaml +++ /dev/null @@ -1,74 +0,0 @@ -task: - name: train - -general: - quantization: - _target_: bert-squeeze.utils.callbacks.quantization.DynamicQuantization - layers_to_quantize: { 'torch.nn.Linear' } - do_train: true - do_eval: false - debug: false - labels: - - sadness - - joy - - love - - anger - - fear - - surprise - output_dir: outputs - save_steps: 500 - validation_every_n_epoch: 1 - evaluate_during_training: true - get_mismated: true - model_path: ../checkpoints/BER-511/ - -train: - training_batch_size: 8 - num_epochs: 10 - dropout: 0.2 - objective: lsl - smoothing: 0.1 - optimizer: bertadam - weight_decay: 0.01 - discriminative_learning: true - learning_rates: [ 2e-5 ] - auto_lr: false - layer_lr_decay: 0.95 - lr_scheduler: true - adam_eps: 1e-8 - warmup_ratio: 0.06 - warmup_steps: true - max_grad_norm: 1.0 - accumulation_steps: 1 - eval_batch_size: 16 - logging_steps: 50 - -model: - _target_: bert-squeeze.models.lt_labse.LtCustomLabse - training_config: ${train} - pretrained_model: "sentence-transformers/LaBSE" - num_labels: 6 - -data: - _target_: bert-squeeze.data.modules.transformer_module.TransformerDataModule - dataset_config: - is_local: false - path: emotion - split: - text_col: text - label_col: label - truncate_mode: head - tokenizer_name: ${model.pretrained_model} - max_length: 256 - train_batch_size: ${train.training_batch_size} - eval_batch_size: ${train.eval_batch_size} - -neptune: - user_name: julesbelveze - project: bert-tricks - tags: [ ] - logger: - _target_: neptune.new.integrations.pytorch_lightning.NeptuneLogger - project: ${neptune.user_name}/${neptune.project} - name: ${task.name} - diff --git a/examples/configs/train_lr.yaml b/examples/configs/train_lr.yaml deleted file mode 100644 index a1ff841..0000000 --- a/examples/configs/train_lr.yaml +++ /dev/null @@ -1,70 +0,0 @@ -task: - name: train - -general: - do_train: true - do_eval: false - debug: false - labels: - - sadness - - joy - - love - - anger - - fear - - surprise - output_dir: outputs - save_steps: 500 - validation_every_n_epoch: 1 - evaluate_during_training: true - get_mismatched: true - -train: - training_batch_size: 16 - eval_batch_size: 16 - num_epochs: 10 - dropout: 0.2 - objective: ce - smoothing: 0.1 - optimizer: sgd - weight_decay: 0.01 - discriminative_learning: false - learning_rates: [ 2e-1 ] - auto_lr: false - layer_lr_decay: 0.95 - lr_scheduler: true - adam_eps: 1e-8 - warmup_ratio: 0.06 - warmup_steps: true - max_grad_norm: 1.0 - accumulation_steps: 1 - logging_steps: 100 - -model: - _target_: bert-squeeze.models.lr.BowLogisticRegression - training_config: ${train} - vocab_size: 5000 - embed_dim: 256 - num_labels: 6 - name: bow_lr - -data: - _target_: bert-squeeze.data.modules.lr_module.LrDataModule - dataset_config: - is_local: false - path: emotion - split: - text_col: text - label_col: label - max_features: ${model.vocab_size} - 
train_batch_size: ${train.training_batch_size} - eval_batch_size: ${train.eval_batch_size} - -neptune: - user_name: julesbelveze - project: bert-tricks - tags: [ ] - logger: - _target_: neptune.new.integrations.pytorch_lightning.NeptuneLogger - project: ${neptune.user_name}/${neptune.project} - name: ${task.name} - diff --git a/examples/configs/train_lstm.yaml b/examples/configs/train_lstm.yaml deleted file mode 100644 index 95e2745..0000000 --- a/examples/configs/train_lstm.yaml +++ /dev/null @@ -1,64 +0,0 @@ -task: - name: train - -general: - do_train: true - do_eval: false - debug: false - labels: - - sadness - - joy - - love - - anger - - fear - - surprise - output_dir: outputs - save_steps: 500 - validation_every_n_epoch: 1 - evaluate_during_training: true - get_mismated: true - -train: - training_batch_size: 16 - eval_batch_size: 16 - num_epochs: 10 - dropout: 0.2 - objective: ce - smoothing: 0.1 - optimizer: sgd - learning_rates: [ 2e-3 ] - auto_lr: false - lr_scheduler: true - max_grad_norm: 1.0 - accumulation_steps: 1 - logging_steps: 100 - -model: - _target_: bert-squeeze.models.lstm.LtLSTM - training_config: ${train} - vocab_len: 20000 - hidden_dim: 128 - num_labels: 6 - name: lstm - -data: - _target_: bert-squeeze.data.modules.lstm_module.LSTMDataModule - dataset_config: - is_local: false - path: emotion - split: - text_col: text - label_col: label - max_features: ${model.vocab_len} - train_batch_size: ${train.training_batch_size} - eval_batch_size: ${train.eval_batch_size} - -neptune: - user_name: julesbelveze - project: bert-tricks - tags: [ ] - logger: - _target_: neptune.new.integrations.pytorch_lightning.NeptuneLogger - project: ${neptune.user_name}/${neptune.project} - name: ${task.name} - diff --git a/examples/main.py b/examples/main.py deleted file mode 100644 index 211abe9..0000000 --- a/examples/main.py +++ /dev/null @@ -1,97 +0,0 @@ -# To run such a program one can run it the following way: -# python3 -m bert-squeeze.main -cp=configs -cn=training_config -# -# To override arguments of the config file run as follow: -# python3 -m bert_squeeze.main -cp=configs -cn=training_config --task=test +new_attr=test - -import hydra -import logging -import sys -import torch -from dotenv import load_dotenv -from hydra.utils import instantiate -from pkg_resources import resource_filename -from pytorch_lightning import Trainer -from pytorch_lightning.callbacks import LearningRateMonitor - -from bert_squeeze.utils import get_neptune_tags, load_model_from_exp -from bert_squeeze.utils.callbacks import CheckpointEveryNSteps -from bert_squeeze.utils.errors import ConfigurationException - -load_dotenv() -logging.basicConfig(stream=sys.stdout, level=logging.INFO) - - -@hydra.main(config_path="./configs/", config_name="training_config") -def run(args): - logging.info(f"Using config: {args}") - - data = instantiate(args.data) - data.prepare_data() - data.setup() - - if args.general.do_train: - neptune_logger = instantiate(args.neptune.logger) - neptune_logger.experiment["sys/tags"].add(get_neptune_tags(args)) - neptune_logger.log_hyperparams(args) - - model = instantiate(args.model, _recursive_=False) - - callbacks = [CheckpointEveryNSteps(args.general.save_steps)] - if args.train.get("lr_scheduler", False): - callbacks.append(LearningRateMonitor(logging_interval='epoch')) - - if args.general.get("quantization", None) is not None: - quantization_callback = instantiate(args.general.quantization) - callbacks.append(quantization_callback) - - if args.general.get("pruning", None) is not None: - 
pruning_callback = instantiate(args.general.pruning) - callbacks.append(pruning_callback) - - if "fast" in args.model._target_: - callbacks.append(instantiate(args.finetuning_callback)) - - # NOTE: when performing manual optimization the 'gradient_clip_val' flag needs - # to be set to None. - # Issue here: https://github.com/PyTorchLightning/pytorch-lightning/issues/7698 - trainer = Trainer( - gpus=torch.cuda.device_count(), - accumulate_grad_batches=args.train.accumulation_steps, - gradient_clip_val=args.train.max_grad_norm, - accelerator='ddp', - auto_lr_find=args.train.auto_lr, - logger=neptune_logger, - callbacks=callbacks, - check_val_every_n_epoch=args.general.validation_every_n_epoch - ) - - logging.info(f"Starting training: {model}") - - trainer.fit( - model=model, - train_dataloaders=data.train_dataloader(), - val_dataloaders=data.val_dataloader() - ) - - # exporting trained model to ONNX - input_sample = iter(data.test_dataloader).next() - model.to_onnx(f"{args.general.output_dir}/model.onnx", input_sample, export_params=True) - - if args.general.do_eval: - if not hasattr(args.general, "model_path"): - raise ConfigurationException("You are on 'eval' mode you need to specify path to model checkpoint.") - args.general.model_path = resource_filename("bert-squeeze", args.general.model_path) - - model = load_model_from_exp(path_to_folder=args.general.model_path, module=args.model._target_) - - model.eval() - trainer = Trainer( - gpus=torch.cuda.device_count(), - accelerator='ddp' - ) - trainer.test(model, datamodule=data) - - -if __name__ == "__main__": - run() diff --git a/tests/assistants/test_train_assistant.py b/tests/assistants/test_train_assistant.py new file mode 100644 index 0000000..9f0e8f1 --- /dev/null +++ b/tests/assistants/test_train_assistant.py @@ -0,0 +1,94 @@ +import pytest +from pytorch_lightning.loggers import TensorBoardLogger +from torch.utils.data import DataLoader + +from bert_squeeze.assistants.train_assistant import TrainAssistant +from bert_squeeze.data.modules import LSTMDataModule, LrDataModule, TransformerDataModule +from bert_squeeze.models import BowLogisticRegression, LtCustomBert, LtDeeBert, LtFastBert, LtLSTM, LtTheseusBert + + +@pytest.fixture +def lr_assistant(): + return TrainAssistant( + "lr", + dataset_path="emotion", + general_kwargs={"labels": [0, 1, 2, 3, 4, 5], "num_labels": 6} + ) + + +class TestTrainAssistant: + def test_sanity_assistant(self, lr_assistant): + """""" + assert lr_assistant.general.num_labels == 6 + assert isinstance(lr_assistant.model, BowLogisticRegression) + assert isinstance(lr_assistant.data, LrDataModule) + assert isinstance(lr_assistant.logger, TensorBoardLogger) + + def test_data(self, lr_assistant): + """""" + assert isinstance(lr_assistant.data.train_dataloader(), DataLoader) + assert len(lr_assistant.data.train_dataloader()) == 1000 + + def test_bert_assistant(self): + """""" + bert_assistant = TrainAssistant( + "bert", + dataset_path="emotion", + general_kwargs={"labels": [0, 1, 2, 3, 4, 5], "num_labels": 6}, + model_kwargs={"pretrained_model": "bert-base-uncased"} + ) + assert bert_assistant.general.num_labels == 6 + assert isinstance(bert_assistant.model, LtCustomBert) + assert bert_assistant.model.encoder.config._name_or_path == "bert-base-uncased" + assert isinstance(bert_assistant.data, TransformerDataModule) + + def test_lstm_assistant(self): + """""" + lstm_assistant = TrainAssistant( + "lstm", + dataset_path="emotion", + general_kwargs={"labels": [0, 1, 2, 3, 4, 5], "num_labels": 6} + ) + assert 
lstm_assistant.general.num_labels == 6 + assert isinstance(lstm_assistant.model, LtLSTM) + assert isinstance(lstm_assistant.data, LSTMDataModule) + + def test_deebert_assistant(self): + """""" + deebert_assistant = TrainAssistant( + "deebert", + dataset_path="emotion", + general_kwargs={"labels": [0, 1, 2, 3, 4, 5], "num_labels": 6}, + model_kwargs={"pretrained_model": "bert-base-uncased"} + ) + assert deebert_assistant.general.num_labels == 6 + assert isinstance(deebert_assistant.model, LtDeeBert) + assert deebert_assistant.model.bert.config._name_or_path == "bert-base-uncased" + assert isinstance(deebert_assistant.data, TransformerDataModule) + + def test_fastbert_assistant(self): + """""" + fastbert_assistant = TrainAssistant( + "fastbert", + dataset_path="emotion", + general_kwargs={"labels": [0, 1, 2, 3, 4, 5], "num_labels": 6}, + model_kwargs={"pretrained_model": "bert-base-uncased"} + ) + assert fastbert_assistant.general.num_labels == 6 + assert isinstance(fastbert_assistant.model, LtFastBert) + assert fastbert_assistant.model.encoder.config._name_or_path == "bert-base-uncased" + assert isinstance(fastbert_assistant.data, TransformerDataModule) + assert len(fastbert_assistant.callbacks) > 0 + + def test_theseusbert_assistant(self): + """""" + fastbert_assistant = TrainAssistant( + "theseus-bert", + dataset_path="emotion", + general_kwargs={"labels": [0, 1, 2, 3, 4, 5], "num_labels": 6}, + model_kwargs={"pretrained_model": "bert-base-uncased"} + ) + assert fastbert_assistant.general.num_labels == 6 + assert isinstance(fastbert_assistant.model, LtTheseusBert) + assert fastbert_assistant.model.encoder.config._name_or_path == "bert-base-uncased" + assert isinstance(fastbert_assistant.data, TransformerDataModule)
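
For reference, below is a minimal usage sketch of the new `TrainAssistant`. It is not part of this diff: the constructor arguments mirror the new tests (the "emotion" dataset and "bert-base-uncased" model), while the `pytorch_lightning.Trainer` wiring and its flags are illustrative assumptions only.

```python
# Minimal sketch: wire the assistant's lazily instantiated components into a
# standard pytorch_lightning.Trainer. Dataset and Trainer flags are assumptions.
from pytorch_lightning import Trainer

from bert_squeeze.assistants import TrainAssistant

assistant = TrainAssistant(
    "bert",                                   # key into CONFIG_MAPPER -> train_bert.yaml
    dataset_path="emotion",                   # fills data.dataset_config.path
    general_kwargs={"labels": [0, 1, 2, 3, 4, 5], "num_labels": 6},
    model_kwargs={"pretrained_model": "bert-base-uncased"},
)

# Accessing the properties triggers instantiation (and data preparation/setup).
model = assistant.model
data = assistant.data

trainer = Trainer(
    max_epochs=assistant.train["num_epochs"],
    accumulate_grad_batches=assistant.train["accumulation_steps"],
    logger=assistant.logger,                  # TensorBoardLogger unless a 'logger' config is given
    callbacks=assistant.callbacks,
)
trainer.fit(
    model,
    train_dataloaders=data.train_dataloader(),
    val_dataloaders=data.val_dataloader(),
)
```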