2 files changed: +110 -0 lines changed

data:
  batch_size: 1
  eval_batch_size: 1

model:
  pretrained_model: Salesforce/codet5-base
  pretrained_tokenizer: Salesforce/codet5-base
  skip_special_token_when_generate: False
  beam_size: 20

trainer:
  auto_select_gpus: true
  gpus: -1
  strategy: ddp
  # find_unused_parameters: false
  precision: 16

  # max_steps: 50_000
  # fast_dev_run: true
  max_epochs: 30
  accumulate_grad_batches: 4  # effective batch size 1*4(gpu)*4(accumulate) = 16

callbacks:
  - class_path: pytorch_lightning.callbacks.EarlyStopping
    init_args:
      monitor: bleu/val
      mode: max
      min_delta: 0
      patience: 5
      verbose: true
  # - class_path: pytorch_lightning.callbacks.StochasticWeightAveraging  # Incompatible with EarlyStopping
  - class_path: pytorch_lightning.callbacks.lr_monitor.LearningRateMonitor
    init_args:
      logging_interval: step

optimizer:
  class_path: transformers.optimization.AdamW
  init_args:
    lr: 0.00005
    eps: 1e-8
    weight_decay: 0.01

lr_scheduler:
  class_path: torch.optim.lr_scheduler.OneCycleLR
  init_args:
    max_lr: 0.00005
    pct_start: 0.1
    div_factor: 1
    total_steps: 30
    anneal_strategy: linear

ckpt:
  save_top_k: 1
  monitor: bleu/val
  mode: max
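
The optimizer and lr_scheduler sections follow a class_path / init_args layout. As a rough illustration only (this is not the repository's actual loader, and "finetune.yaml" is a placeholder since the diff does not show the filenames), such entries could be instantiated generically like this:

# Hedged sketch: a generic way to turn class_path/init_args entries into objects.
import importlib

import yaml


def instantiate(spec, **extra):
    """Import spec["class_path"] and call it with spec["init_args"] plus extras."""
    module_name, cls_name = spec["class_path"].rsplit(".", 1)
    cls = getattr(importlib.import_module(module_name), cls_name)
    return cls(**{**spec.get("init_args", {}), **extra})


with open("finetune.yaml") as f:  # placeholder filename
    cfg = yaml.safe_load(f)

# The optimizer needs the model's parameters; the scheduler wraps the optimizer.
# optimizer = instantiate(cfg["optimizer"], params=model.parameters())
# scheduler = instantiate(cfg["lr_scheduler"], optimizer=optimizer)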

Second file:

data:
  batch_size: 1
  eval_batch_size: 1

model:
  pretrained_model: ../models/pretrain/model/
  pretrained_tokenizer: ../models/codeT5Tokenizer
  beam_size: 20
  skip_special_token_when_generate: False

trainer:
  auto_select_gpus: true
  gpus: -1
  strategy: ddp
  # find_unused_parameters: false
  precision: 16

  # max_steps: 50_000
  # fast_dev_run: true
  max_epochs: 30
  accumulate_grad_batches: 12  # effective batch size 1*4(gpu)*12(accumulate) = 48

callbacks:
  - class_path: pytorch_lightning.callbacks.EarlyStopping
    init_args:
      monitor: bleu/val
      mode: max
      min_delta: 0
      patience: 5
      verbose: true
  # - class_path: pytorch_lightning.callbacks.StochasticWeightAveraging  # Incompatible with EarlyStopping
  - class_path: pytorch_lightning.callbacks.lr_monitor.LearningRateMonitor
    init_args:
      logging_interval: step

optimizer:
  class_path: transformers.optimization.AdamW
  init_args:
    lr: 0.00005
    eps: 1e-8
    weight_decay: 0.01

lr_scheduler:
  class_path: torch.optim.lr_scheduler.OneCycleLR
  init_args:
    max_lr: 0.00005
    pct_start: 0.1
    div_factor: 1
    total_steps: 50
    anneal_strategy: linear

ckpt:
  save_top_k: 1
  monitor: bleu/val
  mode: max
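
For context, the model settings above (pretrained_model, pretrained_tokenizer, beam_size, skip_special_token_when_generate) map naturally onto the Hugging Face generate API. A minimal sketch, assuming CodeT5's usual T5ForConditionalGeneration class and using the hub checkpoint from the first config (the local paths in this file come from the pretraining step and are not reproduced here); the input string and max_length are made-up values:

# Minimal sketch only; not the repository's actual generation code.
from transformers import AutoTokenizer, T5ForConditionalGeneration

tokenizer = AutoTokenizer.from_pretrained("Salesforce/codet5-base")
model = T5ForConditionalGeneration.from_pretrained("Salesforce/codet5-base")

inputs = tokenizer("def add(a, b): return a + b", return_tensors="pt")
out = model.generate(
    **inputs,
    num_beams=20,    # beam_size: 20
    max_length=64,   # assumed value; not part of the config
)
# skip_special_token_when_generate: False suggests special tokens are kept in the output.
print(tokenizer.decode(out[0], skip_special_tokens=False))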