Skip to content

Commit

Permalink
Allow eval without dropout
Browse files Browse the repository at this point in the history
  • Loading branch information
JonasGeiping committed Apr 5, 2024
1 parent 7bcd670 commit d1b87f4
Show file tree
Hide file tree
Showing 10 changed files with 15 additions and 9 deletions.
2 changes: 1 addition & 1 deletion cramming/config/data/sources/ag_news.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# For sanity testing
ag_news:
provider: huggingface
name: default
partition: default
split: train

streaming: False
Expand Down
4 changes: 2 additions & 2 deletions cramming/config/data/the-pile-stream.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@ seq_length: 128
include_cls_token_in_corpus: False
include_sep_token_in_corpus: True
use_type_ids: False
max_entries_in_raw_dataset: 8e6 # Select only this many examples from the dataset # 20e6 are ok if all are chosen. Oversample if filtering
max_seq_in_tokenized_dataset: 85e6 # Select only this many tokenized sequences.
max_entries_in_raw_dataset: 48e6 # Select only this many examples from the dataset # 20e6 are ok if all are chosen. Oversample if filtering
max_seq_in_tokenized_dataset: 1e14 # Select only this many tokenized sequences (or fewer)
# max_seq_in_tokenized_dataset should be just slightly more than budget * 60 * 60 * expected tokens/sec for the single epoch of training

# Data Cleaning:
Expand Down
1 change: 1 addition & 0 deletions cramming/config/eval/GLUE.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ limited_decay_keys: [bias, LayerNorm.bias, LayerNorm.weight, norm]
scheduler: linear
optim_mod:
name: none
eval_in_train_mode: True # Turn on dropout (if present in the model) during finetuning

epochs: 10

Expand Down
1 change: 1 addition & 0 deletions cramming/config/eval/GLUE_sane.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ limited_decay_keys: [bias, LayerNorm.bias, LayerNorm.weight, norm]
scheduler: cosine-decay
optim_mod:
name: none
eval_in_train_mode: True # Turn on dropout (if present in the model) during finetuning

epochs: 5

Expand Down
1 change: 1 addition & 0 deletions cramming/config/eval/RACE.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ limited_decay_keys: [bias, LayerNorm.bias, LayerNorm.weight, norm]
scheduler: cosine-decay
optim_mod:
name: none
eval_in_train_mode: True # Turn on dropout (if present in the model) during finetuning

epochs: 5

Expand Down
1 change: 1 addition & 0 deletions cramming/config/eval/SWAG.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ limited_decay_keys: [bias, LayerNorm.bias, LayerNorm.weight, norm]
scheduler: cosine-decay
optim_mod:
name: none
eval_in_train_mode: True # Turn on dropout (if present in the model) during finetuning

epochs: 5

Expand Down
1 change: 1 addition & 0 deletions cramming/config/eval/mnli.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ limited_decay_keys: [bias, LayerNorm.bias, LayerNorm.weight, norm]
scheduler: linear
optim_mod:
name: none
eval_in_train_mode: True # Turn on dropout (if present in the model) during finetuning

epochs: 10

Expand Down
1 change: 1 addition & 0 deletions cramming/config/eval/superGLUE.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ limited_decay_keys: [bias, LayerNorm.bias, LayerNorm.weight, norm]
scheduler: cosine-decay
optim_mod:
name: none
eval_in_train_mode: True # Turn on dropout (if present in the model) during finetuning

epochs: 10 # all superGLUE tasks are short

Expand Down
8 changes: 4 additions & 4 deletions cramming/config/impl/_default.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,18 +12,18 @@ path: data

# data implementation:
local_staging_dir: # Optionally copy a preprocessed dataset into this folder before loading it for training
forbid_dataset_preprocessing: False
forbid_dataset_preprocessing: True
temporary_corpus: False # Save data directly into local staging dir, forget after use
max_raw_chunk_size: 8e6

# checkpointing and logging:
print_loss_every_nth_step: 1000
save_intermediate_checkpoints: False
save_every_nth_step: 5000
resume_run_after_preempt: True
save_every_nth_step: 10000
resume_run_after_preempt: False

# checkpoint troubleshooting (should only be relevant for >100bil token runs)
troubleshoot_strategy: recover_checkpoint # can include "recover_checkpoint" and "dump_nan_grads"
troubleshoot_strategy: "none" # recover_checkpoint # can include "recover_checkpoint" and "dump_nan_grads"

# early termination, cancel runs that do not meet this loss threshold early.
early_termination:
Expand Down
4 changes: 2 additions & 2 deletions eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ def main_downstream_process(cfg, setup):
targets = [evaluate.load(metric_name, cache_dir=cfg.impl.path) for metric_name in task["details"]["target_metrics"]]
metric = evaluate.CombinedEvaluations(targets)
# Launch training
model_engine.train()
model_engine.train(cfg.eval.eval_in_train_mode)
loss_vals = []
for epoch in range(cfg.eval.epochs):
train_time = time.time()
Expand Down Expand Up @@ -136,7 +136,7 @@ def validate(model_engine, validloader, metric, setup, cfg):
except ValueError: # pearson corr computation will raise errors if metric values are NaN
log.info("Value Error in metrics computation, maybe non-finite values in prediction. Returning backup score.")
eval_metric = metric.compute(predictions=[0, 1], references=[1, 0]) # spoof terrible result if metric computation fails
model_engine.train()
model_engine.train(cfg.eval.eval_in_train_mode)
return {k: float(v) for k, v in eval_metric.items()} # force float returns


Expand Down

0 comments on commit d1b87f4

Please sign in to comment.