Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Setting trust_remote_code to True for HuggingFace datasets compatibility #1487

Merged
merged 9 commits on
Mar 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions lm_eval/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,12 @@ def parse_eval_args() -> argparse.Namespace:
"E.g, `--seed 42` sets all three seeds to 42."
),
)
parser.add_argument(
"--trust_remote_code",
default=True,
help="Sets trust_remote_code to True to execute code to create HF Datasets from the Hub",
)

return parser.parse_args()


Expand Down Expand Up @@ -282,6 +288,16 @@ def cli_evaluate(args: Union[argparse.Namespace, None] = None) -> None:
path.mkdir(parents=True, exist_ok=True)
output_path_file = path.joinpath("results.json")

# Respect user's value passed in via CLI, otherwise default to True and add to comma-separated model args
if args.trust_remote_code:
os.environ["HF_DATASETS_TRUST_REMOTE_CODE"] = (
args.trust_remote_code if args.trust_remote_code else True
)
args.model_args = (
args.model_args
+ f",trust_remote_code={os.environ['HF_DATASETS_TRUST_REMOTE_CODE']}"
)

eval_logger.info(f"Selected Tasks: {task_names}")
eval_logger.info("Loading selected tasks...")

Expand Down
2 changes: 2 additions & 0 deletions lm_eval/tasks/arithmetic/arithmetic_1dc.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,5 @@ metric_list:
higher_is_better: true
metadata:
version: 1.0
dataset_kwargs:
trust_remote_code: true
2 changes: 2 additions & 0 deletions lm_eval/tasks/arithmetic/arithmetic_2da.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
include: arithmetic_1dc.yaml
task: arithmetic_2da
dataset_name: arithmetic_2da
dataset_kwargs:
trust_remote_code: true
2 changes: 2 additions & 0 deletions lm_eval/tasks/arithmetic/arithmetic_2dm.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
include: arithmetic_1dc.yaml
task: arithmetic_2dm
dataset_name: arithmetic_2dm
dataset_kwargs:
trust_remote_code: true
2 changes: 2 additions & 0 deletions lm_eval/tasks/arithmetic/arithmetic_2ds.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
include: arithmetic_1dc.yaml
task: arithmetic_2ds
dataset_name: arithmetic_2ds
dataset_kwargs:
trust_remote_code: true
2 changes: 2 additions & 0 deletions lm_eval/tasks/arithmetic/arithmetic_3da.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
include: arithmetic_1dc.yaml
task: arithmetic_3da
dataset_name: arithmetic_3da
dataset_kwargs:
trust_remote_code: true
2 changes: 2 additions & 0 deletions lm_eval/tasks/arithmetic/arithmetic_3ds.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
include: arithmetic_1dc.yaml
task: arithmetic_3ds
dataset_name: arithmetic_3ds
dataset_kwargs:
trust_remote_code: true
2 changes: 2 additions & 0 deletions lm_eval/tasks/arithmetic/arithmetic_4da.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
include: arithmetic_1dc.yaml
task: arithmetic_4da
dataset_name: arithmetic_4da
dataset_kwargs:
trust_remote_code: true
2 changes: 2 additions & 0 deletions lm_eval/tasks/arithmetic/arithmetic_4ds.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
include: arithmetic_1dc.yaml
task: arithmetic_4ds
dataset_name: arithmetic_4ds
dataset_kwargs:
trust_remote_code: true
2 changes: 2 additions & 0 deletions lm_eval/tasks/arithmetic/arithmetic_5da.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
include: arithmetic_1dc.yaml
task: arithmetic_5da
dataset_name: arithmetic_5da
dataset_kwargs:
trust_remote_code: true
2 changes: 2 additions & 0 deletions lm_eval/tasks/arithmetic/arithmetic_5ds.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
include: arithmetic_1dc.yaml
task: arithmetic_5ds
dataset_name: arithmetic_5ds
dataset_kwargs:
trust_remote_code: true
2 changes: 2 additions & 0 deletions lm_eval/tasks/asdiv/default.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,5 @@ metric_list:
higher_is_better: true
metadata:
version: 1.0
dataset_kwargs:
trust_remote_code: true
2 changes: 2 additions & 0 deletions lm_eval/tasks/coqa/default.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,5 @@ metric_list:
higher_is_better: true
metadata:
version: 3.0
dataset_kwargs:
trust_remote_code: true
2 changes: 2 additions & 0 deletions lm_eval/tasks/drop/default.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,5 @@ metric_list:
higher_is_better: true
metadata:
version: 3.0
dataset_kwargs:
trust_remote_code: true
2 changes: 2 additions & 0 deletions lm_eval/tasks/kobest/kobest_sentineg.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,5 @@ metric_list:
higher_is_better: True
metadata:
version: 1.0
dataset_kwargs:
trust_remote_code: true
2 changes: 2 additions & 0 deletions lm_eval/tasks/kobest/kobest_wic.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,5 @@ metric_list:
higher_is_better: True
metadata:
version: 1.0
dataset_kwargs:
trust_remote_code: true
2 changes: 2 additions & 0 deletions lm_eval/tasks/lambada/lambada_openai.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,5 @@ metric_list:
higher_is_better: true
metadata:
version: 1.0
dataset_kwargs:
trust_remote_code: true
2 changes: 2 additions & 0 deletions lm_eval/tasks/logiqa/logiqa.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,5 @@ metric_list:
higher_is_better: true
metadata:
version: 1.0
dataset_kwargs:
trust_remote_code: true
2 changes: 2 additions & 0 deletions lm_eval/tasks/logiqa2/logieval.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,5 @@ filter_list:
- function: "take_first"
metadata:
version: 0.0
dataset_kwargs:
trust_remote_code: true
2 changes: 2 additions & 0 deletions lm_eval/tasks/minerva_math/minerva_math_algebra.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,3 +23,5 @@ num_fewshot: 0
metadata:
version: 1.0
num_fewshot: 4
dataset_kwargs:
trust_remote_code: true
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,5 @@ metric_list:
- metric: acc
metadata:
version: 0.0
dataset_kwargs:
trust_remote_code: true
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,5 @@ metric_list:
- metric: acc
metadata:
version: 0.0
dataset_kwargs:
trust_remote_code: true
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,5 @@ metric_list:
- metric: acc
metadata:
version: 0.0
dataset_kwargs:
trust_remote_code: true
2 changes: 2 additions & 0 deletions lm_eval/tasks/mutual/mutual.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,3 +23,5 @@ metric_list:
higher_is_better: true
metadata:
version: 2.0
dataset_kwargs:
trust_remote_code: true
2 changes: 2 additions & 0 deletions lm_eval/tasks/race/race.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,5 @@ metric_list:
higher_is_better: true
metadata:
version: 2.0
dataset_kwargs:
trust_remote_code: true
2 changes: 2 additions & 0 deletions lm_eval/tasks/unscramble/anagrams1.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,5 @@ metric_list:
ignore_punctuation: false
metadata:
version: 2.0
dataset_kwargs:
trust_remote_code: true
2 changes: 2 additions & 0 deletions lm_eval/tasks/unscramble/anagrams2.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,5 @@ metric_list:
ignore_punctuation: false
metadata:
version: 2.0
dataset_kwargs:
trust_remote_code: true
2 changes: 2 additions & 0 deletions lm_eval/tasks/unscramble/cycle_letters.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,5 @@ metric_list:
ignore_punctuation: false
metadata:
version: 2.0
dataset_kwargs:
trust_remote_code: true
2 changes: 2 additions & 0 deletions lm_eval/tasks/unscramble/random_insertion.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,5 @@ metric_list:
ignore_punctuation: false
metadata:
version: 2.0
dataset_kwargs:
trust_remote_code: true
2 changes: 2 additions & 0 deletions lm_eval/tasks/wikitext/wikitext.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,5 @@ metric_list:
- metric: bits_per_byte
metadata:
version: 2.0
dataset_kwargs:
trust_remote_code: true
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ license = { "text" = "MIT" }
dependencies = [
"accelerate>=0.21.0",
"evaluate",
"datasets>=2.14.0",
"datasets>=2.16.0",
"evaluate>=0.4.0",
"jsonlines",
"numexpr",
Expand Down
Loading