EleutherAI · haileyschoelkopf · Mar 3, 2024 · Feb 24, 2024 · Feb 27, 2024 · Feb 27, 2024
@@ -193,6 +193,12 @@ def parse_eval_args() -> argparse.Namespace:
             "E.g, `--seed 42` sets all three seeds to 42."
         ),
     )
+    parser.add_argument(
+        "--trust_remote_code",
+        action="store_true",
+        help="Sets trust_remote_code to True to execute code to create HF Datasets from the Hub",
+    )
+
     return parser.parse_args()
 
 
@@ -282,6 +288,16 @@ def cli_evaluate(args: Union[argparse.Namespace, None] = None) -> None:
             path.mkdir(parents=True, exist_ok=True)
             output_path_file = path.joinpath("results.json")
 
+    # When trust_remote_code is enabled, export it for HF Datasets and append it to the comma-separated model args
+    if args.trust_remote_code:
+        # os.environ only accepts str values; assigning a bool raises TypeError.
+        os.environ["HF_DATASETS_TRUST_REMOTE_CODE"] = str(args.trust_remote_code)
+        # Forward the same value to the model constructor through model_args.
+        args.model_args = (
+            args.model_args
+            + f",trust_remote_code={os.environ['HF_DATASETS_TRUST_REMOTE_CODE']}"
+        )
+
     eval_logger.info(f"Selected Tasks: {task_names}")
     eval_logger.info("Loading selected tasks...")
 

@@ -14,3 +14,5 @@ metric_list:
     higher_is_better: true
 metadata:
   version: 1.0
+dataset_kwargs:
+  trust_remote_code: true
@@ -1,3 +1,5 @@
 include: arithmetic_1dc.yaml
 task: arithmetic_2da
 dataset_name: arithmetic_2da
+dataset_kwargs:
+  trust_remote_code: true
@@ -1,3 +1,5 @@
 include: arithmetic_1dc.yaml
 task: arithmetic_2dm
 dataset_name: arithmetic_2dm
+dataset_kwargs:
+  trust_remote_code: true
@@ -1,3 +1,5 @@
 include: arithmetic_1dc.yaml
 task: arithmetic_2ds
 dataset_name: arithmetic_2ds
+dataset_kwargs:
+  trust_remote_code: true
@@ -1,3 +1,5 @@
 include: arithmetic_1dc.yaml
 task: arithmetic_3da
 dataset_name: arithmetic_3da
+dataset_kwargs:
+  trust_remote_code: true
@@ -1,3 +1,5 @@
 include: arithmetic_1dc.yaml
 task: arithmetic_3ds
 dataset_name: arithmetic_3ds
+dataset_kwargs:
+  trust_remote_code: true
@@ -1,3 +1,5 @@
 include: arithmetic_1dc.yaml
 task: arithmetic_4da
 dataset_name: arithmetic_4da
+dataset_kwargs:
+  trust_remote_code: true
@@ -1,3 +1,5 @@
 include: arithmetic_1dc.yaml
 task: arithmetic_4ds
 dataset_name: arithmetic_4ds
+dataset_kwargs:
+  trust_remote_code: true
@@ -1,3 +1,5 @@
 include: arithmetic_1dc.yaml
 task: arithmetic_5da
 dataset_name: arithmetic_5da
+dataset_kwargs:
+  trust_remote_code: true
@@ -1,3 +1,5 @@
 include: arithmetic_1dc.yaml
 task: arithmetic_5ds
 dataset_name: arithmetic_5ds
+dataset_kwargs:
+  trust_remote_code: true
@@ -12,3 +12,5 @@ metric_list:
     higher_is_better: true
 metadata:
   version: 1.0
+dataset_kwargs:
+  trust_remote_code: true
@@ -20,3 +20,5 @@ metric_list:
     higher_is_better: true
 metadata:
   version: 3.0
+dataset_kwargs:
+  trust_remote_code: true
@@ -22,3 +22,5 @@ metric_list:
     higher_is_better: true
 metadata:
   version: 3.0
+dataset_kwargs:
+  trust_remote_code: true
@@ -21,3 +21,5 @@ metric_list:
     higher_is_better: True
 metadata:
   version: 1.0
+dataset_kwargs:
+  trust_remote_code: true
@@ -21,3 +21,5 @@ metric_list:
     higher_is_better: True
 metadata:
   version: 1.0
+dataset_kwargs:
+  trust_remote_code: true
@@ -18,3 +18,5 @@ metric_list:
     higher_is_better: true
 metadata:
   version: 1.0
+dataset_kwargs:
+  trust_remote_code: true
@@ -19,3 +19,5 @@ metric_list:
     higher_is_better: true
 metadata:
   version: 1.0
+dataset_kwargs:
+  trust_remote_code: true
@@ -25,3 +25,5 @@ filter_list:
       - function: "take_first"
 metadata:
   version: 0.0
+dataset_kwargs:
+  trust_remote_code: true
@@ -23,3 +23,5 @@ num_fewshot: 0
 metadata:
   version: 1.0
   num_fewshot: 4
+dataset_kwargs:
+  trust_remote_code: true
@@ -12,3 +12,5 @@ metric_list:
   - metric: acc
 metadata:
   version: 0.0
+dataset_kwargs:
+  trust_remote_code: true
@@ -12,3 +12,5 @@ metric_list:
   - metric: acc
 metadata:
   version: 0.0
+dataset_kwargs:
+  trust_remote_code: true
@@ -12,3 +12,5 @@ metric_list:
   - metric: acc
 metadata:
   version: 0.0
+dataset_kwargs:
+  trust_remote_code: true
@@ -23,3 +23,5 @@ metric_list:
     higher_is_better: true
 metadata:
   version: 2.0
+dataset_kwargs:
+  trust_remote_code: true
@@ -12,3 +12,5 @@ metric_list:
     higher_is_better: true
 metadata:
   version: 2.0
+dataset_kwargs:
+  trust_remote_code: true
@@ -18,3 +18,5 @@ metric_list:
     ignore_punctuation: false
 metadata:
   version: 2.0
+dataset_kwargs:
+  trust_remote_code: true
@@ -18,3 +18,5 @@ metric_list:
     ignore_punctuation: false
 metadata:
   version: 2.0
+dataset_kwargs:
+  trust_remote_code: true
@@ -18,3 +18,5 @@ metric_list:
     ignore_punctuation: false
 metadata:
   version: 2.0
+dataset_kwargs:
+  trust_remote_code: true
@@ -18,3 +18,5 @@ metric_list:
     ignore_punctuation: false
 metadata:
   version: 2.0
+dataset_kwargs:
+  trust_remote_code: true
@@ -16,3 +16,5 @@ metric_list:
   - metric: bits_per_byte
 metadata:
   version: 2.0
+dataset_kwargs:
+  trust_remote_code: true
@@ -21,7 +21,7 @@ license = { "text" = "MIT" }
 dependencies = [
     "accelerate>=0.21.0",
     "evaluate",
-    "datasets>=2.14.0",
+    "datasets>=2.16.0",
     "evaluate>=0.4.0",
     "jsonlines",
     "numexpr",