From 4b7feca4a6a1f06ceb2c9d7131bf840e9aede90d Mon Sep 17 00:00:00 2001 From: Jonathan Burdge Date: Tue, 23 Apr 2024 18:06:55 +0300 Subject: [PATCH 1/2] add mmlu arc style evaluation --- lm_eval/tasks/mmlu/arc_style/_arc_style_template_yaml | 11 +++++++++++ lm_eval/tasks/mmlu/arc_style/_mmlu.yaml | 6 ++++++ .../tasks/mmlu/arc_style/mmlu_abstract_algebra.yaml | 6 ++++++ lm_eval/tasks/mmlu/arc_style/mmlu_anatomy.yaml | 6 ++++++ lm_eval/tasks/mmlu/arc_style/mmlu_astronomy.yaml | 6 ++++++ .../tasks/mmlu/arc_style/mmlu_business_ethics.yaml | 6 ++++++ .../tasks/mmlu/arc_style/mmlu_clinical_knowledge.yaml | 6 ++++++ .../tasks/mmlu/arc_style/mmlu_college_biology.yaml | 6 ++++++ .../tasks/mmlu/arc_style/mmlu_college_chemistry.yaml | 6 ++++++ .../mmlu/arc_style/mmlu_college_computer_science.yaml | 6 ++++++ .../mmlu/arc_style/mmlu_college_mathematics.yaml | 6 ++++++ .../tasks/mmlu/arc_style/mmlu_college_medicine.yaml | 6 ++++++ .../tasks/mmlu/arc_style/mmlu_college_physics.yaml | 6 ++++++ .../tasks/mmlu/arc_style/mmlu_computer_security.yaml | 6 ++++++ .../tasks/mmlu/arc_style/mmlu_conceptual_physics.yaml | 6 ++++++ lm_eval/tasks/mmlu/arc_style/mmlu_econometrics.yaml | 6 ++++++ .../mmlu/arc_style/mmlu_electrical_engineering.yaml | 6 ++++++ .../mmlu/arc_style/mmlu_elementary_mathematics.yaml | 6 ++++++ lm_eval/tasks/mmlu/arc_style/mmlu_formal_logic.yaml | 6 ++++++ lm_eval/tasks/mmlu/arc_style/mmlu_global_facts.yaml | 6 ++++++ .../mmlu/arc_style/mmlu_high_school_biology.yaml | 6 ++++++ .../mmlu/arc_style/mmlu_high_school_chemistry.yaml | 6 ++++++ .../arc_style/mmlu_high_school_computer_science.yaml | 6 ++++++ .../arc_style/mmlu_high_school_european_history.yaml | 6 ++++++ .../mmlu/arc_style/mmlu_high_school_geography.yaml | 6 ++++++ .../mmlu_high_school_government_and_politics.yaml | 6 ++++++ .../arc_style/mmlu_high_school_macroeconomics.yaml | 6 ++++++ .../mmlu/arc_style/mmlu_high_school_mathematics.yaml | 6 ++++++ .../arc_style/mmlu_high_school_microeconomics.yaml | 6 ++++++ .../mmlu/arc_style/mmlu_high_school_physics.yaml | 6 ++++++ .../mmlu/arc_style/mmlu_high_school_psychology.yaml | 6 ++++++ .../mmlu/arc_style/mmlu_high_school_statistics.yaml | 6 ++++++ .../mmlu/arc_style/mmlu_high_school_us_history.yaml | 6 ++++++ .../arc_style/mmlu_high_school_world_history.yaml | 6 ++++++ lm_eval/tasks/mmlu/arc_style/mmlu_human_aging.yaml | 6 ++++++ .../tasks/mmlu/arc_style/mmlu_human_sexuality.yaml | 6 ++++++ .../tasks/mmlu/arc_style/mmlu_international_law.yaml | 6 ++++++ lm_eval/tasks/mmlu/arc_style/mmlu_jurisprudence.yaml | 6 ++++++ .../tasks/mmlu/arc_style/mmlu_logical_fallacies.yaml | 6 ++++++ .../tasks/mmlu/arc_style/mmlu_machine_learning.yaml | 6 ++++++ lm_eval/tasks/mmlu/arc_style/mmlu_management.yaml | 6 ++++++ lm_eval/tasks/mmlu/arc_style/mmlu_marketing.yaml | 6 ++++++ .../tasks/mmlu/arc_style/mmlu_medical_genetics.yaml | 6 ++++++ lm_eval/tasks/mmlu/arc_style/mmlu_miscellaneous.yaml | 6 ++++++ lm_eval/tasks/mmlu/arc_style/mmlu_moral_disputes.yaml | 6 ++++++ .../tasks/mmlu/arc_style/mmlu_moral_scenarios.yaml | 6 ++++++ lm_eval/tasks/mmlu/arc_style/mmlu_nutrition.yaml | 6 ++++++ lm_eval/tasks/mmlu/arc_style/mmlu_philosophy.yaml | 6 ++++++ lm_eval/tasks/mmlu/arc_style/mmlu_prehistory.yaml | 6 ++++++ .../mmlu/arc_style/mmlu_professional_accounting.yaml | 6 ++++++ .../tasks/mmlu/arc_style/mmlu_professional_law.yaml | 6 ++++++ .../mmlu/arc_style/mmlu_professional_medicine.yaml | 6 ++++++ .../mmlu/arc_style/mmlu_professional_psychology.yaml | 6 ++++++ .../tasks/mmlu/arc_style/mmlu_public_relations.yaml | 6 ++++++ .../tasks/mmlu/arc_style/mmlu_security_studies.yaml | 6 ++++++ lm_eval/tasks/mmlu/arc_style/mmlu_sociology.yaml | 6 ++++++ .../tasks/mmlu/arc_style/mmlu_us_foreign_policy.yaml | 6 ++++++ lm_eval/tasks/mmlu/arc_style/mmlu_virology.yaml | 6 ++++++ .../tasks/mmlu/arc_style/mmlu_world_religions.yaml | 6 ++++++ 59 files changed, 359 insertions(+) create mode 100644 lm_eval/tasks/mmlu/arc_style/_arc_style_template_yaml create mode 100644 lm_eval/tasks/mmlu/arc_style/_mmlu.yaml create mode 100644 lm_eval/tasks/mmlu/arc_style/mmlu_abstract_algebra.yaml create mode 100644 lm_eval/tasks/mmlu/arc_style/mmlu_anatomy.yaml create mode 100644 lm_eval/tasks/mmlu/arc_style/mmlu_astronomy.yaml create mode 100644 lm_eval/tasks/mmlu/arc_style/mmlu_business_ethics.yaml create mode 100644 lm_eval/tasks/mmlu/arc_style/mmlu_clinical_knowledge.yaml create mode 100644 lm_eval/tasks/mmlu/arc_style/mmlu_college_biology.yaml create mode 100644 lm_eval/tasks/mmlu/arc_style/mmlu_college_chemistry.yaml create mode 100644 lm_eval/tasks/mmlu/arc_style/mmlu_college_computer_science.yaml create mode 100644 lm_eval/tasks/mmlu/arc_style/mmlu_college_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/arc_style/mmlu_college_medicine.yaml create mode 100644 lm_eval/tasks/mmlu/arc_style/mmlu_college_physics.yaml create mode 100644 lm_eval/tasks/mmlu/arc_style/mmlu_computer_security.yaml create mode 100644 lm_eval/tasks/mmlu/arc_style/mmlu_conceptual_physics.yaml create mode 100644 lm_eval/tasks/mmlu/arc_style/mmlu_econometrics.yaml create mode 100644 lm_eval/tasks/mmlu/arc_style/mmlu_electrical_engineering.yaml create mode 100644 lm_eval/tasks/mmlu/arc_style/mmlu_elementary_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/arc_style/mmlu_formal_logic.yaml create mode 100644 lm_eval/tasks/mmlu/arc_style/mmlu_global_facts.yaml create mode 100644 lm_eval/tasks/mmlu/arc_style/mmlu_high_school_biology.yaml create mode 100644 lm_eval/tasks/mmlu/arc_style/mmlu_high_school_chemistry.yaml create mode 100644 lm_eval/tasks/mmlu/arc_style/mmlu_high_school_computer_science.yaml create mode 100644 lm_eval/tasks/mmlu/arc_style/mmlu_high_school_european_history.yaml create mode 100644 lm_eval/tasks/mmlu/arc_style/mmlu_high_school_geography.yaml create mode 100644 lm_eval/tasks/mmlu/arc_style/mmlu_high_school_government_and_politics.yaml create mode 100644 lm_eval/tasks/mmlu/arc_style/mmlu_high_school_macroeconomics.yaml create mode 100644 lm_eval/tasks/mmlu/arc_style/mmlu_high_school_mathematics.yaml create mode 100644 lm_eval/tasks/mmlu/arc_style/mmlu_high_school_microeconomics.yaml create mode 100644 lm_eval/tasks/mmlu/arc_style/mmlu_high_school_physics.yaml create mode 100644 lm_eval/tasks/mmlu/arc_style/mmlu_high_school_psychology.yaml create mode 100644 lm_eval/tasks/mmlu/arc_style/mmlu_high_school_statistics.yaml create mode 100644 lm_eval/tasks/mmlu/arc_style/mmlu_high_school_us_history.yaml create mode 100644 lm_eval/tasks/mmlu/arc_style/mmlu_high_school_world_history.yaml create mode 100644 lm_eval/tasks/mmlu/arc_style/mmlu_human_aging.yaml create mode 100644 lm_eval/tasks/mmlu/arc_style/mmlu_human_sexuality.yaml create mode 100644 lm_eval/tasks/mmlu/arc_style/mmlu_international_law.yaml create mode 100644 lm_eval/tasks/mmlu/arc_style/mmlu_jurisprudence.yaml create mode 100644 lm_eval/tasks/mmlu/arc_style/mmlu_logical_fallacies.yaml create mode 100644 lm_eval/tasks/mmlu/arc_style/mmlu_machine_learning.yaml create mode 100644 lm_eval/tasks/mmlu/arc_style/mmlu_management.yaml create mode 100644 lm_eval/tasks/mmlu/arc_style/mmlu_marketing.yaml create mode 100644 lm_eval/tasks/mmlu/arc_style/mmlu_medical_genetics.yaml create mode 100644 lm_eval/tasks/mmlu/arc_style/mmlu_miscellaneous.yaml create mode 100644 lm_eval/tasks/mmlu/arc_style/mmlu_moral_disputes.yaml create mode 100644 lm_eval/tasks/mmlu/arc_style/mmlu_moral_scenarios.yaml create mode 100644 lm_eval/tasks/mmlu/arc_style/mmlu_nutrition.yaml create mode 100644 lm_eval/tasks/mmlu/arc_style/mmlu_philosophy.yaml create mode 100644 lm_eval/tasks/mmlu/arc_style/mmlu_prehistory.yaml create mode 100644 lm_eval/tasks/mmlu/arc_style/mmlu_professional_accounting.yaml create mode 100644 lm_eval/tasks/mmlu/arc_style/mmlu_professional_law.yaml create mode 100644 lm_eval/tasks/mmlu/arc_style/mmlu_professional_medicine.yaml create mode 100644 lm_eval/tasks/mmlu/arc_style/mmlu_professional_psychology.yaml create mode 100644 lm_eval/tasks/mmlu/arc_style/mmlu_public_relations.yaml create mode 100644 lm_eval/tasks/mmlu/arc_style/mmlu_security_studies.yaml create mode 100644 lm_eval/tasks/mmlu/arc_style/mmlu_sociology.yaml create mode 100644 lm_eval/tasks/mmlu/arc_style/mmlu_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/mmlu/arc_style/mmlu_virology.yaml create mode 100644 lm_eval/tasks/mmlu/arc_style/mmlu_world_religions.yaml diff --git a/lm_eval/tasks/mmlu/arc_style/_arc_style_template_yaml b/lm_eval/tasks/mmlu/arc_style/_arc_style_template_yaml new file mode 100644 index 0000000000..b7d75e9869 --- /dev/null +++ b/lm_eval/tasks/mmlu/arc_style/_arc_style_template_yaml @@ -0,0 +1,11 @@ +dataset_path: hails/mmlu_no_train # a copy of `cais/mmlu` with no auxiliary_train split +output_type: multiple_choice +test_split: test +fewshot_split: dev +fewshot_config: + sampler: first_n +doc_to_text: "Question: {{question.strip()}}\nAnswer:" +doc_to_choice: "{{choices}}" +doc_to_target: "{{answer}}" +metadata: + version: 0.0 diff --git a/lm_eval/tasks/mmlu/arc_style/_mmlu.yaml b/lm_eval/tasks/mmlu/arc_style/_mmlu.yaml new file mode 100644 index 0000000000..28e44ae329 --- /dev/null +++ b/lm_eval/tasks/mmlu/arc_style/_mmlu.yaml @@ -0,0 +1,6 @@ +group: mmlu_arc_style +task: + - mmlu_arc_style_stem + - mmlu_arc_style_other + - mmlu_arc_style_social_sciences + - mmlu_arc_style_humanities diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_abstract_algebra.yaml b/lm_eval/tasks/mmlu/arc_style/mmlu_abstract_algebra.yaml new file mode 100644 index 0000000000..a847cc7858 --- /dev/null +++ b/lm_eval/tasks/mmlu/arc_style/mmlu_abstract_algebra.yaml @@ -0,0 +1,6 @@ +"dataset_name": "abstract_algebra" +"description": "The following are questions (with answers) about abstract\ + \ algebra.\n\n" +"group": "mmlu_arc_style_stem" +"include": "_arc_style_template_yaml" +"task": "mmlu_arc_style_abstract_algebra" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_anatomy.yaml b/lm_eval/tasks/mmlu/arc_style/mmlu_anatomy.yaml new file mode 100644 index 0000000000..7aebdb2cdd --- /dev/null +++ b/lm_eval/tasks/mmlu/arc_style/mmlu_anatomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "anatomy" +"description": "The following are questions (with answers) about anatomy.\n\ + \n" +"group": "mmlu_arc_style_stem" +"include": "_arc_style_template_yaml" +"task": "mmlu_arc_style_anatomy" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_astronomy.yaml b/lm_eval/tasks/mmlu/arc_style/mmlu_astronomy.yaml new file mode 100644 index 0000000000..1a76409ec0 --- /dev/null +++ b/lm_eval/tasks/mmlu/arc_style/mmlu_astronomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "astronomy" +"description": "The following are questions (with answers) about astronomy.\n\ + \n" +"group": "mmlu_arc_style_stem" +"include": "_arc_style_template_yaml" +"task": "mmlu_arc_style_astronomy" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_business_ethics.yaml b/lm_eval/tasks/mmlu/arc_style/mmlu_business_ethics.yaml new file mode 100644 index 0000000000..b732b37abd --- /dev/null +++ b/lm_eval/tasks/mmlu/arc_style/mmlu_business_ethics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "business_ethics" +"description": "The following are questions (with answers) about business\ + \ ethics.\n\n" +"group": "mmlu_arc_style_other" +"include": "_arc_style_template_yaml" +"task": "mmlu_arc_style_business_ethics" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_clinical_knowledge.yaml b/lm_eval/tasks/mmlu/arc_style/mmlu_clinical_knowledge.yaml new file mode 100644 index 0000000000..6788dd05a8 --- /dev/null +++ b/lm_eval/tasks/mmlu/arc_style/mmlu_clinical_knowledge.yaml @@ -0,0 +1,6 @@ +"dataset_name": "clinical_knowledge" +"description": "The following are questions (with answers) about clinical\ + \ knowledge.\n\n" +"group": "mmlu_arc_style_other" +"include": "_arc_style_template_yaml" +"task": "mmlu_arc_style_clinical_knowledge" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_college_biology.yaml b/lm_eval/tasks/mmlu/arc_style/mmlu_college_biology.yaml new file mode 100644 index 0000000000..4c156ed146 --- /dev/null +++ b/lm_eval/tasks/mmlu/arc_style/mmlu_college_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_biology" +"description": "The following are questions (with answers) about college\ + \ biology.\n\n" +"group": "mmlu_arc_style_stem" +"include": "_arc_style_template_yaml" +"task": "mmlu_arc_style_college_biology" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_college_chemistry.yaml b/lm_eval/tasks/mmlu/arc_style/mmlu_college_chemistry.yaml new file mode 100644 index 0000000000..e7800f384c --- /dev/null +++ b/lm_eval/tasks/mmlu/arc_style/mmlu_college_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_chemistry" +"description": "The following are questions (with answers) about college\ + \ chemistry.\n\n" +"group": "mmlu_arc_style_stem" +"include": "_arc_style_template_yaml" +"task": "mmlu_arc_style_college_chemistry" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_college_computer_science.yaml b/lm_eval/tasks/mmlu/arc_style/mmlu_college_computer_science.yaml new file mode 100644 index 0000000000..80ea6a8dd0 --- /dev/null +++ b/lm_eval/tasks/mmlu/arc_style/mmlu_college_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_computer_science" +"description": "The following are questions (with answers) about college\ + \ computer science.\n\n" +"group": "mmlu_arc_style_stem" +"include": "_arc_style_template_yaml" +"task": "mmlu_arc_style_college_computer_science" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_college_mathematics.yaml b/lm_eval/tasks/mmlu/arc_style/mmlu_college_mathematics.yaml new file mode 100644 index 0000000000..fee25308a5 --- /dev/null +++ b/lm_eval/tasks/mmlu/arc_style/mmlu_college_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_mathematics" +"description": "The following are questions (with answers) about college\ + \ mathematics.\n\n" +"group": "mmlu_arc_style_stem" +"include": "_arc_style_template_yaml" +"task": "mmlu_arc_style_college_mathematics" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_college_medicine.yaml b/lm_eval/tasks/mmlu/arc_style/mmlu_college_medicine.yaml new file mode 100644 index 0000000000..ac3248ec8e --- /dev/null +++ b/lm_eval/tasks/mmlu/arc_style/mmlu_college_medicine.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_medicine" +"description": "The following are questions (with answers) about college\ + \ medicine.\n\n" +"group": "mmlu_arc_style_other" +"include": "_arc_style_template_yaml" +"task": "mmlu_arc_style_college_medicine" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_college_physics.yaml b/lm_eval/tasks/mmlu/arc_style/mmlu_college_physics.yaml new file mode 100644 index 0000000000..e5344189cb --- /dev/null +++ b/lm_eval/tasks/mmlu/arc_style/mmlu_college_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "college_physics" +"description": "The following are questions (with answers) about college\ + \ physics.\n\n" +"group": "mmlu_arc_style_stem" +"include": "_arc_style_template_yaml" +"task": "mmlu_arc_style_college_physics" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_computer_security.yaml b/lm_eval/tasks/mmlu/arc_style/mmlu_computer_security.yaml new file mode 100644 index 0000000000..ded6da67f5 --- /dev/null +++ b/lm_eval/tasks/mmlu/arc_style/mmlu_computer_security.yaml @@ -0,0 +1,6 @@ +"dataset_name": "computer_security" +"description": "The following are questions (with answers) about computer\ + \ security.\n\n" +"group": "mmlu_arc_style_stem" +"include": "_arc_style_template_yaml" +"task": "mmlu_arc_style_computer_security" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_conceptual_physics.yaml b/lm_eval/tasks/mmlu/arc_style/mmlu_conceptual_physics.yaml new file mode 100644 index 0000000000..1e8db17a21 --- /dev/null +++ b/lm_eval/tasks/mmlu/arc_style/mmlu_conceptual_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "conceptual_physics" +"description": "The following are questions (with answers) about conceptual\ + \ physics.\n\n" +"group": "mmlu_arc_style_stem" +"include": "_arc_style_template_yaml" +"task": "mmlu_arc_style_conceptual_physics" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_econometrics.yaml b/lm_eval/tasks/mmlu/arc_style/mmlu_econometrics.yaml new file mode 100644 index 0000000000..3d895addd3 --- /dev/null +++ b/lm_eval/tasks/mmlu/arc_style/mmlu_econometrics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "econometrics" +"description": "The following are questions (with answers) about econometrics.\n\ + \n" +"group": "mmlu_arc_style_social_sciences" +"include": "_arc_style_template_yaml" +"task": "mmlu_arc_style_econometrics" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_electrical_engineering.yaml b/lm_eval/tasks/mmlu/arc_style/mmlu_electrical_engineering.yaml new file mode 100644 index 0000000000..438d66cedd --- /dev/null +++ b/lm_eval/tasks/mmlu/arc_style/mmlu_electrical_engineering.yaml @@ -0,0 +1,6 @@ +"dataset_name": "electrical_engineering" +"description": "The following are questions (with answers) about electrical\ + \ engineering.\n\n" +"group": "mmlu_arc_style_stem" +"include": "_arc_style_template_yaml" +"task": "mmlu_arc_style_electrical_engineering" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_elementary_mathematics.yaml b/lm_eval/tasks/mmlu/arc_style/mmlu_elementary_mathematics.yaml new file mode 100644 index 0000000000..7ab6a90e37 --- /dev/null +++ b/lm_eval/tasks/mmlu/arc_style/mmlu_elementary_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "elementary_mathematics" +"description": "The following are questions (with answers) about elementary\ + \ mathematics.\n\n" +"group": "mmlu_arc_style_stem" +"include": "_arc_style_template_yaml" +"task": "mmlu_arc_style_elementary_mathematics" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_formal_logic.yaml b/lm_eval/tasks/mmlu/arc_style/mmlu_formal_logic.yaml new file mode 100644 index 0000000000..bdf75ab9fe --- /dev/null +++ b/lm_eval/tasks/mmlu/arc_style/mmlu_formal_logic.yaml @@ -0,0 +1,6 @@ +"dataset_name": "formal_logic" +"description": "The following are questions (with answers) about formal\ + \ logic.\n\n" +"group": "mmlu_arc_style_humanities" +"include": "_arc_style_template_yaml" +"task": "mmlu_arc_style_formal_logic" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_global_facts.yaml b/lm_eval/tasks/mmlu/arc_style/mmlu_global_facts.yaml new file mode 100644 index 0000000000..ee61929276 --- /dev/null +++ b/lm_eval/tasks/mmlu/arc_style/mmlu_global_facts.yaml @@ -0,0 +1,6 @@ +"dataset_name": "global_facts" +"description": "The following are questions (with answers) about global\ + \ facts.\n\n" +"group": "mmlu_arc_style_other" +"include": "_arc_style_template_yaml" +"task": "mmlu_arc_style_global_facts" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_high_school_biology.yaml b/lm_eval/tasks/mmlu/arc_style/mmlu_high_school_biology.yaml new file mode 100644 index 0000000000..0e25488833 --- /dev/null +++ b/lm_eval/tasks/mmlu/arc_style/mmlu_high_school_biology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_biology" +"description": "The following are questions (with answers) about high\ + \ school biology.\n\n" +"group": "mmlu_arc_style_stem" +"include": "_arc_style_template_yaml" +"task": "mmlu_arc_style_high_school_biology" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_high_school_chemistry.yaml b/lm_eval/tasks/mmlu/arc_style/mmlu_high_school_chemistry.yaml new file mode 100644 index 0000000000..1f20fa7159 --- /dev/null +++ b/lm_eval/tasks/mmlu/arc_style/mmlu_high_school_chemistry.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_chemistry" +"description": "The following are questions (with answers) about high\ + \ school chemistry.\n\n" +"group": "mmlu_arc_style_stem" +"include": "_arc_style_template_yaml" +"task": "mmlu_arc_style_high_school_chemistry" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_high_school_computer_science.yaml b/lm_eval/tasks/mmlu/arc_style/mmlu_high_school_computer_science.yaml new file mode 100644 index 0000000000..0a643f568c --- /dev/null +++ b/lm_eval/tasks/mmlu/arc_style/mmlu_high_school_computer_science.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_computer_science" +"description": "The following are questions (with answers) about high\ + \ school computer science.\n\n" +"group": "mmlu_arc_style_stem" +"include": "_arc_style_template_yaml" +"task": "mmlu_arc_style_high_school_computer_science" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_high_school_european_history.yaml b/lm_eval/tasks/mmlu/arc_style/mmlu_high_school_european_history.yaml new file mode 100644 index 0000000000..0e5c657994 --- /dev/null +++ b/lm_eval/tasks/mmlu/arc_style/mmlu_high_school_european_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_european_history" +"description": "The following are questions (with answers) about high\ + \ school european history.\n\n" +"group": "mmlu_arc_style_humanities" +"include": "_arc_style_template_yaml" +"task": "mmlu_arc_style_high_school_european_history" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_high_school_geography.yaml b/lm_eval/tasks/mmlu/arc_style/mmlu_high_school_geography.yaml new file mode 100644 index 0000000000..ddc90bcbf6 --- /dev/null +++ b/lm_eval/tasks/mmlu/arc_style/mmlu_high_school_geography.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_geography" +"description": "The following are questions (with answers) about high\ + \ school geography.\n\n" +"group": "mmlu_arc_style_social_sciences" +"include": "_arc_style_template_yaml" +"task": "mmlu_arc_style_high_school_geography" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_high_school_government_and_politics.yaml b/lm_eval/tasks/mmlu/arc_style/mmlu_high_school_government_and_politics.yaml new file mode 100644 index 0000000000..b60336f946 --- /dev/null +++ b/lm_eval/tasks/mmlu/arc_style/mmlu_high_school_government_and_politics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_government_and_politics" +"description": "The following are questions (with answers) about high\ + \ school government and politics.\n\n" +"group": "mmlu_arc_style_social_sciences" +"include": "_arc_style_template_yaml" +"task": "mmlu_arc_style_high_school_government_and_politics" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_high_school_macroeconomics.yaml b/lm_eval/tasks/mmlu/arc_style/mmlu_high_school_macroeconomics.yaml new file mode 100644 index 0000000000..5c356cf8c1 --- /dev/null +++ b/lm_eval/tasks/mmlu/arc_style/mmlu_high_school_macroeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_macroeconomics" +"description": "The following are questions (with answers) about high\ + \ school macroeconomics.\n\n" +"group": "mmlu_arc_style_social_sciences" +"include": "_arc_style_template_yaml" +"task": "mmlu_arc_style_high_school_macroeconomics" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_high_school_mathematics.yaml b/lm_eval/tasks/mmlu/arc_style/mmlu_high_school_mathematics.yaml new file mode 100644 index 0000000000..07139b883e --- /dev/null +++ b/lm_eval/tasks/mmlu/arc_style/mmlu_high_school_mathematics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_mathematics" +"description": "The following are questions (with answers) about high\ + \ school mathematics.\n\n" +"group": "mmlu_arc_style_stem" +"include": "_arc_style_template_yaml" +"task": "mmlu_arc_style_high_school_mathematics" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_high_school_microeconomics.yaml b/lm_eval/tasks/mmlu/arc_style/mmlu_high_school_microeconomics.yaml new file mode 100644 index 0000000000..bec0e39fd0 --- /dev/null +++ b/lm_eval/tasks/mmlu/arc_style/mmlu_high_school_microeconomics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_microeconomics" +"description": "The following are questions (with answers) about high\ + \ school microeconomics.\n\n" +"group": "mmlu_arc_style_social_sciences" +"include": "_arc_style_template_yaml" +"task": "mmlu_arc_style_high_school_microeconomics" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_high_school_physics.yaml b/lm_eval/tasks/mmlu/arc_style/mmlu_high_school_physics.yaml new file mode 100644 index 0000000000..c7a8906cc5 --- /dev/null +++ b/lm_eval/tasks/mmlu/arc_style/mmlu_high_school_physics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_physics" +"description": "The following are questions (with answers) about high\ + \ school physics.\n\n" +"group": "mmlu_arc_style_stem" +"include": "_arc_style_template_yaml" +"task": "mmlu_arc_style_high_school_physics" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_high_school_psychology.yaml b/lm_eval/tasks/mmlu/arc_style/mmlu_high_school_psychology.yaml new file mode 100644 index 0000000000..25c30ba7a3 --- /dev/null +++ b/lm_eval/tasks/mmlu/arc_style/mmlu_high_school_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_psychology" +"description": "The following are questions (with answers) about high\ + \ school psychology.\n\n" +"group": "mmlu_arc_style_social_sciences" +"include": "_arc_style_template_yaml" +"task": "mmlu_arc_style_high_school_psychology" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_high_school_statistics.yaml b/lm_eval/tasks/mmlu/arc_style/mmlu_high_school_statistics.yaml new file mode 100644 index 0000000000..32119ae8cd --- /dev/null +++ b/lm_eval/tasks/mmlu/arc_style/mmlu_high_school_statistics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_statistics" +"description": "The following are questions (with answers) about high\ + \ school statistics.\n\n" +"group": "mmlu_arc_style_stem" +"include": "_arc_style_template_yaml" +"task": "mmlu_arc_style_high_school_statistics" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_high_school_us_history.yaml b/lm_eval/tasks/mmlu/arc_style/mmlu_high_school_us_history.yaml new file mode 100644 index 0000000000..6e682d6071 --- /dev/null +++ b/lm_eval/tasks/mmlu/arc_style/mmlu_high_school_us_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_us_history" +"description": "The following are questions (with answers) about high\ + \ school us history.\n\n" +"group": "mmlu_arc_style_humanities" +"include": "_arc_style_template_yaml" +"task": "mmlu_arc_style_high_school_us_history" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_high_school_world_history.yaml b/lm_eval/tasks/mmlu/arc_style/mmlu_high_school_world_history.yaml new file mode 100644 index 0000000000..61220d298d --- /dev/null +++ b/lm_eval/tasks/mmlu/arc_style/mmlu_high_school_world_history.yaml @@ -0,0 +1,6 @@ +"dataset_name": "high_school_world_history" +"description": "The following are questions (with answers) about high\ + \ school world history.\n\n" +"group": "mmlu_arc_style_humanities" +"include": "_arc_style_template_yaml" +"task": "mmlu_arc_style_high_school_world_history" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_human_aging.yaml b/lm_eval/tasks/mmlu/arc_style/mmlu_human_aging.yaml new file mode 100644 index 0000000000..62c0d859e5 --- /dev/null +++ b/lm_eval/tasks/mmlu/arc_style/mmlu_human_aging.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_aging" +"description": "The following are questions (with answers) about human\ + \ aging.\n\n" +"group": "mmlu_arc_style_other" +"include": "_arc_style_template_yaml" +"task": "mmlu_arc_style_human_aging" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_human_sexuality.yaml b/lm_eval/tasks/mmlu/arc_style/mmlu_human_sexuality.yaml new file mode 100644 index 0000000000..603ecd9529 --- /dev/null +++ b/lm_eval/tasks/mmlu/arc_style/mmlu_human_sexuality.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_sexuality" +"description": "The following are questions (with answers) about human\ + \ sexuality.\n\n" +"group": "mmlu_arc_style_social_sciences" +"include": "_arc_style_template_yaml" +"task": "mmlu_arc_style_human_sexuality" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_international_law.yaml b/lm_eval/tasks/mmlu/arc_style/mmlu_international_law.yaml new file mode 100644 index 0000000000..ff0c8038b2 --- /dev/null +++ b/lm_eval/tasks/mmlu/arc_style/mmlu_international_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "international_law" +"description": "The following are questions (with answers) about international\ + \ law.\n\n" +"group": "mmlu_arc_style_humanities" +"include": "_arc_style_template_yaml" +"task": "mmlu_arc_style_international_law" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_jurisprudence.yaml b/lm_eval/tasks/mmlu/arc_style/mmlu_jurisprudence.yaml new file mode 100644 index 0000000000..6c1a0e3de0 --- /dev/null +++ b/lm_eval/tasks/mmlu/arc_style/mmlu_jurisprudence.yaml @@ -0,0 +1,6 @@ +"dataset_name": "jurisprudence" +"description": "The following are questions (with answers) about jurisprudence.\n\ + \n" +"group": "mmlu_arc_style_humanities" +"include": "_arc_style_template_yaml" +"task": "mmlu_arc_style_jurisprudence" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_logical_fallacies.yaml b/lm_eval/tasks/mmlu/arc_style/mmlu_logical_fallacies.yaml new file mode 100644 index 0000000000..1e4a9c2fef --- /dev/null +++ b/lm_eval/tasks/mmlu/arc_style/mmlu_logical_fallacies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "logical_fallacies" +"description": "The following are questions (with answers) about logical\ + \ fallacies.\n\n" +"group": "mmlu_arc_style_humanities" +"include": "_arc_style_template_yaml" +"task": "mmlu_arc_style_logical_fallacies" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_machine_learning.yaml b/lm_eval/tasks/mmlu/arc_style/mmlu_machine_learning.yaml new file mode 100644 index 0000000000..477deda6aa --- /dev/null +++ b/lm_eval/tasks/mmlu/arc_style/mmlu_machine_learning.yaml @@ -0,0 +1,6 @@ +"dataset_name": "machine_learning" +"description": "The following are questions (with answers) about machine\ + \ learning.\n\n" +"group": "mmlu_arc_style_stem" +"include": "_arc_style_template_yaml" +"task": "mmlu_arc_style_machine_learning" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_management.yaml b/lm_eval/tasks/mmlu/arc_style/mmlu_management.yaml new file mode 100644 index 0000000000..7315e9c5ed --- /dev/null +++ b/lm_eval/tasks/mmlu/arc_style/mmlu_management.yaml @@ -0,0 +1,6 @@ +"dataset_name": "management" +"description": "The following are questions (with answers) about management.\n\ + \n" +"group": "mmlu_arc_style_other" +"include": "_arc_style_template_yaml" +"task": "mmlu_arc_style_management" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_marketing.yaml b/lm_eval/tasks/mmlu/arc_style/mmlu_marketing.yaml new file mode 100644 index 0000000000..d224c0e8fb --- /dev/null +++ b/lm_eval/tasks/mmlu/arc_style/mmlu_marketing.yaml @@ -0,0 +1,6 @@ +"dataset_name": "marketing" +"description": "The following are questions (with answers) about marketing.\n\ + \n" +"group": "mmlu_arc_style_other" +"include": "_arc_style_template_yaml" +"task": "mmlu_arc_style_marketing" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_medical_genetics.yaml b/lm_eval/tasks/mmlu/arc_style/mmlu_medical_genetics.yaml new file mode 100644 index 0000000000..5d09745e30 --- /dev/null +++ b/lm_eval/tasks/mmlu/arc_style/mmlu_medical_genetics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "medical_genetics" +"description": "The following are questions (with answers) about medical\ + \ genetics.\n\n" +"group": "mmlu_arc_style_other" +"include": "_arc_style_template_yaml" +"task": "mmlu_arc_style_medical_genetics" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_miscellaneous.yaml b/lm_eval/tasks/mmlu/arc_style/mmlu_miscellaneous.yaml new file mode 100644 index 0000000000..de8fa75236 --- /dev/null +++ b/lm_eval/tasks/mmlu/arc_style/mmlu_miscellaneous.yaml @@ -0,0 +1,6 @@ +"dataset_name": "miscellaneous" +"description": "The following are questions (with answers) about miscellaneous.\n\ + \n" +"group": "mmlu_arc_style_other" +"include": "_arc_style_template_yaml" +"task": "mmlu_arc_style_miscellaneous" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_moral_disputes.yaml b/lm_eval/tasks/mmlu/arc_style/mmlu_moral_disputes.yaml new file mode 100644 index 0000000000..2e3e60900c --- /dev/null +++ b/lm_eval/tasks/mmlu/arc_style/mmlu_moral_disputes.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_disputes" +"description": "The following are questions (with answers) about moral\ + \ disputes.\n\n" +"group": "mmlu_arc_style_humanities" +"include": "_arc_style_template_yaml" +"task": "mmlu_arc_style_moral_disputes" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_moral_scenarios.yaml b/lm_eval/tasks/mmlu/arc_style/mmlu_moral_scenarios.yaml new file mode 100644 index 0000000000..5fb50c9ef2 --- /dev/null +++ b/lm_eval/tasks/mmlu/arc_style/mmlu_moral_scenarios.yaml @@ -0,0 +1,6 @@ +"dataset_name": "moral_scenarios" +"description": "The following are questions (with answers) about moral\ + \ scenarios.\n\n" +"group": "mmlu_arc_style_humanities" +"include": "_arc_style_template_yaml" +"task": "mmlu_arc_style_moral_scenarios" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_nutrition.yaml b/lm_eval/tasks/mmlu/arc_style/mmlu_nutrition.yaml new file mode 100644 index 0000000000..365d4f7ac7 --- /dev/null +++ b/lm_eval/tasks/mmlu/arc_style/mmlu_nutrition.yaml @@ -0,0 +1,6 @@ +"dataset_name": "nutrition" +"description": "The following are questions (with answers) about nutrition.\n\ + \n" +"group": "mmlu_arc_style_other" +"include": "_arc_style_template_yaml" +"task": "mmlu_arc_style_nutrition" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_philosophy.yaml b/lm_eval/tasks/mmlu/arc_style/mmlu_philosophy.yaml new file mode 100644 index 0000000000..530ecf3079 --- /dev/null +++ b/lm_eval/tasks/mmlu/arc_style/mmlu_philosophy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "philosophy" +"description": "The following are questions (with answers) about philosophy.\n\ + \n" +"group": "mmlu_arc_style_humanities" +"include": "_arc_style_template_yaml" +"task": "mmlu_arc_style_philosophy" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_prehistory.yaml b/lm_eval/tasks/mmlu/arc_style/mmlu_prehistory.yaml new file mode 100644 index 0000000000..791c4a3a77 --- /dev/null +++ b/lm_eval/tasks/mmlu/arc_style/mmlu_prehistory.yaml @@ -0,0 +1,6 @@ +"dataset_name": "prehistory" +"description": "The following are questions (with answers) about prehistory.\n\ + \n" +"group": "mmlu_arc_style_humanities" +"include": "_arc_style_template_yaml" +"task": "mmlu_arc_style_prehistory" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_professional_accounting.yaml b/lm_eval/tasks/mmlu/arc_style/mmlu_professional_accounting.yaml new file mode 100644 index 0000000000..9e52f864a8 --- /dev/null +++ b/lm_eval/tasks/mmlu/arc_style/mmlu_professional_accounting.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_accounting" +"description": "The following are questions (with answers) about professional\ + \ accounting.\n\n" +"group": "mmlu_arc_style_other" +"include": "_arc_style_template_yaml" +"task": "mmlu_arc_style_professional_accounting" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_professional_law.yaml b/lm_eval/tasks/mmlu/arc_style/mmlu_professional_law.yaml new file mode 100644 index 0000000000..e8421ec0b9 --- /dev/null +++ b/lm_eval/tasks/mmlu/arc_style/mmlu_professional_law.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_law" +"description": "The following are questions (with answers) about professional\ + \ law.\n\n" +"group": "mmlu_arc_style_humanities" +"include": "_arc_style_template_yaml" +"task": "mmlu_arc_style_professional_law" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_professional_medicine.yaml b/lm_eval/tasks/mmlu/arc_style/mmlu_professional_medicine.yaml new file mode 100644 index 0000000000..e33ca2dbd1 --- /dev/null +++ b/lm_eval/tasks/mmlu/arc_style/mmlu_professional_medicine.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_medicine" +"description": "The following are questions (with answers) about professional\ + \ medicine.\n\n" +"group": "mmlu_arc_style_other" +"include": "_arc_style_template_yaml" +"task": "mmlu_arc_style_professional_medicine" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_professional_psychology.yaml b/lm_eval/tasks/mmlu/arc_style/mmlu_professional_psychology.yaml new file mode 100644 index 0000000000..a489057894 --- /dev/null +++ b/lm_eval/tasks/mmlu/arc_style/mmlu_professional_psychology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "professional_psychology" +"description": "The following are questions (with answers) about professional\ + \ psychology.\n\n" +"group": "mmlu_arc_style_social_sciences" +"include": "_arc_style_template_yaml" +"task": "mmlu_arc_style_professional_psychology" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_public_relations.yaml b/lm_eval/tasks/mmlu/arc_style/mmlu_public_relations.yaml new file mode 100644 index 0000000000..c4a6de77ca --- /dev/null +++ b/lm_eval/tasks/mmlu/arc_style/mmlu_public_relations.yaml @@ -0,0 +1,6 @@ +"dataset_name": "public_relations" +"description": "The following are questions (with answers) about public\ + \ relations.\n\n" +"group": "mmlu_arc_style_social_sciences" +"include": "_arc_style_template_yaml" +"task": "mmlu_arc_style_public_relations" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_security_studies.yaml b/lm_eval/tasks/mmlu/arc_style/mmlu_security_studies.yaml new file mode 100644 index 0000000000..ef67bbe6eb --- /dev/null +++ b/lm_eval/tasks/mmlu/arc_style/mmlu_security_studies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "security_studies" +"description": "The following are questions (with answers) about security\ + \ studies.\n\n" +"group": "mmlu_arc_style_social_sciences" +"include": "_arc_style_template_yaml" +"task": "mmlu_arc_style_security_studies" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_sociology.yaml b/lm_eval/tasks/mmlu/arc_style/mmlu_sociology.yaml new file mode 100644 index 0000000000..fd0431895a --- /dev/null +++ b/lm_eval/tasks/mmlu/arc_style/mmlu_sociology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "sociology" +"description": "The following are questions (with answers) about sociology.\n\ + \n" +"group": "mmlu_arc_style_social_sciences" +"include": "_arc_style_template_yaml" +"task": "mmlu_arc_style_sociology" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_us_foreign_policy.yaml b/lm_eval/tasks/mmlu/arc_style/mmlu_us_foreign_policy.yaml new file mode 100644 index 0000000000..2bdb27ec3a --- /dev/null +++ b/lm_eval/tasks/mmlu/arc_style/mmlu_us_foreign_policy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "us_foreign_policy" +"description": "The following are questions (with answers) about us\ + \ foreign policy.\n\n" +"group": "mmlu_arc_style_social_sciences" +"include": "_arc_style_template_yaml" +"task": "mmlu_arc_style_us_foreign_policy" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_virology.yaml b/lm_eval/tasks/mmlu/arc_style/mmlu_virology.yaml new file mode 100644 index 0000000000..3ebf9f1306 --- /dev/null +++ b/lm_eval/tasks/mmlu/arc_style/mmlu_virology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "virology" +"description": "The following are questions (with answers) about virology.\n\ + \n" +"group": "mmlu_arc_style_other" +"include": "_arc_style_template_yaml" +"task": "mmlu_arc_style_virology" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_world_religions.yaml b/lm_eval/tasks/mmlu/arc_style/mmlu_world_religions.yaml new file mode 100644 index 0000000000..02bfe08dc9 --- /dev/null +++ b/lm_eval/tasks/mmlu/arc_style/mmlu_world_religions.yaml @@ -0,0 +1,6 @@ +"dataset_name": "world_religions" +"description": "The following are questions (with answers) about world\ + \ religions.\n\n" +"group": "mmlu_arc_style_humanities" +"include": "_arc_style_template_yaml" +"task": "mmlu_arc_style_world_religions" From 9b727735142aaf9342342e94e33329749048f76e Mon Sep 17 00:00:00 2001 From: Jonathan Burdge Date: Wed, 24 Apr 2024 21:53:16 +0300 Subject: [PATCH 2/2] rename arc_style to continuation --- lm_eval/tasks/mmlu/arc_style/_mmlu.yaml | 6 ------ lm_eval/tasks/mmlu/arc_style/mmlu_anatomy.yaml | 6 ------ lm_eval/tasks/mmlu/arc_style/mmlu_econometrics.yaml | 6 ------ lm_eval/tasks/mmlu/arc_style/mmlu_formal_logic.yaml | 6 ------ lm_eval/tasks/mmlu/arc_style/mmlu_human_sexuality.yaml | 6 ------ lm_eval/tasks/mmlu/arc_style/mmlu_jurisprudence.yaml | 6 ------ lm_eval/tasks/mmlu/arc_style/mmlu_marketing.yaml | 6 ------ lm_eval/tasks/mmlu/arc_style/mmlu_nutrition.yaml | 6 ------ lm_eval/tasks/mmlu/arc_style/mmlu_philosophy.yaml | 6 ------ lm_eval/tasks/mmlu/arc_style/mmlu_prehistory.yaml | 6 ------ lm_eval/tasks/mmlu/arc_style/mmlu_public_relations.yaml | 6 ------ lm_eval/tasks/mmlu/arc_style/mmlu_security_studies.yaml | 6 ------ lm_eval/tasks/mmlu/arc_style/mmlu_sociology.yaml | 6 ------ lm_eval/tasks/mmlu/arc_style/mmlu_us_foreign_policy.yaml | 6 ------ lm_eval/tasks/mmlu/arc_style/mmlu_virology.yaml | 6 ------ .../_continuation_template_yaml} | 0 lm_eval/tasks/mmlu/continuation/_mmlu.yaml | 6 ++++++ .../{arc_style => continuation}/mmlu_abstract_algebra.yaml | 6 +++--- lm_eval/tasks/mmlu/continuation/mmlu_anatomy.yaml | 6 ++++++ .../mmlu/{arc_style => continuation}/mmlu_astronomy.yaml | 6 +++--- .../{arc_style => continuation}/mmlu_business_ethics.yaml | 6 +++--- .../mmlu_clinical_knowledge.yaml | 6 +++--- .../{arc_style => continuation}/mmlu_college_biology.yaml | 6 +++--- .../{arc_style => continuation}/mmlu_college_chemistry.yaml | 6 +++--- .../mmlu_college_computer_science.yaml | 6 +++--- .../mmlu_college_mathematics.yaml | 6 +++--- .../{arc_style => continuation}/mmlu_college_medicine.yaml | 6 +++--- .../{arc_style => continuation}/mmlu_college_physics.yaml | 6 +++--- .../{arc_style => continuation}/mmlu_computer_security.yaml | 6 +++--- .../mmlu_conceptual_physics.yaml | 6 +++--- lm_eval/tasks/mmlu/continuation/mmlu_econometrics.yaml | 6 ++++++ .../mmlu_electrical_engineering.yaml | 6 +++--- .../mmlu_elementary_mathematics.yaml | 6 +++--- lm_eval/tasks/mmlu/continuation/mmlu_formal_logic.yaml | 6 ++++++ .../mmlu/{arc_style => continuation}/mmlu_global_facts.yaml | 6 +++--- .../mmlu_high_school_biology.yaml | 6 +++--- .../mmlu_high_school_chemistry.yaml | 6 +++--- .../mmlu_high_school_computer_science.yaml | 6 +++--- .../mmlu_high_school_european_history.yaml | 6 +++--- .../mmlu_high_school_geography.yaml | 6 +++--- .../mmlu_high_school_government_and_politics.yaml | 6 +++--- .../mmlu_high_school_macroeconomics.yaml | 6 +++--- .../mmlu_high_school_mathematics.yaml | 6 +++--- .../mmlu_high_school_microeconomics.yaml | 6 +++--- .../mmlu_high_school_physics.yaml | 6 +++--- .../mmlu_high_school_psychology.yaml | 6 +++--- .../mmlu_high_school_statistics.yaml | 6 +++--- .../mmlu_high_school_us_history.yaml | 6 +++--- .../mmlu_high_school_world_history.yaml | 6 +++--- .../mmlu/{arc_style => continuation}/mmlu_human_aging.yaml | 6 +++--- lm_eval/tasks/mmlu/continuation/mmlu_human_sexuality.yaml | 6 ++++++ .../{arc_style => continuation}/mmlu_international_law.yaml | 6 +++--- lm_eval/tasks/mmlu/continuation/mmlu_jurisprudence.yaml | 6 ++++++ .../{arc_style => continuation}/mmlu_logical_fallacies.yaml | 6 +++--- .../{arc_style => continuation}/mmlu_machine_learning.yaml | 6 +++--- .../mmlu/{arc_style => continuation}/mmlu_management.yaml | 6 +++--- lm_eval/tasks/mmlu/continuation/mmlu_marketing.yaml | 6 ++++++ .../{arc_style => continuation}/mmlu_medical_genetics.yaml | 6 +++--- .../{arc_style => continuation}/mmlu_miscellaneous.yaml | 6 +++--- .../{arc_style => continuation}/mmlu_moral_disputes.yaml | 6 +++--- .../{arc_style => continuation}/mmlu_moral_scenarios.yaml | 6 +++--- lm_eval/tasks/mmlu/continuation/mmlu_nutrition.yaml | 6 ++++++ lm_eval/tasks/mmlu/continuation/mmlu_philosophy.yaml | 6 ++++++ lm_eval/tasks/mmlu/continuation/mmlu_prehistory.yaml | 6 ++++++ .../mmlu_professional_accounting.yaml | 6 +++--- .../{arc_style => continuation}/mmlu_professional_law.yaml | 6 +++--- .../mmlu_professional_medicine.yaml | 6 +++--- .../mmlu_professional_psychology.yaml | 6 +++--- lm_eval/tasks/mmlu/continuation/mmlu_public_relations.yaml | 6 ++++++ lm_eval/tasks/mmlu/continuation/mmlu_security_studies.yaml | 6 ++++++ lm_eval/tasks/mmlu/continuation/mmlu_sociology.yaml | 6 ++++++ lm_eval/tasks/mmlu/continuation/mmlu_us_foreign_policy.yaml | 6 ++++++ lm_eval/tasks/mmlu/continuation/mmlu_virology.yaml | 6 ++++++ .../{arc_style => continuation}/mmlu_world_religions.yaml | 6 +++--- 74 files changed, 219 insertions(+), 219 deletions(-) delete mode 100644 lm_eval/tasks/mmlu/arc_style/_mmlu.yaml delete mode 100644 lm_eval/tasks/mmlu/arc_style/mmlu_anatomy.yaml delete mode 100644 lm_eval/tasks/mmlu/arc_style/mmlu_econometrics.yaml delete mode 100644 lm_eval/tasks/mmlu/arc_style/mmlu_formal_logic.yaml delete mode 100644 lm_eval/tasks/mmlu/arc_style/mmlu_human_sexuality.yaml delete mode 100644 lm_eval/tasks/mmlu/arc_style/mmlu_jurisprudence.yaml delete mode 100644 lm_eval/tasks/mmlu/arc_style/mmlu_marketing.yaml delete mode 100644 lm_eval/tasks/mmlu/arc_style/mmlu_nutrition.yaml delete mode 100644 lm_eval/tasks/mmlu/arc_style/mmlu_philosophy.yaml delete mode 100644 lm_eval/tasks/mmlu/arc_style/mmlu_prehistory.yaml delete mode 100644 lm_eval/tasks/mmlu/arc_style/mmlu_public_relations.yaml delete mode 100644 lm_eval/tasks/mmlu/arc_style/mmlu_security_studies.yaml delete mode 100644 lm_eval/tasks/mmlu/arc_style/mmlu_sociology.yaml delete mode 100644 lm_eval/tasks/mmlu/arc_style/mmlu_us_foreign_policy.yaml delete mode 100644 lm_eval/tasks/mmlu/arc_style/mmlu_virology.yaml rename lm_eval/tasks/mmlu/{arc_style/_arc_style_template_yaml => continuation/_continuation_template_yaml} (100%) create mode 100644 lm_eval/tasks/mmlu/continuation/_mmlu.yaml rename lm_eval/tasks/mmlu/{arc_style => continuation}/mmlu_abstract_algebra.yaml (51%) create mode 100644 lm_eval/tasks/mmlu/continuation/mmlu_anatomy.yaml rename lm_eval/tasks/mmlu/{arc_style => continuation}/mmlu_astronomy.yaml (50%) rename lm_eval/tasks/mmlu/{arc_style => continuation}/mmlu_business_ethics.yaml (51%) rename lm_eval/tasks/mmlu/{arc_style => continuation}/mmlu_clinical_knowledge.yaml (51%) rename lm_eval/tasks/mmlu/{arc_style => continuation}/mmlu_college_biology.yaml (51%) rename lm_eval/tasks/mmlu/{arc_style => continuation}/mmlu_college_chemistry.yaml (51%) rename lm_eval/tasks/mmlu/{arc_style => continuation}/mmlu_college_computer_science.yaml (52%) rename lm_eval/tasks/mmlu/{arc_style => continuation}/mmlu_college_mathematics.yaml (52%) rename lm_eval/tasks/mmlu/{arc_style => continuation}/mmlu_college_medicine.yaml (51%) rename lm_eval/tasks/mmlu/{arc_style => continuation}/mmlu_college_physics.yaml (51%) rename lm_eval/tasks/mmlu/{arc_style => continuation}/mmlu_computer_security.yaml (51%) rename lm_eval/tasks/mmlu/{arc_style => continuation}/mmlu_conceptual_physics.yaml (51%) create mode 100644 lm_eval/tasks/mmlu/continuation/mmlu_econometrics.yaml rename lm_eval/tasks/mmlu/{arc_style => continuation}/mmlu_electrical_engineering.yaml (52%) rename lm_eval/tasks/mmlu/{arc_style => continuation}/mmlu_elementary_mathematics.yaml (52%) create mode 100644 lm_eval/tasks/mmlu/continuation/mmlu_formal_logic.yaml rename lm_eval/tasks/mmlu/{arc_style => continuation}/mmlu_global_facts.yaml (50%) rename lm_eval/tasks/mmlu/{arc_style => continuation}/mmlu_high_school_biology.yaml (52%) rename lm_eval/tasks/mmlu/{arc_style => continuation}/mmlu_high_school_chemistry.yaml (52%) rename lm_eval/tasks/mmlu/{arc_style => continuation}/mmlu_high_school_computer_science.yaml (53%) rename lm_eval/tasks/mmlu/{arc_style => continuation}/mmlu_high_school_european_history.yaml (52%) rename lm_eval/tasks/mmlu/{arc_style => continuation}/mmlu_high_school_geography.yaml (50%) rename lm_eval/tasks/mmlu/{arc_style => continuation}/mmlu_high_school_government_and_politics.yaml (52%) rename lm_eval/tasks/mmlu/{arc_style => continuation}/mmlu_high_school_macroeconomics.yaml (51%) rename lm_eval/tasks/mmlu/{arc_style => continuation}/mmlu_high_school_mathematics.yaml (52%) rename lm_eval/tasks/mmlu/{arc_style => continuation}/mmlu_high_school_microeconomics.yaml (51%) rename lm_eval/tasks/mmlu/{arc_style => continuation}/mmlu_high_school_physics.yaml (52%) rename lm_eval/tasks/mmlu/{arc_style => continuation}/mmlu_high_school_psychology.yaml (50%) rename lm_eval/tasks/mmlu/{arc_style => continuation}/mmlu_high_school_statistics.yaml (52%) rename lm_eval/tasks/mmlu/{arc_style => continuation}/mmlu_high_school_us_history.yaml (51%) rename lm_eval/tasks/mmlu/{arc_style => continuation}/mmlu_high_school_world_history.yaml (51%) rename lm_eval/tasks/mmlu/{arc_style => continuation}/mmlu_human_aging.yaml (50%) create mode 100644 lm_eval/tasks/mmlu/continuation/mmlu_human_sexuality.yaml rename lm_eval/tasks/mmlu/{arc_style => continuation}/mmlu_international_law.yaml (50%) create mode 100644 lm_eval/tasks/mmlu/continuation/mmlu_jurisprudence.yaml rename lm_eval/tasks/mmlu/{arc_style => continuation}/mmlu_logical_fallacies.yaml (50%) rename lm_eval/tasks/mmlu/{arc_style => continuation}/mmlu_machine_learning.yaml (51%) rename lm_eval/tasks/mmlu/{arc_style => continuation}/mmlu_management.yaml (50%) create mode 100644 lm_eval/tasks/mmlu/continuation/mmlu_marketing.yaml rename lm_eval/tasks/mmlu/{arc_style => continuation}/mmlu_medical_genetics.yaml (51%) rename lm_eval/tasks/mmlu/{arc_style => continuation}/mmlu_miscellaneous.yaml (50%) rename lm_eval/tasks/mmlu/{arc_style => continuation}/mmlu_moral_disputes.yaml (50%) rename lm_eval/tasks/mmlu/{arc_style => continuation}/mmlu_moral_scenarios.yaml (50%) create mode 100644 lm_eval/tasks/mmlu/continuation/mmlu_nutrition.yaml create mode 100644 lm_eval/tasks/mmlu/continuation/mmlu_philosophy.yaml create mode 100644 lm_eval/tasks/mmlu/continuation/mmlu_prehistory.yaml rename lm_eval/tasks/mmlu/{arc_style => continuation}/mmlu_professional_accounting.yaml (52%) rename lm_eval/tasks/mmlu/{arc_style => continuation}/mmlu_professional_law.yaml (50%) rename lm_eval/tasks/mmlu/{arc_style => continuation}/mmlu_professional_medicine.yaml (52%) rename lm_eval/tasks/mmlu/{arc_style => continuation}/mmlu_professional_psychology.yaml (50%) create mode 100644 lm_eval/tasks/mmlu/continuation/mmlu_public_relations.yaml create mode 100644 lm_eval/tasks/mmlu/continuation/mmlu_security_studies.yaml create mode 100644 lm_eval/tasks/mmlu/continuation/mmlu_sociology.yaml create mode 100644 lm_eval/tasks/mmlu/continuation/mmlu_us_foreign_policy.yaml create mode 100644 lm_eval/tasks/mmlu/continuation/mmlu_virology.yaml rename lm_eval/tasks/mmlu/{arc_style => continuation}/mmlu_world_religions.yaml (50%) diff --git a/lm_eval/tasks/mmlu/arc_style/_mmlu.yaml b/lm_eval/tasks/mmlu/arc_style/_mmlu.yaml deleted file mode 100644 index 28e44ae329..0000000000 --- a/lm_eval/tasks/mmlu/arc_style/_mmlu.yaml +++ /dev/null @@ -1,6 +0,0 @@ -group: mmlu_arc_style -task: - - mmlu_arc_style_stem - - mmlu_arc_style_other - - mmlu_arc_style_social_sciences - - mmlu_arc_style_humanities diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_anatomy.yaml b/lm_eval/tasks/mmlu/arc_style/mmlu_anatomy.yaml deleted file mode 100644 index 7aebdb2cdd..0000000000 --- a/lm_eval/tasks/mmlu/arc_style/mmlu_anatomy.yaml +++ /dev/null @@ -1,6 +0,0 @@ -"dataset_name": "anatomy" -"description": "The following are questions (with answers) about anatomy.\n\ - \n" -"group": "mmlu_arc_style_stem" -"include": "_arc_style_template_yaml" -"task": "mmlu_arc_style_anatomy" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_econometrics.yaml b/lm_eval/tasks/mmlu/arc_style/mmlu_econometrics.yaml deleted file mode 100644 index 3d895addd3..0000000000 --- a/lm_eval/tasks/mmlu/arc_style/mmlu_econometrics.yaml +++ /dev/null @@ -1,6 +0,0 @@ -"dataset_name": "econometrics" -"description": "The following are questions (with answers) about econometrics.\n\ - \n" -"group": "mmlu_arc_style_social_sciences" -"include": "_arc_style_template_yaml" -"task": "mmlu_arc_style_econometrics" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_formal_logic.yaml b/lm_eval/tasks/mmlu/arc_style/mmlu_formal_logic.yaml deleted file mode 100644 index bdf75ab9fe..0000000000 --- a/lm_eval/tasks/mmlu/arc_style/mmlu_formal_logic.yaml +++ /dev/null @@ -1,6 +0,0 @@ -"dataset_name": "formal_logic" -"description": "The following are questions (with answers) about formal\ - \ logic.\n\n" -"group": "mmlu_arc_style_humanities" -"include": "_arc_style_template_yaml" -"task": "mmlu_arc_style_formal_logic" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_human_sexuality.yaml b/lm_eval/tasks/mmlu/arc_style/mmlu_human_sexuality.yaml deleted file mode 100644 index 603ecd9529..0000000000 --- a/lm_eval/tasks/mmlu/arc_style/mmlu_human_sexuality.yaml +++ /dev/null @@ -1,6 +0,0 @@ -"dataset_name": "human_sexuality" -"description": "The following are questions (with answers) about human\ - \ sexuality.\n\n" -"group": "mmlu_arc_style_social_sciences" -"include": "_arc_style_template_yaml" -"task": "mmlu_arc_style_human_sexuality" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_jurisprudence.yaml b/lm_eval/tasks/mmlu/arc_style/mmlu_jurisprudence.yaml deleted file mode 100644 index 6c1a0e3de0..0000000000 --- a/lm_eval/tasks/mmlu/arc_style/mmlu_jurisprudence.yaml +++ /dev/null @@ -1,6 +0,0 @@ -"dataset_name": "jurisprudence" -"description": "The following are questions (with answers) about jurisprudence.\n\ - \n" -"group": "mmlu_arc_style_humanities" -"include": "_arc_style_template_yaml" -"task": "mmlu_arc_style_jurisprudence" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_marketing.yaml b/lm_eval/tasks/mmlu/arc_style/mmlu_marketing.yaml deleted file mode 100644 index d224c0e8fb..0000000000 --- a/lm_eval/tasks/mmlu/arc_style/mmlu_marketing.yaml +++ /dev/null @@ -1,6 +0,0 @@ -"dataset_name": "marketing" -"description": "The following are questions (with answers) about marketing.\n\ - \n" -"group": "mmlu_arc_style_other" -"include": "_arc_style_template_yaml" -"task": "mmlu_arc_style_marketing" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_nutrition.yaml b/lm_eval/tasks/mmlu/arc_style/mmlu_nutrition.yaml deleted file mode 100644 index 365d4f7ac7..0000000000 --- a/lm_eval/tasks/mmlu/arc_style/mmlu_nutrition.yaml +++ /dev/null @@ -1,6 +0,0 @@ -"dataset_name": "nutrition" -"description": "The following are questions (with answers) about nutrition.\n\ - \n" -"group": "mmlu_arc_style_other" -"include": "_arc_style_template_yaml" -"task": "mmlu_arc_style_nutrition" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_philosophy.yaml b/lm_eval/tasks/mmlu/arc_style/mmlu_philosophy.yaml deleted file mode 100644 index 530ecf3079..0000000000 --- a/lm_eval/tasks/mmlu/arc_style/mmlu_philosophy.yaml +++ /dev/null @@ -1,6 +0,0 @@ -"dataset_name": "philosophy" -"description": "The following are questions (with answers) about philosophy.\n\ - \n" -"group": "mmlu_arc_style_humanities" -"include": "_arc_style_template_yaml" -"task": "mmlu_arc_style_philosophy" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_prehistory.yaml b/lm_eval/tasks/mmlu/arc_style/mmlu_prehistory.yaml deleted file mode 100644 index 791c4a3a77..0000000000 --- a/lm_eval/tasks/mmlu/arc_style/mmlu_prehistory.yaml +++ /dev/null @@ -1,6 +0,0 @@ -"dataset_name": "prehistory" -"description": "The following are questions (with answers) about prehistory.\n\ - \n" -"group": "mmlu_arc_style_humanities" -"include": "_arc_style_template_yaml" -"task": "mmlu_arc_style_prehistory" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_public_relations.yaml b/lm_eval/tasks/mmlu/arc_style/mmlu_public_relations.yaml deleted file mode 100644 index c4a6de77ca..0000000000 --- a/lm_eval/tasks/mmlu/arc_style/mmlu_public_relations.yaml +++ /dev/null @@ -1,6 +0,0 @@ -"dataset_name": "public_relations" -"description": "The following are questions (with answers) about public\ - \ relations.\n\n" -"group": "mmlu_arc_style_social_sciences" -"include": "_arc_style_template_yaml" -"task": "mmlu_arc_style_public_relations" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_security_studies.yaml b/lm_eval/tasks/mmlu/arc_style/mmlu_security_studies.yaml deleted file mode 100644 index ef67bbe6eb..0000000000 --- a/lm_eval/tasks/mmlu/arc_style/mmlu_security_studies.yaml +++ /dev/null @@ -1,6 +0,0 @@ -"dataset_name": "security_studies" -"description": "The following are questions (with answers) about security\ - \ studies.\n\n" -"group": "mmlu_arc_style_social_sciences" -"include": "_arc_style_template_yaml" -"task": "mmlu_arc_style_security_studies" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_sociology.yaml b/lm_eval/tasks/mmlu/arc_style/mmlu_sociology.yaml deleted file mode 100644 index fd0431895a..0000000000 --- a/lm_eval/tasks/mmlu/arc_style/mmlu_sociology.yaml +++ /dev/null @@ -1,6 +0,0 @@ -"dataset_name": "sociology" -"description": "The following are questions (with answers) about sociology.\n\ - \n" -"group": "mmlu_arc_style_social_sciences" -"include": "_arc_style_template_yaml" -"task": "mmlu_arc_style_sociology" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_us_foreign_policy.yaml b/lm_eval/tasks/mmlu/arc_style/mmlu_us_foreign_policy.yaml deleted file mode 100644 index 2bdb27ec3a..0000000000 --- a/lm_eval/tasks/mmlu/arc_style/mmlu_us_foreign_policy.yaml +++ /dev/null @@ -1,6 +0,0 @@ -"dataset_name": "us_foreign_policy" -"description": "The following are questions (with answers) about us\ - \ foreign policy.\n\n" -"group": "mmlu_arc_style_social_sciences" -"include": "_arc_style_template_yaml" -"task": "mmlu_arc_style_us_foreign_policy" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_virology.yaml b/lm_eval/tasks/mmlu/arc_style/mmlu_virology.yaml deleted file mode 100644 index 3ebf9f1306..0000000000 --- a/lm_eval/tasks/mmlu/arc_style/mmlu_virology.yaml +++ /dev/null @@ -1,6 +0,0 @@ -"dataset_name": "virology" -"description": "The following are questions (with answers) about virology.\n\ - \n" -"group": "mmlu_arc_style_other" -"include": "_arc_style_template_yaml" -"task": "mmlu_arc_style_virology" diff --git a/lm_eval/tasks/mmlu/arc_style/_arc_style_template_yaml b/lm_eval/tasks/mmlu/continuation/_continuation_template_yaml similarity index 100% rename from lm_eval/tasks/mmlu/arc_style/_arc_style_template_yaml rename to lm_eval/tasks/mmlu/continuation/_continuation_template_yaml diff --git a/lm_eval/tasks/mmlu/continuation/_mmlu.yaml b/lm_eval/tasks/mmlu/continuation/_mmlu.yaml new file mode 100644 index 0000000000..77ef1b1d41 --- /dev/null +++ b/lm_eval/tasks/mmlu/continuation/_mmlu.yaml @@ -0,0 +1,6 @@ +group: mmlu_continuation +task: + - mmlu_continuation_stem + - mmlu_continuation_other + - mmlu_continuation_social_sciences + - mmlu_continuation_humanities diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_abstract_algebra.yaml b/lm_eval/tasks/mmlu/continuation/mmlu_abstract_algebra.yaml similarity index 51% rename from lm_eval/tasks/mmlu/arc_style/mmlu_abstract_algebra.yaml rename to lm_eval/tasks/mmlu/continuation/mmlu_abstract_algebra.yaml index a847cc7858..962ee9c81f 100644 --- a/lm_eval/tasks/mmlu/arc_style/mmlu_abstract_algebra.yaml +++ b/lm_eval/tasks/mmlu/continuation/mmlu_abstract_algebra.yaml @@ -1,6 +1,6 @@ "dataset_name": "abstract_algebra" "description": "The following are questions (with answers) about abstract\ \ algebra.\n\n" -"group": "mmlu_arc_style_stem" -"include": "_arc_style_template_yaml" -"task": "mmlu_arc_style_abstract_algebra" +"group": "mmlu_continuation_stem" +"include": "_continuation_template_yaml" +"task": "mmlu_continuation_abstract_algebra" diff --git a/lm_eval/tasks/mmlu/continuation/mmlu_anatomy.yaml b/lm_eval/tasks/mmlu/continuation/mmlu_anatomy.yaml new file mode 100644 index 0000000000..efb8ab56ad --- /dev/null +++ b/lm_eval/tasks/mmlu/continuation/mmlu_anatomy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "anatomy" +"description": "The following are questions (with answers) about anatomy.\n\ + \n" +"group": "mmlu_continuation_stem" +"include": "_continuation_template_yaml" +"task": "mmlu_continuation_anatomy" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_astronomy.yaml b/lm_eval/tasks/mmlu/continuation/mmlu_astronomy.yaml similarity index 50% rename from lm_eval/tasks/mmlu/arc_style/mmlu_astronomy.yaml rename to lm_eval/tasks/mmlu/continuation/mmlu_astronomy.yaml index 1a76409ec0..7cebe62c87 100644 --- a/lm_eval/tasks/mmlu/arc_style/mmlu_astronomy.yaml +++ b/lm_eval/tasks/mmlu/continuation/mmlu_astronomy.yaml @@ -1,6 +1,6 @@ "dataset_name": "astronomy" "description": "The following are questions (with answers) about astronomy.\n\ \n" -"group": "mmlu_arc_style_stem" -"include": "_arc_style_template_yaml" -"task": "mmlu_arc_style_astronomy" +"group": "mmlu_continuation_stem" +"include": "_continuation_template_yaml" +"task": "mmlu_continuation_astronomy" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_business_ethics.yaml b/lm_eval/tasks/mmlu/continuation/mmlu_business_ethics.yaml similarity index 51% rename from lm_eval/tasks/mmlu/arc_style/mmlu_business_ethics.yaml rename to lm_eval/tasks/mmlu/continuation/mmlu_business_ethics.yaml index b732b37abd..52e47605bd 100644 --- a/lm_eval/tasks/mmlu/arc_style/mmlu_business_ethics.yaml +++ b/lm_eval/tasks/mmlu/continuation/mmlu_business_ethics.yaml @@ -1,6 +1,6 @@ "dataset_name": "business_ethics" "description": "The following are questions (with answers) about business\ \ ethics.\n\n" -"group": "mmlu_arc_style_other" -"include": "_arc_style_template_yaml" -"task": "mmlu_arc_style_business_ethics" +"group": "mmlu_continuation_other" +"include": "_continuation_template_yaml" +"task": "mmlu_continuation_business_ethics" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_clinical_knowledge.yaml b/lm_eval/tasks/mmlu/continuation/mmlu_clinical_knowledge.yaml similarity index 51% rename from lm_eval/tasks/mmlu/arc_style/mmlu_clinical_knowledge.yaml rename to lm_eval/tasks/mmlu/continuation/mmlu_clinical_knowledge.yaml index 6788dd05a8..a0abd04905 100644 --- a/lm_eval/tasks/mmlu/arc_style/mmlu_clinical_knowledge.yaml +++ b/lm_eval/tasks/mmlu/continuation/mmlu_clinical_knowledge.yaml @@ -1,6 +1,6 @@ "dataset_name": "clinical_knowledge" "description": "The following are questions (with answers) about clinical\ \ knowledge.\n\n" -"group": "mmlu_arc_style_other" -"include": "_arc_style_template_yaml" -"task": "mmlu_arc_style_clinical_knowledge" +"group": "mmlu_continuation_other" +"include": "_continuation_template_yaml" +"task": "mmlu_continuation_clinical_knowledge" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_college_biology.yaml b/lm_eval/tasks/mmlu/continuation/mmlu_college_biology.yaml similarity index 51% rename from lm_eval/tasks/mmlu/arc_style/mmlu_college_biology.yaml rename to lm_eval/tasks/mmlu/continuation/mmlu_college_biology.yaml index 4c156ed146..a2c8570c4b 100644 --- a/lm_eval/tasks/mmlu/arc_style/mmlu_college_biology.yaml +++ b/lm_eval/tasks/mmlu/continuation/mmlu_college_biology.yaml @@ -1,6 +1,6 @@ "dataset_name": "college_biology" "description": "The following are questions (with answers) about college\ \ biology.\n\n" -"group": "mmlu_arc_style_stem" -"include": "_arc_style_template_yaml" -"task": "mmlu_arc_style_college_biology" +"group": "mmlu_continuation_stem" +"include": "_continuation_template_yaml" +"task": "mmlu_continuation_college_biology" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_college_chemistry.yaml b/lm_eval/tasks/mmlu/continuation/mmlu_college_chemistry.yaml similarity index 51% rename from lm_eval/tasks/mmlu/arc_style/mmlu_college_chemistry.yaml rename to lm_eval/tasks/mmlu/continuation/mmlu_college_chemistry.yaml index e7800f384c..cfb11c87b1 100644 --- a/lm_eval/tasks/mmlu/arc_style/mmlu_college_chemistry.yaml +++ b/lm_eval/tasks/mmlu/continuation/mmlu_college_chemistry.yaml @@ -1,6 +1,6 @@ "dataset_name": "college_chemistry" "description": "The following are questions (with answers) about college\ \ chemistry.\n\n" -"group": "mmlu_arc_style_stem" -"include": "_arc_style_template_yaml" -"task": "mmlu_arc_style_college_chemistry" +"group": "mmlu_continuation_stem" +"include": "_continuation_template_yaml" +"task": "mmlu_continuation_college_chemistry" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_college_computer_science.yaml b/lm_eval/tasks/mmlu/continuation/mmlu_college_computer_science.yaml similarity index 52% rename from lm_eval/tasks/mmlu/arc_style/mmlu_college_computer_science.yaml rename to lm_eval/tasks/mmlu/continuation/mmlu_college_computer_science.yaml index 80ea6a8dd0..67c6548e53 100644 --- a/lm_eval/tasks/mmlu/arc_style/mmlu_college_computer_science.yaml +++ b/lm_eval/tasks/mmlu/continuation/mmlu_college_computer_science.yaml @@ -1,6 +1,6 @@ "dataset_name": "college_computer_science" "description": "The following are questions (with answers) about college\ \ computer science.\n\n" -"group": "mmlu_arc_style_stem" -"include": "_arc_style_template_yaml" -"task": "mmlu_arc_style_college_computer_science" +"group": "mmlu_continuation_stem" +"include": "_continuation_template_yaml" +"task": "mmlu_continuation_college_computer_science" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_college_mathematics.yaml b/lm_eval/tasks/mmlu/continuation/mmlu_college_mathematics.yaml similarity index 52% rename from lm_eval/tasks/mmlu/arc_style/mmlu_college_mathematics.yaml rename to lm_eval/tasks/mmlu/continuation/mmlu_college_mathematics.yaml index fee25308a5..f694391b2c 100644 --- a/lm_eval/tasks/mmlu/arc_style/mmlu_college_mathematics.yaml +++ b/lm_eval/tasks/mmlu/continuation/mmlu_college_mathematics.yaml @@ -1,6 +1,6 @@ "dataset_name": "college_mathematics" "description": "The following are questions (with answers) about college\ \ mathematics.\n\n" -"group": "mmlu_arc_style_stem" -"include": "_arc_style_template_yaml" -"task": "mmlu_arc_style_college_mathematics" +"group": "mmlu_continuation_stem" +"include": "_continuation_template_yaml" +"task": "mmlu_continuation_college_mathematics" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_college_medicine.yaml b/lm_eval/tasks/mmlu/continuation/mmlu_college_medicine.yaml similarity index 51% rename from lm_eval/tasks/mmlu/arc_style/mmlu_college_medicine.yaml rename to lm_eval/tasks/mmlu/continuation/mmlu_college_medicine.yaml index ac3248ec8e..97b83a3feb 100644 --- a/lm_eval/tasks/mmlu/arc_style/mmlu_college_medicine.yaml +++ b/lm_eval/tasks/mmlu/continuation/mmlu_college_medicine.yaml @@ -1,6 +1,6 @@ "dataset_name": "college_medicine" "description": "The following are questions (with answers) about college\ \ medicine.\n\n" -"group": "mmlu_arc_style_other" -"include": "_arc_style_template_yaml" -"task": "mmlu_arc_style_college_medicine" +"group": "mmlu_continuation_other" +"include": "_continuation_template_yaml" +"task": "mmlu_continuation_college_medicine" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_college_physics.yaml b/lm_eval/tasks/mmlu/continuation/mmlu_college_physics.yaml similarity index 51% rename from lm_eval/tasks/mmlu/arc_style/mmlu_college_physics.yaml rename to lm_eval/tasks/mmlu/continuation/mmlu_college_physics.yaml index e5344189cb..b01e3ff191 100644 --- a/lm_eval/tasks/mmlu/arc_style/mmlu_college_physics.yaml +++ b/lm_eval/tasks/mmlu/continuation/mmlu_college_physics.yaml @@ -1,6 +1,6 @@ "dataset_name": "college_physics" "description": "The following are questions (with answers) about college\ \ physics.\n\n" -"group": "mmlu_arc_style_stem" -"include": "_arc_style_template_yaml" -"task": "mmlu_arc_style_college_physics" +"group": "mmlu_continuation_stem" +"include": "_continuation_template_yaml" +"task": "mmlu_continuation_college_physics" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_computer_security.yaml b/lm_eval/tasks/mmlu/continuation/mmlu_computer_security.yaml similarity index 51% rename from lm_eval/tasks/mmlu/arc_style/mmlu_computer_security.yaml rename to lm_eval/tasks/mmlu/continuation/mmlu_computer_security.yaml index ded6da67f5..8ba7f5b0ab 100644 --- a/lm_eval/tasks/mmlu/arc_style/mmlu_computer_security.yaml +++ b/lm_eval/tasks/mmlu/continuation/mmlu_computer_security.yaml @@ -1,6 +1,6 @@ "dataset_name": "computer_security" "description": "The following are questions (with answers) about computer\ \ security.\n\n" -"group": "mmlu_arc_style_stem" -"include": "_arc_style_template_yaml" -"task": "mmlu_arc_style_computer_security" +"group": "mmlu_continuation_stem" +"include": "_continuation_template_yaml" +"task": "mmlu_continuation_computer_security" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_conceptual_physics.yaml b/lm_eval/tasks/mmlu/continuation/mmlu_conceptual_physics.yaml similarity index 51% rename from lm_eval/tasks/mmlu/arc_style/mmlu_conceptual_physics.yaml rename to lm_eval/tasks/mmlu/continuation/mmlu_conceptual_physics.yaml index 1e8db17a21..8a4fd2968d 100644 --- a/lm_eval/tasks/mmlu/arc_style/mmlu_conceptual_physics.yaml +++ b/lm_eval/tasks/mmlu/continuation/mmlu_conceptual_physics.yaml @@ -1,6 +1,6 @@ "dataset_name": "conceptual_physics" "description": "The following are questions (with answers) about conceptual\ \ physics.\n\n" -"group": "mmlu_arc_style_stem" -"include": "_arc_style_template_yaml" -"task": "mmlu_arc_style_conceptual_physics" +"group": "mmlu_continuation_stem" +"include": "_continuation_template_yaml" +"task": "mmlu_continuation_conceptual_physics" diff --git a/lm_eval/tasks/mmlu/continuation/mmlu_econometrics.yaml b/lm_eval/tasks/mmlu/continuation/mmlu_econometrics.yaml new file mode 100644 index 0000000000..2bd6526549 --- /dev/null +++ b/lm_eval/tasks/mmlu/continuation/mmlu_econometrics.yaml @@ -0,0 +1,6 @@ +"dataset_name": "econometrics" +"description": "The following are questions (with answers) about econometrics.\n\ + \n" +"group": "mmlu_continuation_social_sciences" +"include": "_continuation_template_yaml" +"task": "mmlu_continuation_econometrics" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_electrical_engineering.yaml b/lm_eval/tasks/mmlu/continuation/mmlu_electrical_engineering.yaml similarity index 52% rename from lm_eval/tasks/mmlu/arc_style/mmlu_electrical_engineering.yaml rename to lm_eval/tasks/mmlu/continuation/mmlu_electrical_engineering.yaml index 438d66cedd..b7e9cbdb47 100644 --- a/lm_eval/tasks/mmlu/arc_style/mmlu_electrical_engineering.yaml +++ b/lm_eval/tasks/mmlu/continuation/mmlu_electrical_engineering.yaml @@ -1,6 +1,6 @@ "dataset_name": "electrical_engineering" "description": "The following are questions (with answers) about electrical\ \ engineering.\n\n" -"group": "mmlu_arc_style_stem" -"include": "_arc_style_template_yaml" -"task": "mmlu_arc_style_electrical_engineering" +"group": "mmlu_continuation_stem" +"include": "_continuation_template_yaml" +"task": "mmlu_continuation_electrical_engineering" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_elementary_mathematics.yaml b/lm_eval/tasks/mmlu/continuation/mmlu_elementary_mathematics.yaml similarity index 52% rename from lm_eval/tasks/mmlu/arc_style/mmlu_elementary_mathematics.yaml rename to lm_eval/tasks/mmlu/continuation/mmlu_elementary_mathematics.yaml index 7ab6a90e37..689dc17c9c 100644 --- a/lm_eval/tasks/mmlu/arc_style/mmlu_elementary_mathematics.yaml +++ b/lm_eval/tasks/mmlu/continuation/mmlu_elementary_mathematics.yaml @@ -1,6 +1,6 @@ "dataset_name": "elementary_mathematics" "description": "The following are questions (with answers) about elementary\ \ mathematics.\n\n" -"group": "mmlu_arc_style_stem" -"include": "_arc_style_template_yaml" -"task": "mmlu_arc_style_elementary_mathematics" +"group": "mmlu_continuation_stem" +"include": "_continuation_template_yaml" +"task": "mmlu_continuation_elementary_mathematics" diff --git a/lm_eval/tasks/mmlu/continuation/mmlu_formal_logic.yaml b/lm_eval/tasks/mmlu/continuation/mmlu_formal_logic.yaml new file mode 100644 index 0000000000..149b6c8449 --- /dev/null +++ b/lm_eval/tasks/mmlu/continuation/mmlu_formal_logic.yaml @@ -0,0 +1,6 @@ +"dataset_name": "formal_logic" +"description": "The following are questions (with answers) about formal\ + \ logic.\n\n" +"group": "mmlu_continuation_humanities" +"include": "_continuation_template_yaml" +"task": "mmlu_continuation_formal_logic" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_global_facts.yaml b/lm_eval/tasks/mmlu/continuation/mmlu_global_facts.yaml similarity index 50% rename from lm_eval/tasks/mmlu/arc_style/mmlu_global_facts.yaml rename to lm_eval/tasks/mmlu/continuation/mmlu_global_facts.yaml index ee61929276..e02fba5f68 100644 --- a/lm_eval/tasks/mmlu/arc_style/mmlu_global_facts.yaml +++ b/lm_eval/tasks/mmlu/continuation/mmlu_global_facts.yaml @@ -1,6 +1,6 @@ "dataset_name": "global_facts" "description": "The following are questions (with answers) about global\ \ facts.\n\n" -"group": "mmlu_arc_style_other" -"include": "_arc_style_template_yaml" -"task": "mmlu_arc_style_global_facts" +"group": "mmlu_continuation_other" +"include": "_continuation_template_yaml" +"task": "mmlu_continuation_global_facts" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_high_school_biology.yaml b/lm_eval/tasks/mmlu/continuation/mmlu_high_school_biology.yaml similarity index 52% rename from lm_eval/tasks/mmlu/arc_style/mmlu_high_school_biology.yaml rename to lm_eval/tasks/mmlu/continuation/mmlu_high_school_biology.yaml index 0e25488833..f774f95868 100644 --- a/lm_eval/tasks/mmlu/arc_style/mmlu_high_school_biology.yaml +++ b/lm_eval/tasks/mmlu/continuation/mmlu_high_school_biology.yaml @@ -1,6 +1,6 @@ "dataset_name": "high_school_biology" "description": "The following are questions (with answers) about high\ \ school biology.\n\n" -"group": "mmlu_arc_style_stem" -"include": "_arc_style_template_yaml" -"task": "mmlu_arc_style_high_school_biology" +"group": "mmlu_continuation_stem" +"include": "_continuation_template_yaml" +"task": "mmlu_continuation_high_school_biology" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_high_school_chemistry.yaml b/lm_eval/tasks/mmlu/continuation/mmlu_high_school_chemistry.yaml similarity index 52% rename from lm_eval/tasks/mmlu/arc_style/mmlu_high_school_chemistry.yaml rename to lm_eval/tasks/mmlu/continuation/mmlu_high_school_chemistry.yaml index 1f20fa7159..c6a338a72c 100644 --- a/lm_eval/tasks/mmlu/arc_style/mmlu_high_school_chemistry.yaml +++ b/lm_eval/tasks/mmlu/continuation/mmlu_high_school_chemistry.yaml @@ -1,6 +1,6 @@ "dataset_name": "high_school_chemistry" "description": "The following are questions (with answers) about high\ \ school chemistry.\n\n" -"group": "mmlu_arc_style_stem" -"include": "_arc_style_template_yaml" -"task": "mmlu_arc_style_high_school_chemistry" +"group": "mmlu_continuation_stem" +"include": "_continuation_template_yaml" +"task": "mmlu_continuation_high_school_chemistry" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_high_school_computer_science.yaml b/lm_eval/tasks/mmlu/continuation/mmlu_high_school_computer_science.yaml similarity index 53% rename from lm_eval/tasks/mmlu/arc_style/mmlu_high_school_computer_science.yaml rename to lm_eval/tasks/mmlu/continuation/mmlu_high_school_computer_science.yaml index 0a643f568c..4038906350 100644 --- a/lm_eval/tasks/mmlu/arc_style/mmlu_high_school_computer_science.yaml +++ b/lm_eval/tasks/mmlu/continuation/mmlu_high_school_computer_science.yaml @@ -1,6 +1,6 @@ "dataset_name": "high_school_computer_science" "description": "The following are questions (with answers) about high\ \ school computer science.\n\n" -"group": "mmlu_arc_style_stem" -"include": "_arc_style_template_yaml" -"task": "mmlu_arc_style_high_school_computer_science" +"group": "mmlu_continuation_stem" +"include": "_continuation_template_yaml" +"task": "mmlu_continuation_high_school_computer_science" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_high_school_european_history.yaml b/lm_eval/tasks/mmlu/continuation/mmlu_high_school_european_history.yaml similarity index 52% rename from lm_eval/tasks/mmlu/arc_style/mmlu_high_school_european_history.yaml rename to lm_eval/tasks/mmlu/continuation/mmlu_high_school_european_history.yaml index 0e5c657994..0ef0cbd2b9 100644 --- a/lm_eval/tasks/mmlu/arc_style/mmlu_high_school_european_history.yaml +++ b/lm_eval/tasks/mmlu/continuation/mmlu_high_school_european_history.yaml @@ -1,6 +1,6 @@ "dataset_name": "high_school_european_history" "description": "The following are questions (with answers) about high\ \ school european history.\n\n" -"group": "mmlu_arc_style_humanities" -"include": "_arc_style_template_yaml" -"task": "mmlu_arc_style_high_school_european_history" +"group": "mmlu_continuation_humanities" +"include": "_continuation_template_yaml" +"task": "mmlu_continuation_high_school_european_history" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_high_school_geography.yaml b/lm_eval/tasks/mmlu/continuation/mmlu_high_school_geography.yaml similarity index 50% rename from lm_eval/tasks/mmlu/arc_style/mmlu_high_school_geography.yaml rename to lm_eval/tasks/mmlu/continuation/mmlu_high_school_geography.yaml index ddc90bcbf6..301280b89b 100644 --- a/lm_eval/tasks/mmlu/arc_style/mmlu_high_school_geography.yaml +++ b/lm_eval/tasks/mmlu/continuation/mmlu_high_school_geography.yaml @@ -1,6 +1,6 @@ "dataset_name": "high_school_geography" "description": "The following are questions (with answers) about high\ \ school geography.\n\n" -"group": "mmlu_arc_style_social_sciences" -"include": "_arc_style_template_yaml" -"task": "mmlu_arc_style_high_school_geography" +"group": "mmlu_continuation_social_sciences" +"include": "_continuation_template_yaml" +"task": "mmlu_continuation_high_school_geography" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_high_school_government_and_politics.yaml b/lm_eval/tasks/mmlu/continuation/mmlu_high_school_government_and_politics.yaml similarity index 52% rename from lm_eval/tasks/mmlu/arc_style/mmlu_high_school_government_and_politics.yaml rename to lm_eval/tasks/mmlu/continuation/mmlu_high_school_government_and_politics.yaml index b60336f946..792ed584c7 100644 --- a/lm_eval/tasks/mmlu/arc_style/mmlu_high_school_government_and_politics.yaml +++ b/lm_eval/tasks/mmlu/continuation/mmlu_high_school_government_and_politics.yaml @@ -1,6 +1,6 @@ "dataset_name": "high_school_government_and_politics" "description": "The following are questions (with answers) about high\ \ school government and politics.\n\n" -"group": "mmlu_arc_style_social_sciences" -"include": "_arc_style_template_yaml" -"task": "mmlu_arc_style_high_school_government_and_politics" +"group": "mmlu_continuation_social_sciences" +"include": "_continuation_template_yaml" +"task": "mmlu_continuation_high_school_government_and_politics" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_high_school_macroeconomics.yaml b/lm_eval/tasks/mmlu/continuation/mmlu_high_school_macroeconomics.yaml similarity index 51% rename from lm_eval/tasks/mmlu/arc_style/mmlu_high_school_macroeconomics.yaml rename to lm_eval/tasks/mmlu/continuation/mmlu_high_school_macroeconomics.yaml index 5c356cf8c1..858d91f676 100644 --- a/lm_eval/tasks/mmlu/arc_style/mmlu_high_school_macroeconomics.yaml +++ b/lm_eval/tasks/mmlu/continuation/mmlu_high_school_macroeconomics.yaml @@ -1,6 +1,6 @@ "dataset_name": "high_school_macroeconomics" "description": "The following are questions (with answers) about high\ \ school macroeconomics.\n\n" -"group": "mmlu_arc_style_social_sciences" -"include": "_arc_style_template_yaml" -"task": "mmlu_arc_style_high_school_macroeconomics" +"group": "mmlu_continuation_social_sciences" +"include": "_continuation_template_yaml" +"task": "mmlu_continuation_high_school_macroeconomics" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_high_school_mathematics.yaml b/lm_eval/tasks/mmlu/continuation/mmlu_high_school_mathematics.yaml similarity index 52% rename from lm_eval/tasks/mmlu/arc_style/mmlu_high_school_mathematics.yaml rename to lm_eval/tasks/mmlu/continuation/mmlu_high_school_mathematics.yaml index 07139b883e..9c64bb05b1 100644 --- a/lm_eval/tasks/mmlu/arc_style/mmlu_high_school_mathematics.yaml +++ b/lm_eval/tasks/mmlu/continuation/mmlu_high_school_mathematics.yaml @@ -1,6 +1,6 @@ "dataset_name": "high_school_mathematics" "description": "The following are questions (with answers) about high\ \ school mathematics.\n\n" -"group": "mmlu_arc_style_stem" -"include": "_arc_style_template_yaml" -"task": "mmlu_arc_style_high_school_mathematics" +"group": "mmlu_continuation_stem" +"include": "_continuation_template_yaml" +"task": "mmlu_continuation_high_school_mathematics" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_high_school_microeconomics.yaml b/lm_eval/tasks/mmlu/continuation/mmlu_high_school_microeconomics.yaml similarity index 51% rename from lm_eval/tasks/mmlu/arc_style/mmlu_high_school_microeconomics.yaml rename to lm_eval/tasks/mmlu/continuation/mmlu_high_school_microeconomics.yaml index bec0e39fd0..df33f528de 100644 --- a/lm_eval/tasks/mmlu/arc_style/mmlu_high_school_microeconomics.yaml +++ b/lm_eval/tasks/mmlu/continuation/mmlu_high_school_microeconomics.yaml @@ -1,6 +1,6 @@ "dataset_name": "high_school_microeconomics" "description": "The following are questions (with answers) about high\ \ school microeconomics.\n\n" -"group": "mmlu_arc_style_social_sciences" -"include": "_arc_style_template_yaml" -"task": "mmlu_arc_style_high_school_microeconomics" +"group": "mmlu_continuation_social_sciences" +"include": "_continuation_template_yaml" +"task": "mmlu_continuation_high_school_microeconomics" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_high_school_physics.yaml b/lm_eval/tasks/mmlu/continuation/mmlu_high_school_physics.yaml similarity index 52% rename from lm_eval/tasks/mmlu/arc_style/mmlu_high_school_physics.yaml rename to lm_eval/tasks/mmlu/continuation/mmlu_high_school_physics.yaml index c7a8906cc5..562993b6bd 100644 --- a/lm_eval/tasks/mmlu/arc_style/mmlu_high_school_physics.yaml +++ b/lm_eval/tasks/mmlu/continuation/mmlu_high_school_physics.yaml @@ -1,6 +1,6 @@ "dataset_name": "high_school_physics" "description": "The following are questions (with answers) about high\ \ school physics.\n\n" -"group": "mmlu_arc_style_stem" -"include": "_arc_style_template_yaml" -"task": "mmlu_arc_style_high_school_physics" +"group": "mmlu_continuation_stem" +"include": "_continuation_template_yaml" +"task": "mmlu_continuation_high_school_physics" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_high_school_psychology.yaml b/lm_eval/tasks/mmlu/continuation/mmlu_high_school_psychology.yaml similarity index 50% rename from lm_eval/tasks/mmlu/arc_style/mmlu_high_school_psychology.yaml rename to lm_eval/tasks/mmlu/continuation/mmlu_high_school_psychology.yaml index 25c30ba7a3..b3b4a93495 100644 --- a/lm_eval/tasks/mmlu/arc_style/mmlu_high_school_psychology.yaml +++ b/lm_eval/tasks/mmlu/continuation/mmlu_high_school_psychology.yaml @@ -1,6 +1,6 @@ "dataset_name": "high_school_psychology" "description": "The following are questions (with answers) about high\ \ school psychology.\n\n" -"group": "mmlu_arc_style_social_sciences" -"include": "_arc_style_template_yaml" -"task": "mmlu_arc_style_high_school_psychology" +"group": "mmlu_continuation_social_sciences" +"include": "_continuation_template_yaml" +"task": "mmlu_continuation_high_school_psychology" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_high_school_statistics.yaml b/lm_eval/tasks/mmlu/continuation/mmlu_high_school_statistics.yaml similarity index 52% rename from lm_eval/tasks/mmlu/arc_style/mmlu_high_school_statistics.yaml rename to lm_eval/tasks/mmlu/continuation/mmlu_high_school_statistics.yaml index 32119ae8cd..be899e1d4e 100644 --- a/lm_eval/tasks/mmlu/arc_style/mmlu_high_school_statistics.yaml +++ b/lm_eval/tasks/mmlu/continuation/mmlu_high_school_statistics.yaml @@ -1,6 +1,6 @@ "dataset_name": "high_school_statistics" "description": "The following are questions (with answers) about high\ \ school statistics.\n\n" -"group": "mmlu_arc_style_stem" -"include": "_arc_style_template_yaml" -"task": "mmlu_arc_style_high_school_statistics" +"group": "mmlu_continuation_stem" +"include": "_continuation_template_yaml" +"task": "mmlu_continuation_high_school_statistics" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_high_school_us_history.yaml b/lm_eval/tasks/mmlu/continuation/mmlu_high_school_us_history.yaml similarity index 51% rename from lm_eval/tasks/mmlu/arc_style/mmlu_high_school_us_history.yaml rename to lm_eval/tasks/mmlu/continuation/mmlu_high_school_us_history.yaml index 6e682d6071..ad16ce7db1 100644 --- a/lm_eval/tasks/mmlu/arc_style/mmlu_high_school_us_history.yaml +++ b/lm_eval/tasks/mmlu/continuation/mmlu_high_school_us_history.yaml @@ -1,6 +1,6 @@ "dataset_name": "high_school_us_history" "description": "The following are questions (with answers) about high\ \ school us history.\n\n" -"group": "mmlu_arc_style_humanities" -"include": "_arc_style_template_yaml" -"task": "mmlu_arc_style_high_school_us_history" +"group": "mmlu_continuation_humanities" +"include": "_continuation_template_yaml" +"task": "mmlu_continuation_high_school_us_history" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_high_school_world_history.yaml b/lm_eval/tasks/mmlu/continuation/mmlu_high_school_world_history.yaml similarity index 51% rename from lm_eval/tasks/mmlu/arc_style/mmlu_high_school_world_history.yaml rename to lm_eval/tasks/mmlu/continuation/mmlu_high_school_world_history.yaml index 61220d298d..dc2bcbfbbd 100644 --- a/lm_eval/tasks/mmlu/arc_style/mmlu_high_school_world_history.yaml +++ b/lm_eval/tasks/mmlu/continuation/mmlu_high_school_world_history.yaml @@ -1,6 +1,6 @@ "dataset_name": "high_school_world_history" "description": "The following are questions (with answers) about high\ \ school world history.\n\n" -"group": "mmlu_arc_style_humanities" -"include": "_arc_style_template_yaml" -"task": "mmlu_arc_style_high_school_world_history" +"group": "mmlu_continuation_humanities" +"include": "_continuation_template_yaml" +"task": "mmlu_continuation_high_school_world_history" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_human_aging.yaml b/lm_eval/tasks/mmlu/continuation/mmlu_human_aging.yaml similarity index 50% rename from lm_eval/tasks/mmlu/arc_style/mmlu_human_aging.yaml rename to lm_eval/tasks/mmlu/continuation/mmlu_human_aging.yaml index 62c0d859e5..1ad19e9aae 100644 --- a/lm_eval/tasks/mmlu/arc_style/mmlu_human_aging.yaml +++ b/lm_eval/tasks/mmlu/continuation/mmlu_human_aging.yaml @@ -1,6 +1,6 @@ "dataset_name": "human_aging" "description": "The following are questions (with answers) about human\ \ aging.\n\n" -"group": "mmlu_arc_style_other" -"include": "_arc_style_template_yaml" -"task": "mmlu_arc_style_human_aging" +"group": "mmlu_continuation_other" +"include": "_continuation_template_yaml" +"task": "mmlu_continuation_human_aging" diff --git a/lm_eval/tasks/mmlu/continuation/mmlu_human_sexuality.yaml b/lm_eval/tasks/mmlu/continuation/mmlu_human_sexuality.yaml new file mode 100644 index 0000000000..607a951d35 --- /dev/null +++ b/lm_eval/tasks/mmlu/continuation/mmlu_human_sexuality.yaml @@ -0,0 +1,6 @@ +"dataset_name": "human_sexuality" +"description": "The following are questions (with answers) about human\ + \ sexuality.\n\n" +"group": "mmlu_continuation_social_sciences" +"include": "_continuation_template_yaml" +"task": "mmlu_continuation_human_sexuality" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_international_law.yaml b/lm_eval/tasks/mmlu/continuation/mmlu_international_law.yaml similarity index 50% rename from lm_eval/tasks/mmlu/arc_style/mmlu_international_law.yaml rename to lm_eval/tasks/mmlu/continuation/mmlu_international_law.yaml index ff0c8038b2..0783ce18b5 100644 --- a/lm_eval/tasks/mmlu/arc_style/mmlu_international_law.yaml +++ b/lm_eval/tasks/mmlu/continuation/mmlu_international_law.yaml @@ -1,6 +1,6 @@ "dataset_name": "international_law" "description": "The following are questions (with answers) about international\ \ law.\n\n" -"group": "mmlu_arc_style_humanities" -"include": "_arc_style_template_yaml" -"task": "mmlu_arc_style_international_law" +"group": "mmlu_continuation_humanities" +"include": "_continuation_template_yaml" +"task": "mmlu_continuation_international_law" diff --git a/lm_eval/tasks/mmlu/continuation/mmlu_jurisprudence.yaml b/lm_eval/tasks/mmlu/continuation/mmlu_jurisprudence.yaml new file mode 100644 index 0000000000..37de68e1d2 --- /dev/null +++ b/lm_eval/tasks/mmlu/continuation/mmlu_jurisprudence.yaml @@ -0,0 +1,6 @@ +"dataset_name": "jurisprudence" +"description": "The following are questions (with answers) about jurisprudence.\n\ + \n" +"group": "mmlu_continuation_humanities" +"include": "_continuation_template_yaml" +"task": "mmlu_continuation_jurisprudence" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_logical_fallacies.yaml b/lm_eval/tasks/mmlu/continuation/mmlu_logical_fallacies.yaml similarity index 50% rename from lm_eval/tasks/mmlu/arc_style/mmlu_logical_fallacies.yaml rename to lm_eval/tasks/mmlu/continuation/mmlu_logical_fallacies.yaml index 1e4a9c2fef..d5b37d4a95 100644 --- a/lm_eval/tasks/mmlu/arc_style/mmlu_logical_fallacies.yaml +++ b/lm_eval/tasks/mmlu/continuation/mmlu_logical_fallacies.yaml @@ -1,6 +1,6 @@ "dataset_name": "logical_fallacies" "description": "The following are questions (with answers) about logical\ \ fallacies.\n\n" -"group": "mmlu_arc_style_humanities" -"include": "_arc_style_template_yaml" -"task": "mmlu_arc_style_logical_fallacies" +"group": "mmlu_continuation_humanities" +"include": "_continuation_template_yaml" +"task": "mmlu_continuation_logical_fallacies" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_machine_learning.yaml b/lm_eval/tasks/mmlu/continuation/mmlu_machine_learning.yaml similarity index 51% rename from lm_eval/tasks/mmlu/arc_style/mmlu_machine_learning.yaml rename to lm_eval/tasks/mmlu/continuation/mmlu_machine_learning.yaml index 477deda6aa..7fc0ffd41d 100644 --- a/lm_eval/tasks/mmlu/arc_style/mmlu_machine_learning.yaml +++ b/lm_eval/tasks/mmlu/continuation/mmlu_machine_learning.yaml @@ -1,6 +1,6 @@ "dataset_name": "machine_learning" "description": "The following are questions (with answers) about machine\ \ learning.\n\n" -"group": "mmlu_arc_style_stem" -"include": "_arc_style_template_yaml" -"task": "mmlu_arc_style_machine_learning" +"group": "mmlu_continuation_stem" +"include": "_continuation_template_yaml" +"task": "mmlu_continuation_machine_learning" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_management.yaml b/lm_eval/tasks/mmlu/continuation/mmlu_management.yaml similarity index 50% rename from lm_eval/tasks/mmlu/arc_style/mmlu_management.yaml rename to lm_eval/tasks/mmlu/continuation/mmlu_management.yaml index 7315e9c5ed..fe25108ffd 100644 --- a/lm_eval/tasks/mmlu/arc_style/mmlu_management.yaml +++ b/lm_eval/tasks/mmlu/continuation/mmlu_management.yaml @@ -1,6 +1,6 @@ "dataset_name": "management" "description": "The following are questions (with answers) about management.\n\ \n" -"group": "mmlu_arc_style_other" -"include": "_arc_style_template_yaml" -"task": "mmlu_arc_style_management" +"group": "mmlu_continuation_other" +"include": "_continuation_template_yaml" +"task": "mmlu_continuation_management" diff --git a/lm_eval/tasks/mmlu/continuation/mmlu_marketing.yaml b/lm_eval/tasks/mmlu/continuation/mmlu_marketing.yaml new file mode 100644 index 0000000000..2beebd2982 --- /dev/null +++ b/lm_eval/tasks/mmlu/continuation/mmlu_marketing.yaml @@ -0,0 +1,6 @@ +"dataset_name": "marketing" +"description": "The following are questions (with answers) about marketing.\n\ + \n" +"group": "mmlu_continuation_other" +"include": "_continuation_template_yaml" +"task": "mmlu_continuation_marketing" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_medical_genetics.yaml b/lm_eval/tasks/mmlu/continuation/mmlu_medical_genetics.yaml similarity index 51% rename from lm_eval/tasks/mmlu/arc_style/mmlu_medical_genetics.yaml rename to lm_eval/tasks/mmlu/continuation/mmlu_medical_genetics.yaml index 5d09745e30..04d63864d3 100644 --- a/lm_eval/tasks/mmlu/arc_style/mmlu_medical_genetics.yaml +++ b/lm_eval/tasks/mmlu/continuation/mmlu_medical_genetics.yaml @@ -1,6 +1,6 @@ "dataset_name": "medical_genetics" "description": "The following are questions (with answers) about medical\ \ genetics.\n\n" -"group": "mmlu_arc_style_other" -"include": "_arc_style_template_yaml" -"task": "mmlu_arc_style_medical_genetics" +"group": "mmlu_continuation_other" +"include": "_continuation_template_yaml" +"task": "mmlu_continuation_medical_genetics" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_miscellaneous.yaml b/lm_eval/tasks/mmlu/continuation/mmlu_miscellaneous.yaml similarity index 50% rename from lm_eval/tasks/mmlu/arc_style/mmlu_miscellaneous.yaml rename to lm_eval/tasks/mmlu/continuation/mmlu_miscellaneous.yaml index de8fa75236..95367c773c 100644 --- a/lm_eval/tasks/mmlu/arc_style/mmlu_miscellaneous.yaml +++ b/lm_eval/tasks/mmlu/continuation/mmlu_miscellaneous.yaml @@ -1,6 +1,6 @@ "dataset_name": "miscellaneous" "description": "The following are questions (with answers) about miscellaneous.\n\ \n" -"group": "mmlu_arc_style_other" -"include": "_arc_style_template_yaml" -"task": "mmlu_arc_style_miscellaneous" +"group": "mmlu_continuation_other" +"include": "_continuation_template_yaml" +"task": "mmlu_continuation_miscellaneous" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_moral_disputes.yaml b/lm_eval/tasks/mmlu/continuation/mmlu_moral_disputes.yaml similarity index 50% rename from lm_eval/tasks/mmlu/arc_style/mmlu_moral_disputes.yaml rename to lm_eval/tasks/mmlu/continuation/mmlu_moral_disputes.yaml index 2e3e60900c..82780df7a1 100644 --- a/lm_eval/tasks/mmlu/arc_style/mmlu_moral_disputes.yaml +++ b/lm_eval/tasks/mmlu/continuation/mmlu_moral_disputes.yaml @@ -1,6 +1,6 @@ "dataset_name": "moral_disputes" "description": "The following are questions (with answers) about moral\ \ disputes.\n\n" -"group": "mmlu_arc_style_humanities" -"include": "_arc_style_template_yaml" -"task": "mmlu_arc_style_moral_disputes" +"group": "mmlu_continuation_humanities" +"include": "_continuation_template_yaml" +"task": "mmlu_continuation_moral_disputes" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_moral_scenarios.yaml b/lm_eval/tasks/mmlu/continuation/mmlu_moral_scenarios.yaml similarity index 50% rename from lm_eval/tasks/mmlu/arc_style/mmlu_moral_scenarios.yaml rename to lm_eval/tasks/mmlu/continuation/mmlu_moral_scenarios.yaml index 5fb50c9ef2..d60487e508 100644 --- a/lm_eval/tasks/mmlu/arc_style/mmlu_moral_scenarios.yaml +++ b/lm_eval/tasks/mmlu/continuation/mmlu_moral_scenarios.yaml @@ -1,6 +1,6 @@ "dataset_name": "moral_scenarios" "description": "The following are questions (with answers) about moral\ \ scenarios.\n\n" -"group": "mmlu_arc_style_humanities" -"include": "_arc_style_template_yaml" -"task": "mmlu_arc_style_moral_scenarios" +"group": "mmlu_continuation_humanities" +"include": "_continuation_template_yaml" +"task": "mmlu_continuation_moral_scenarios" diff --git a/lm_eval/tasks/mmlu/continuation/mmlu_nutrition.yaml b/lm_eval/tasks/mmlu/continuation/mmlu_nutrition.yaml new file mode 100644 index 0000000000..b502fcf74d --- /dev/null +++ b/lm_eval/tasks/mmlu/continuation/mmlu_nutrition.yaml @@ -0,0 +1,6 @@ +"dataset_name": "nutrition" +"description": "The following are questions (with answers) about nutrition.\n\ + \n" +"group": "mmlu_continuation_other" +"include": "_continuation_template_yaml" +"task": "mmlu_continuation_nutrition" diff --git a/lm_eval/tasks/mmlu/continuation/mmlu_philosophy.yaml b/lm_eval/tasks/mmlu/continuation/mmlu_philosophy.yaml new file mode 100644 index 0000000000..e3cc3d45fd --- /dev/null +++ b/lm_eval/tasks/mmlu/continuation/mmlu_philosophy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "philosophy" +"description": "The following are questions (with answers) about philosophy.\n\ + \n" +"group": "mmlu_continuation_humanities" +"include": "_continuation_template_yaml" +"task": "mmlu_continuation_philosophy" diff --git a/lm_eval/tasks/mmlu/continuation/mmlu_prehistory.yaml b/lm_eval/tasks/mmlu/continuation/mmlu_prehistory.yaml new file mode 100644 index 0000000000..2c29ee75cd --- /dev/null +++ b/lm_eval/tasks/mmlu/continuation/mmlu_prehistory.yaml @@ -0,0 +1,6 @@ +"dataset_name": "prehistory" +"description": "The following are questions (with answers) about prehistory.\n\ + \n" +"group": "mmlu_continuation_humanities" +"include": "_continuation_template_yaml" +"task": "mmlu_continuation_prehistory" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_professional_accounting.yaml b/lm_eval/tasks/mmlu/continuation/mmlu_professional_accounting.yaml similarity index 52% rename from lm_eval/tasks/mmlu/arc_style/mmlu_professional_accounting.yaml rename to lm_eval/tasks/mmlu/continuation/mmlu_professional_accounting.yaml index 9e52f864a8..588a4d4312 100644 --- a/lm_eval/tasks/mmlu/arc_style/mmlu_professional_accounting.yaml +++ b/lm_eval/tasks/mmlu/continuation/mmlu_professional_accounting.yaml @@ -1,6 +1,6 @@ "dataset_name": "professional_accounting" "description": "The following are questions (with answers) about professional\ \ accounting.\n\n" -"group": "mmlu_arc_style_other" -"include": "_arc_style_template_yaml" -"task": "mmlu_arc_style_professional_accounting" +"group": "mmlu_continuation_other" +"include": "_continuation_template_yaml" +"task": "mmlu_continuation_professional_accounting" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_professional_law.yaml b/lm_eval/tasks/mmlu/continuation/mmlu_professional_law.yaml similarity index 50% rename from lm_eval/tasks/mmlu/arc_style/mmlu_professional_law.yaml rename to lm_eval/tasks/mmlu/continuation/mmlu_professional_law.yaml index e8421ec0b9..ea8b35a0dc 100644 --- a/lm_eval/tasks/mmlu/arc_style/mmlu_professional_law.yaml +++ b/lm_eval/tasks/mmlu/continuation/mmlu_professional_law.yaml @@ -1,6 +1,6 @@ "dataset_name": "professional_law" "description": "The following are questions (with answers) about professional\ \ law.\n\n" -"group": "mmlu_arc_style_humanities" -"include": "_arc_style_template_yaml" -"task": "mmlu_arc_style_professional_law" +"group": "mmlu_continuation_humanities" +"include": "_continuation_template_yaml" +"task": "mmlu_continuation_professional_law" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_professional_medicine.yaml b/lm_eval/tasks/mmlu/continuation/mmlu_professional_medicine.yaml similarity index 52% rename from lm_eval/tasks/mmlu/arc_style/mmlu_professional_medicine.yaml rename to lm_eval/tasks/mmlu/continuation/mmlu_professional_medicine.yaml index e33ca2dbd1..73bdc706d8 100644 --- a/lm_eval/tasks/mmlu/arc_style/mmlu_professional_medicine.yaml +++ b/lm_eval/tasks/mmlu/continuation/mmlu_professional_medicine.yaml @@ -1,6 +1,6 @@ "dataset_name": "professional_medicine" "description": "The following are questions (with answers) about professional\ \ medicine.\n\n" -"group": "mmlu_arc_style_other" -"include": "_arc_style_template_yaml" -"task": "mmlu_arc_style_professional_medicine" +"group": "mmlu_continuation_other" +"include": "_continuation_template_yaml" +"task": "mmlu_continuation_professional_medicine" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_professional_psychology.yaml b/lm_eval/tasks/mmlu/continuation/mmlu_professional_psychology.yaml similarity index 50% rename from lm_eval/tasks/mmlu/arc_style/mmlu_professional_psychology.yaml rename to lm_eval/tasks/mmlu/continuation/mmlu_professional_psychology.yaml index a489057894..dab8599805 100644 --- a/lm_eval/tasks/mmlu/arc_style/mmlu_professional_psychology.yaml +++ b/lm_eval/tasks/mmlu/continuation/mmlu_professional_psychology.yaml @@ -1,6 +1,6 @@ "dataset_name": "professional_psychology" "description": "The following are questions (with answers) about professional\ \ psychology.\n\n" -"group": "mmlu_arc_style_social_sciences" -"include": "_arc_style_template_yaml" -"task": "mmlu_arc_style_professional_psychology" +"group": "mmlu_continuation_social_sciences" +"include": "_continuation_template_yaml" +"task": "mmlu_continuation_professional_psychology" diff --git a/lm_eval/tasks/mmlu/continuation/mmlu_public_relations.yaml b/lm_eval/tasks/mmlu/continuation/mmlu_public_relations.yaml new file mode 100644 index 0000000000..d4cffea20b --- /dev/null +++ b/lm_eval/tasks/mmlu/continuation/mmlu_public_relations.yaml @@ -0,0 +1,6 @@ +"dataset_name": "public_relations" +"description": "The following are questions (with answers) about public\ + \ relations.\n\n" +"group": "mmlu_continuation_social_sciences" +"include": "_continuation_template_yaml" +"task": "mmlu_continuation_public_relations" diff --git a/lm_eval/tasks/mmlu/continuation/mmlu_security_studies.yaml b/lm_eval/tasks/mmlu/continuation/mmlu_security_studies.yaml new file mode 100644 index 0000000000..df0adc7834 --- /dev/null +++ b/lm_eval/tasks/mmlu/continuation/mmlu_security_studies.yaml @@ -0,0 +1,6 @@ +"dataset_name": "security_studies" +"description": "The following are questions (with answers) about security\ + \ studies.\n\n" +"group": "mmlu_continuation_social_sciences" +"include": "_continuation_template_yaml" +"task": "mmlu_continuation_security_studies" diff --git a/lm_eval/tasks/mmlu/continuation/mmlu_sociology.yaml b/lm_eval/tasks/mmlu/continuation/mmlu_sociology.yaml new file mode 100644 index 0000000000..5da671e1de --- /dev/null +++ b/lm_eval/tasks/mmlu/continuation/mmlu_sociology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "sociology" +"description": "The following are questions (with answers) about sociology.\n\ + \n" +"group": "mmlu_continuation_social_sciences" +"include": "_continuation_template_yaml" +"task": "mmlu_continuation_sociology" diff --git a/lm_eval/tasks/mmlu/continuation/mmlu_us_foreign_policy.yaml b/lm_eval/tasks/mmlu/continuation/mmlu_us_foreign_policy.yaml new file mode 100644 index 0000000000..a426a5ec84 --- /dev/null +++ b/lm_eval/tasks/mmlu/continuation/mmlu_us_foreign_policy.yaml @@ -0,0 +1,6 @@ +"dataset_name": "us_foreign_policy" +"description": "The following are questions (with answers) about us\ + \ foreign policy.\n\n" +"group": "mmlu_continuation_social_sciences" +"include": "_continuation_template_yaml" +"task": "mmlu_continuation_us_foreign_policy" diff --git a/lm_eval/tasks/mmlu/continuation/mmlu_virology.yaml b/lm_eval/tasks/mmlu/continuation/mmlu_virology.yaml new file mode 100644 index 0000000000..589abc864b --- /dev/null +++ b/lm_eval/tasks/mmlu/continuation/mmlu_virology.yaml @@ -0,0 +1,6 @@ +"dataset_name": "virology" +"description": "The following are questions (with answers) about virology.\n\ + \n" +"group": "mmlu_continuation_other" +"include": "_continuation_template_yaml" +"task": "mmlu_continuation_virology" diff --git a/lm_eval/tasks/mmlu/arc_style/mmlu_world_religions.yaml b/lm_eval/tasks/mmlu/continuation/mmlu_world_religions.yaml similarity index 50% rename from lm_eval/tasks/mmlu/arc_style/mmlu_world_religions.yaml rename to lm_eval/tasks/mmlu/continuation/mmlu_world_religions.yaml index 02bfe08dc9..532442ef0d 100644 --- a/lm_eval/tasks/mmlu/arc_style/mmlu_world_religions.yaml +++ b/lm_eval/tasks/mmlu/continuation/mmlu_world_religions.yaml @@ -1,6 +1,6 @@ "dataset_name": "world_religions" "description": "The following are questions (with answers) about world\ \ religions.\n\n" -"group": "mmlu_arc_style_humanities" -"include": "_arc_style_template_yaml" -"task": "mmlu_arc_style_world_religions" +"group": "mmlu_continuation_humanities" +"include": "_continuation_template_yaml" +"task": "mmlu_continuation_world_religions"