Skip to content

Commit a67d0b8

Browse files
authored
Improve speed in mmlu global (#1895)
Signed-off-by: elronbandel <elronbandel@gmail.com>
1 parent 68ee369 commit a67d0b8

File tree

2,395 files changed

+4793
-19155
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

2,395 files changed

+4793
-19155
lines changed

prepare/cards/global_mmlu.py

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,6 @@
33
from unitxt.loaders import LoadHF
44
from unitxt.operators import (
55
Deduplicate,
6-
FilterByCondition,
76
ListFieldValues,
87
MapInstanceValues,
98
Set,
@@ -120,9 +119,12 @@
120119
for language in languages:
121120
for subject in subtasks:
122121
card = TaskCard(
123-
loader=LoadHF(path="CohereForAI/Global-MMLU", name=language),
122+
loader=LoadHF(
123+
path="CohereForAI/Global-MMLU",
124+
name=language,
125+
filtering_lambda=f"lambda x: x['subject'] == '{subject}'",
126+
),
124127
preprocess_steps=[
125-
FilterByCondition(values={"subject": subject}, condition="eq"),
126128
Deduplicate(by=["question", "subject", "answer"]),
127129
RenameSplits({"dev": "train"}),
128130
MapInstanceValues(

src/unitxt/catalog/cards/global_mmlu/am/abstract_algebra.json

Lines changed: 2 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -3,16 +3,10 @@
33
"loader": {
44
"__type__": "load_hf",
55
"path": "CohereForAI/Global-MMLU",
6-
"name": "am"
6+
"name": "am",
7+
"filtering_lambda": "lambda x: x['subject'] == 'abstract_algebra'"
78
},
89
"preprocess_steps": [
9-
{
10-
"__type__": "filter_by_condition",
11-
"values": {
12-
"subject": "abstract_algebra"
13-
},
14-
"condition": "eq"
15-
},
1610
{
1711
"__type__": "deduplicate",
1812
"by": [

src/unitxt/catalog/cards/global_mmlu/am/anatomy.json

Lines changed: 2 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -3,16 +3,10 @@
33
"loader": {
44
"__type__": "load_hf",
55
"path": "CohereForAI/Global-MMLU",
6-
"name": "am"
6+
"name": "am",
7+
"filtering_lambda": "lambda x: x['subject'] == 'anatomy'"
78
},
89
"preprocess_steps": [
9-
{
10-
"__type__": "filter_by_condition",
11-
"values": {
12-
"subject": "anatomy"
13-
},
14-
"condition": "eq"
15-
},
1610
{
1711
"__type__": "deduplicate",
1812
"by": [

src/unitxt/catalog/cards/global_mmlu/am/astronomy.json

Lines changed: 2 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -3,16 +3,10 @@
33
"loader": {
44
"__type__": "load_hf",
55
"path": "CohereForAI/Global-MMLU",
6-
"name": "am"
6+
"name": "am",
7+
"filtering_lambda": "lambda x: x['subject'] == 'astronomy'"
78
},
89
"preprocess_steps": [
9-
{
10-
"__type__": "filter_by_condition",
11-
"values": {
12-
"subject": "astronomy"
13-
},
14-
"condition": "eq"
15-
},
1610
{
1711
"__type__": "deduplicate",
1812
"by": [

src/unitxt/catalog/cards/global_mmlu/am/business_ethics.json

Lines changed: 2 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -3,16 +3,10 @@
33
"loader": {
44
"__type__": "load_hf",
55
"path": "CohereForAI/Global-MMLU",
6-
"name": "am"
6+
"name": "am",
7+
"filtering_lambda": "lambda x: x['subject'] == 'business_ethics'"
78
},
89
"preprocess_steps": [
9-
{
10-
"__type__": "filter_by_condition",
11-
"values": {
12-
"subject": "business_ethics"
13-
},
14-
"condition": "eq"
15-
},
1610
{
1711
"__type__": "deduplicate",
1812
"by": [

src/unitxt/catalog/cards/global_mmlu/am/clinical_knowledge.json

Lines changed: 2 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -3,16 +3,10 @@
33
"loader": {
44
"__type__": "load_hf",
55
"path": "CohereForAI/Global-MMLU",
6-
"name": "am"
6+
"name": "am",
7+
"filtering_lambda": "lambda x: x['subject'] == 'clinical_knowledge'"
78
},
89
"preprocess_steps": [
9-
{
10-
"__type__": "filter_by_condition",
11-
"values": {
12-
"subject": "clinical_knowledge"
13-
},
14-
"condition": "eq"
15-
},
1610
{
1711
"__type__": "deduplicate",
1812
"by": [

src/unitxt/catalog/cards/global_mmlu/am/college_biology.json

Lines changed: 2 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -3,16 +3,10 @@
33
"loader": {
44
"__type__": "load_hf",
55
"path": "CohereForAI/Global-MMLU",
6-
"name": "am"
6+
"name": "am",
7+
"filtering_lambda": "lambda x: x['subject'] == 'college_biology'"
78
},
89
"preprocess_steps": [
9-
{
10-
"__type__": "filter_by_condition",
11-
"values": {
12-
"subject": "college_biology"
13-
},
14-
"condition": "eq"
15-
},
1610
{
1711
"__type__": "deduplicate",
1812
"by": [

src/unitxt/catalog/cards/global_mmlu/am/college_chemistry.json

Lines changed: 2 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -3,16 +3,10 @@
33
"loader": {
44
"__type__": "load_hf",
55
"path": "CohereForAI/Global-MMLU",
6-
"name": "am"
6+
"name": "am",
7+
"filtering_lambda": "lambda x: x['subject'] == 'college_chemistry'"
78
},
89
"preprocess_steps": [
9-
{
10-
"__type__": "filter_by_condition",
11-
"values": {
12-
"subject": "college_chemistry"
13-
},
14-
"condition": "eq"
15-
},
1610
{
1711
"__type__": "deduplicate",
1812
"by": [

src/unitxt/catalog/cards/global_mmlu/am/college_computer_science.json

Lines changed: 2 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -3,16 +3,10 @@
33
"loader": {
44
"__type__": "load_hf",
55
"path": "CohereForAI/Global-MMLU",
6-
"name": "am"
6+
"name": "am",
7+
"filtering_lambda": "lambda x: x['subject'] == 'college_computer_science'"
78
},
89
"preprocess_steps": [
9-
{
10-
"__type__": "filter_by_condition",
11-
"values": {
12-
"subject": "college_computer_science"
13-
},
14-
"condition": "eq"
15-
},
1610
{
1711
"__type__": "deduplicate",
1812
"by": [

src/unitxt/catalog/cards/global_mmlu/am/college_mathematics.json

Lines changed: 2 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -3,16 +3,10 @@
33
"loader": {
44
"__type__": "load_hf",
55
"path": "CohereForAI/Global-MMLU",
6-
"name": "am"
6+
"name": "am",
7+
"filtering_lambda": "lambda x: x['subject'] == 'college_mathematics'"
78
},
89
"preprocess_steps": [
9-
{
10-
"__type__": "filter_by_condition",
11-
"values": {
12-
"subject": "college_mathematics"
13-
},
14-
"condition": "eq"
15-
},
1610
{
1711
"__type__": "deduplicate",
1812
"by": [

0 commit comments

Comments
 (0)