Skip to content

Commit

Permalink
Add tldr dataset (#831)
Browse files Browse the repository at this point in the history
* add tldr prepare card

Signed-off-by: Yotam Perlitz <yotam.perlitz@ibm.com>

* add tldr to catalog

Signed-off-by: Yotam Perlitz <yotam.perlitz@ibm.com>

* Update prepare/cards/tldr.py

---------

Signed-off-by: Yotam Perlitz <yotam.perlitz@ibm.com>
Co-authored-by: Elron Bandel <elronbandel@gmail.com>
  • Loading branch information
perlitz and elronbandel committed May 16, 2024
1 parent cc8e6e9 commit b89b290
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 0 deletions.
21 changes: 21 additions & 0 deletions prepare/cards/tldr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
from unitxt import add_to_catalog
from unitxt.blocks import AddFields, SplitRandomMix, TaskCard
from unitxt.loaders import LoadHF
from unitxt.operators import RenameFields
from unitxt.test_utils.card import test_card

# Prepare script for the "cards.tldr" catalog entry: build the card, run it
# through test_card, then register it with add_to_catalog.

# Source data: the "webis/tldr-17" dataset, loaded through LoadHF with
# streaming enabled.
tldr_loader = LoadHF(path="webis/tldr-17", streaming=True)

tldr_preprocess_steps = [
    # Both output splits ("train" and "test") are specified as 50% of the
    # source "train" split.
    # NOTE(review): presumably the two halves are disjoint random samples --
    # confirm against SplitRandomMix's documentation.
    SplitRandomMix({"train": "train[50%]", "test": "train[50%]"}),
    # Map the "content" field to "document" (presumably the input field name
    # the summarization task expects -- verify against the task definition).
    RenameFields(field_to_field={"content": "document"}),
    # Attach a "document_type" field with the constant value "document".
    AddFields(fields={"document_type": "document"}),
]

card = TaskCard(
    loader=tldr_loader,
    preprocess_steps=tldr_preprocess_steps,
    task="tasks.summarization.abstractive",
    templates="templates.summarization.abstractive.all",
)

# Check the card with the textual-assistant format before saving it.
test_card(card, format="formats.textual_assistant")

# overwrite=True: presumably replaces any existing "cards.tldr" entry.
add_to_catalog(card, "cards.tldr", overwrite=True)
31 changes: 31 additions & 0 deletions src/unitxt/catalog/cards/tldr.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
{
"type": "task_card",
"loader": {
"type": "load_hf",
"path": "webis/tldr-17",
"streaming": true
},
"preprocess_steps": [
{
"type": "split_random_mix",
"mix": {
"train": "train[50%]",
"test": "train[50%]"
}
},
{
"type": "rename_fields",
"field_to_field": {
"content": "document"
}
},
{
"type": "add_fields",
"fields": {
"document_type": "document"
}
}
],
"task": "tasks.summarization.abstractive",
"templates": "templates.summarization.abstractive.all"
}

0 comments on commit b89b290

Please sign in to comment.