From 1f730731ed21d9cfb7539a459d4d29e7ebd3a17d Mon Sep 17 00:00:00 2001
From: Yotam Perlitz
Date: Wed, 15 May 2024 19:59:13 +0300
Subject: [PATCH 1/3] add tldr prepare card

Signed-off-by: Yotam Perlitz
---
Review notes (free-text area after the "---" marker; commentary only, it is
neither commit message nor diff content):
- Adds prepare/cards/tldr.py, a script that builds a TaskCard for the
  Hugging Face dataset "webis/tldr-17" (loaded with streaming=True), runs
  test_card() on it with format "formats.textual_assistant", and registers
  it in the catalog as "cards.tldr".
- Preprocessing: the source "train" split feeds both new splits ("train"
  and "test", 50% each); the field "content" is renamed to "document"; a
  constant field document_type="document" is added.
- NOTE(review): both new splits use the identical spec "train[50%]".
  Presumably SplitRandomMix assigns each example at random to one of the
  two splits, so they do not overlap -- confirm against the unitxt docs.

 prepare/cards/tldr.py | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)
 create mode 100644 prepare/cards/tldr.py

diff --git a/prepare/cards/tldr.py b/prepare/cards/tldr.py
new file mode 100644
index 000000000..73d8b8f5c
--- /dev/null
+++ b/prepare/cards/tldr.py
@@ -0,0 +1,21 @@
+from unitxt import add_to_catalog
+from unitxt.blocks import AddFields, SplitRandomMix, TaskCard
+from unitxt.loaders import LoadHF
+from unitxt.operators import RenameFields
+from unitxt.test_utils.card import test_card
+
+card = TaskCard(
+    loader=LoadHF(path="webis/tldr-17", streaming=True),
+    preprocess_steps=[
+        SplitRandomMix({"train": "train[50%]", "test": "train[50%]"}),
+        RenameFields(field_to_field={"content": "document"}),
+        AddFields(fields={"document_type": "document"}),
+    ],
+    task="tasks.summarization.abstractive",
+    templates="templates.summarization.abstractive.all",
+)
+test_card(
+    card,
+    format="formats.textual_assistant",
+)
+add_to_catalog(card, "cards.tldr")

From 5f76a674633cac27cdb3bd516c46a666686eedc7 Mon Sep 17 00:00:00 2001
From: Yotam Perlitz
Date: Wed, 15 May 2024 19:59:31 +0300
Subject: [PATCH 2/3] add tldr to catalog

Signed-off-by: Yotam Perlitz
---
Review notes (commentary only):
- Adds the catalog entry src/unitxt/catalog/cards/tldr.json. Its fields
  repeat the TaskCard arguments from prepare/cards/tldr.py in [PATCH 1/3]:
  same loader, preprocess steps, task and templates.
- NOTE(review): presumably this file is the generated output of running
  that prepare script rather than hand-written -- confirm before editing
  it by hand.

 src/unitxt/catalog/cards/tldr.json | 31 ++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)
 create mode 100644 src/unitxt/catalog/cards/tldr.json

diff --git a/src/unitxt/catalog/cards/tldr.json b/src/unitxt/catalog/cards/tldr.json
new file mode 100644
index 000000000..f20f838c6
--- /dev/null
+++ b/src/unitxt/catalog/cards/tldr.json
@@ -0,0 +1,31 @@
+{
+    "type": "task_card",
+    "loader": {
+        "type": "load_hf",
+        "path": "webis/tldr-17",
+        "streaming": true
+    },
+    "preprocess_steps": [
+        {
+            "type": "split_random_mix",
+            "mix": {
+                "train": "train[50%]",
+                "test": "train[50%]"
+            }
+        },
+        {
+            "type": "rename_fields",
+            "field_to_field": {
+                "content": "document"
+            }
+        },
+        {
+            "type": "add_fields",
+            "fields": {
+                "document_type": "document"
+            }
+        }
+    ],
+    "task": "tasks.summarization.abstractive",
+    "templates": "templates.summarization.abstractive.all"
+}

From f6c1d97a5c37473349e3a97d507c583fde6371c5 Mon Sep 17 00:00:00 2001
From: Elron Bandel
Date: Thu, 16 May 2024 10:57:04 +0300
Subject: [PATCH 3/3] Update prepare/cards/tldr.py

---
Review notes (commentary only):
- One-line follow-up: the add_to_catalog() call now passes overwrite=True.
  Only the prepare script changes; the catalog JSON is not touched by
  this patch.
- NOTE(review): presumably overwrite=True lets a re-run of the script
  replace an already existing "cards.tldr" entry -- confirm against the
  add_to_catalog signature.

 prepare/cards/tldr.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/prepare/cards/tldr.py b/prepare/cards/tldr.py
index 73d8b8f5c..a78e1765f 100644
--- a/prepare/cards/tldr.py
+++ b/prepare/cards/tldr.py
@@ -18,4 +18,4 @@
     card,
     format="formats.textual_assistant",
 )
-add_to_catalog(card, "cards.tldr")
+add_to_catalog(card, "cards.tldr", overwrite=True)