In [1]:
import torch
from datasets import load_dataset, load_metric
from transformers import LEDTokenizer, LEDForConditionalGeneration

In [2]:
# Test split of the bakhitovd/data_science_arxiv dataset; every evaluation cell below maps over it.
data_test = load_dataset('bakhitovd/data_science_arxiv', split='test')

Found cached dataset json (C:/Users/bakhi/.cache/huggingface/datasets/bakhitovd___json/bakhitovd--data_science_arxiv-d562cf23e63fbcaf/0.0.0/fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e)


In [6]:
def generate_answer(batch, max_input_length=10240, max_output_length=512):
    """Summarise a batch of articles with the module-level LED ``model`` and ``tokenizer``.

    Args:
        batch: Mapping whose "article" entry holds the texts to summarise
            (the batched form handed over by ``Dataset.map(..., batched=True)``).
        max_input_length: Token length every article is padded/truncated to.
            Defaults to the previously hard-coded 10240.
        max_output_length: ``max_length`` forwarded to ``model.generate``.
            Defaults to the previously hard-coded 512.

    Returns:
        The same ``batch`` with a "predicted_abstract" entry holding the decoded summaries.

    Both limits can be overridden per run via ``Dataset.map(..., fn_kwargs={...})``.
    """
    inputs_dict = tokenizer(
        batch["article"],
        padding="max_length",
        max_length=max_input_length,
        return_tensors="pt",
        truncation=True,
    )
    # Follow whatever device the model lives on instead of assuming "cuda".
    device = model.device
    input_ids = inputs_dict.input_ids.to(device)
    attention_mask = inputs_dict.attention_mask.to(device)

    # put global attention on <s> token (position 0 of every sequence)
    global_attention_mask = torch.zeros_like(attention_mask)
    global_attention_mask[:, 0] = 1

    predicted_abstract_ids = model.generate(
        input_ids,
        attention_mask=attention_mask,
        global_attention_mask=global_attention_mask,
        max_length=max_output_length,
    )
    batch["predicted_abstract"] = tokenizer.batch_decode(predicted_abstract_ids, skip_special_tokens=True)
    return batch

# LED_7k_epoch_3

In [77]:
# Tokenizer for the LED_7k_epoch_3 run, loaded from "checkpoint-22500" (presumably a local checkpoint directory).
tokenizer = LEDTokenizer.from_pretrained("checkpoint-22500")

In [78]:
# Load the matching weights, move them to the GPU and cast them to half precision.
model = LEDForConditionalGeneration.from_pretrained("checkpoint-22500").to("cuda").half()

In [79]:
# Summarise the whole test split, then score the predictions against the reference abstracts.
result = data_test.map(generate_answer, batched=True, batch_size=4)

rouge = load_metric("rouge")
score = rouge.compute(
    predictions=result["predicted_abstract"],
    references=result["abstract"],
    rouge_types=["rouge1", "rouge2", "rougeL"],
)

# Report the mid F-measure of each ROUGE variant.
for rouge_type in ("rouge1", "rouge2", "rougeL"):
    print(f"{rouge_type}: ", score[rouge_type].mid.fmeasure)

Map:   0%|          | 0/1145 [00:00<?, ? examples/s]

rouge1:  0.43337361986424644
rouge2:  0.1629855271710752
rougeL:  0.2459445128507289


# allenai/led-large-16384-arxiv

In [5]:
# Tokenizer of the allenai/led-large-16384-arxiv baseline.
tokenizer = LEDTokenizer.from_pretrained("allenai/led-large-16384-arxiv")

In [6]:
# Baseline weights on the GPU in half precision.
model = LEDForConditionalGeneration.from_pretrained("allenai/led-large-16384-arxiv").to("cuda").half()

In [7]:
# Summarise the whole test split, then score the predictions against the reference abstracts.
# NOTE(review): the recorded output of this cell is a CUDA out-of-memory error on an
# 8 GiB GPU; the next section repeats the same call — confirm whether this cell is still needed.
result = data_test.map(generate_answer, batched=True, batch_size=2)

rouge = load_metric("rouge")
score = rouge.compute(
    predictions=result["predicted_abstract"],
    references=result["abstract"],
    rouge_types=["rouge1", "rouge2", "rougeL"],
)

# Report the mid F-measure of each ROUGE variant.
for rouge_type in ("rouge1", "rouge2", "rougeL"):
    print(f"{rouge_type}: ", score[rouge_type].mid.fmeasure)

Map:   0%|          | 0/1145 [00:00<?, ? examples/s]

RuntimeError: CUDA out of memory. Tried to allocate 2.00 GiB (GPU 0; 8.00 GiB total capacity; 5.27 GiB already allocated; 0 bytes free; 6.86 GiB reserved in total by PyTorch)

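Previously recorded scores (model | rouge1 | rouge2 | rougeL — presumably from an earlier successful run): led-large-16384-arxiv | 0.436322167 | 0.168023514 | 0.245217045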

# allenai/led-large-16384-arxiv_512

In [4]:
# Tokenizer of the allenai/led-large-16384-arxiv baseline.
tokenizer = LEDTokenizer.from_pretrained("allenai/led-large-16384-arxiv")

In [5]:
# Baseline weights on the GPU in half precision.
model = LEDForConditionalGeneration.from_pretrained("allenai/led-large-16384-arxiv").to("cuda").half()

In [6]:
# Summarise the whole test split, then score the predictions against the reference abstracts.
result = data_test.map(generate_answer, batched=True, batch_size=2)

rouge = load_metric("rouge")
score = rouge.compute(
    predictions=result["predicted_abstract"],
    references=result["abstract"],
    rouge_types=["rouge1", "rouge2", "rougeL"],
)

# Report the mid F-measure of each ROUGE variant.
for rouge_type in ("rouge1", "rouge2", "rougeL"):
    print(f"{rouge_type}: ", score[rouge_type].mid.fmeasure)

Map:   0%|          | 0/1145 [00:00<?, ? examples/s]

  rouge = load_metric("rouge")


rouge1:  0.43504442225757034
rouge2:  0.17124051696063441
rougeL:  0.24651911996386372


# NielsV/led-arxiv-10240

In [4]:
# Tokenizer of the NielsV/led-arxiv-10240 baseline.
tokenizer = LEDTokenizer.from_pretrained("NielsV/led-arxiv-10240")

Downloading:   0%|          | 0.00/798k [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


Downloading:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/957 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.48k [00:00<?, ?B/s]

In [5]:
# Baseline weights on the GPU in half precision.
model = LEDForConditionalGeneration.from_pretrained("NielsV/led-arxiv-10240").to("cuda").half()

Downloading:   0%|          | 0.00/1.28k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/648M [00:00<?, ?B/s]

In [7]:
# Summarise the whole test split, then score the predictions against the reference abstracts.
result = data_test.map(generate_answer, batched=True, batch_size=2)

rouge = load_metric("rouge")
score = rouge.compute(
    predictions=result["predicted_abstract"],
    references=result["abstract"],
    rouge_types=["rouge1", "rouge2", "rougeL"],
)

# Report the mid F-measure of each ROUGE variant.
for rouge_type in ("rouge1", "rouge2", "rougeL"):
    print(f"{rouge_type}: ", score[rouge_type].mid.fmeasure)

Map:   0%|          | 0/1145 [00:00<?, ? examples/s]

  rouge = load_metric("rouge")


rouge1:  0.4328558701386347
rouge2:  0.16079618784920696
rougeL:  0.2386949665758108


# ArtifactAI/led_base_16384_arxiv_summarization

In [4]:
# Tokenizer of the ArtifactAI/led_base_16384_arxiv_summarization baseline.
tokenizer = LEDTokenizer.from_pretrained("ArtifactAI/led_base_16384_arxiv_summarization")

Downloading:   0%|          | 0.00/798k [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


Downloading:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/957 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.52k [00:00<?, ?B/s]

In [5]:
# Baseline weights on the GPU in half precision.
model = LEDForConditionalGeneration.from_pretrained("ArtifactAI/led_base_16384_arxiv_summarization").to("cuda").half()

Downloading:   0%|          | 0.00/1.28k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/648M [00:00<?, ?B/s]

In [6]:
# Summarise the whole test split, then score the predictions against the reference abstracts.
result = data_test.map(generate_answer, batched=True, batch_size=2)

rouge = load_metric("rouge")
score = rouge.compute(
    predictions=result["predicted_abstract"],
    references=result["abstract"],
    rouge_types=["rouge1", "rouge2", "rougeL"],
)

# Report the mid F-measure of each ROUGE variant.
for rouge_type in ("rouge1", "rouge2", "rougeL"):
    print(f"{rouge_type}: ", score[rouge_type].mid.fmeasure)

Map:   0%|          | 0/1145 [00:00<?, ? examples/s]

  rouge = load_metric("rouge")


rouge1:  0.4267678394989245
rouge2:  0.15455973781429677
rougeL:  0.23100264873566928


# ccdv/lsg-bart-base-16384-arxiv

In [10]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

In [11]:
# Tokenizer of the ccdv/lsg-bart-base-16384-arxiv baseline.
tokenizer = AutoTokenizer.from_pretrained("ccdv/lsg-bart-base-16384-arxiv")

Downloading:   0%|          | 0.00/2.11M [00:00<?, ?B/s]

In [13]:
# Baseline weights on the GPU in half precision.
# NOTE(review): trust_remote_code=True runs code shipped in the model repository; the
# recorded warning recommends pinning an explicit `revision` — add one once the audited commit is known.
model = AutoModelForSeq2SeqLM.from_pretrained("ccdv/lsg-bart-base-16384-arxiv", trust_remote_code=True).to("cuda").half()

Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.


Downloading:   0%|          | 0.00/39.4k [00:00<?, ?B/s]

Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.


In [20]:
def generate_answer_ccdv(batch, max_input_length=16384, max_output_length=512):
    """Summarise a batch of articles with the module-level LSG ``model`` and ``tokenizer``.

    Args:
        batch: Mapping whose "article" entry holds the texts to summarise
            (the batched form handed over by ``Dataset.map(..., batched=True)``).
        max_input_length: Token length every article is padded/truncated to.
            Defaults to the previously hard-coded 16384.
        max_output_length: ``max_length`` forwarded to ``model.generate``.
            Defaults to the previously hard-coded 512.

    Returns:
        The same ``batch`` with a "predicted_abstract" entry holding the decoded summaries.
    """
    inputs_dict = tokenizer(
        batch["article"],
        padding="max_length",
        max_length=max_input_length,
        return_tensors="pt",
        truncation=True,
    )
    # Follow whatever device the model lives on instead of assuming "cuda".
    device = model.device
    input_ids = inputs_dict.input_ids.to(device)
    attention_mask = inputs_dict.attention_mask.to(device)
    predicted_abstract_ids = model.generate(
        input_ids,
        attention_mask=attention_mask,
        max_length=max_output_length,
    )
    batch["predicted_abstract"] = tokenizer.batch_decode(predicted_abstract_ids, skip_special_tokens=True)
    return batch

In [22]:
# Summarise the whole test split, then score the predictions against the reference abstracts.
result = data_test.map(generate_answer_ccdv, batched=True, batch_size=2)

rouge = load_metric("rouge")
score = rouge.compute(
    predictions=result["predicted_abstract"],
    references=result["abstract"],
    rouge_types=["rouge1", "rouge2", "rougeL"],
)

# Report the mid F-measure of each ROUGE variant.
for rouge_type in ("rouge1", "rouge2", "rougeL"):
    print(f"{rouge_type}: ", score[rouge_type].mid.fmeasure)



rouge1:  0.42656256464416686
rouge2:  0.17277893778163966
rougeL:  0.2435920897670595


# ccdv/lsg-bart-base-4096-arxiv

In [10]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

In [23]:
# Tokenizer of the ccdv/lsg-bart-base-4096-arxiv baseline.
tokenizer = AutoTokenizer.from_pretrained("ccdv/lsg-bart-base-4096-arxiv")

Downloading:   0%|          | 0.00/403 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/798k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/2.11M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/239 [00:00<?, ?B/s]

In [24]:
# Baseline weights on the GPU in half precision.
# NOTE(review): trust_remote_code=True runs code shipped in the model repository; the
# recorded warning recommends pinning an explicit `revision` — add one once the audited commit is known.
model = AutoModelForSeq2SeqLM.from_pretrained("ccdv/lsg-bart-base-4096-arxiv", trust_remote_code=True).to("cuda").half()

Downloading:   0%|          | 0.00/2.58k [00:00<?, ?B/s]

Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.


Downloading:   0%|          | 0.00/39.4k [00:00<?, ?B/s]

Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.


Downloading:   0%|          | 0.00/578M [00:00<?, ?B/s]

In [25]:
def generate_answer_ccdv(batch, max_input_length=4096, max_output_length=512):
    """Summarise a batch of articles with the module-level LSG ``model`` and ``tokenizer``.

    Args:
        batch: Mapping whose "article" entry holds the texts to summarise
            (the batched form handed over by ``Dataset.map(..., batched=True)``).
        max_input_length: Token length every article is padded/truncated to.
            Defaults to the previously hard-coded 4096.
        max_output_length: ``max_length`` forwarded to ``model.generate``.
            Defaults to the previously hard-coded 512.

    Returns:
        The same ``batch`` with a "predicted_abstract" entry holding the decoded summaries.
    """
    inputs_dict = tokenizer(
        batch["article"],
        padding="max_length",
        max_length=max_input_length,
        return_tensors="pt",
        truncation=True,
    )
    # Follow whatever device the model lives on instead of assuming "cuda".
    device = model.device
    input_ids = inputs_dict.input_ids.to(device)
    attention_mask = inputs_dict.attention_mask.to(device)
    predicted_abstract_ids = model.generate(
        input_ids,
        attention_mask=attention_mask,
        max_length=max_output_length,
    )
    batch["predicted_abstract"] = tokenizer.batch_decode(predicted_abstract_ids, skip_special_tokens=True)
    return batch

In [26]:
# Summarise the whole test split, then score the predictions against the reference abstracts.
result = data_test.map(generate_answer_ccdv, batched=True, batch_size=8)

rouge = load_metric("rouge")
score = rouge.compute(
    predictions=result["predicted_abstract"],
    references=result["abstract"],
    rouge_types=["rouge1", "rouge2", "rougeL"],
)

# Report the mid F-measure of each ROUGE variant.
for rouge_type in ("rouge1", "rouge2", "rougeL"):
    print(f"{rouge_type}: ", score[rouge_type].mid.fmeasure)

Map:   0%|          | 0/1145 [00:00<?, ? examples/s]

rouge1:  0.41217009452333453
rouge2:  0.16086754601156766
rougeL:  0.23435867448479752


# google/pegasus-arxiv

In [3]:
from transformers import PegasusForConditionalGeneration, PegasusTokenizer

In [4]:
# google/pegasus-arxiv baseline: tokenizer plus weights moved to the GPU
# (no .half() here, so the model keeps the precision it was loaded in).
tokenizer = PegasusTokenizer.from_pretrained('google/pegasus-arxiv')
model = PegasusForConditionalGeneration.from_pretrained('google/pegasus-arxiv').to("cuda")

In [5]:
def generate_answer_pegas(batch, max_input_length=1024, max_output_length=512):
    """Summarise a batch of articles with the module-level Pegasus ``model`` and ``tokenizer``.

    Args:
        batch: Mapping whose "article" entry holds the texts to summarise
            (the batched form handed over by ``Dataset.map(..., batched=True)``).
        max_input_length: Token length every article is padded/truncated to.
            Defaults to the previously hard-coded 1024.
        max_output_length: ``max_length`` forwarded to ``model.generate``.
            Defaults to the previously hard-coded 512.

    Returns:
        The same ``batch`` with a "predicted_abstract" entry holding the decoded summaries.
    """
    inputs_dict = tokenizer(
        batch["article"],
        padding="max_length",
        max_length=max_input_length,
        return_tensors="pt",
        truncation=True,
    )
    # Follow whatever device the model lives on instead of assuming "cuda".
    device = model.device
    input_ids = inputs_dict.input_ids.to(device)
    # Inputs are padded up to max_input_length, so pass the tokenizer's mask
    # explicitly rather than leaving generate() to work out which tokens are padding.
    attention_mask = inputs_dict.attention_mask.to(device)
    predicted_abstract_ids = model.generate(
        input_ids,
        attention_mask=attention_mask,
        max_length=max_output_length,
    )
    batch["predicted_abstract"] = tokenizer.batch_decode(predicted_abstract_ids, skip_special_tokens=True)
    return batch

In [6]:
# Summarise the whole test split, then score the predictions against the reference abstracts.
result = data_test.map(generate_answer_pegas, batched=True, batch_size=2)

rouge = load_metric("rouge")
score = rouge.compute(
    predictions=result["predicted_abstract"],
    references=result["abstract"],
    rouge_types=["rouge1", "rouge2", "rougeL"],
)

# Report the mid F-measure of each ROUGE variant.
for rouge_type in ("rouge1", "rouge2", "rougeL"):
    print(f"{rouge_type}: ", score[rouge_type].mid.fmeasure)

Map:   0%|          | 0/1145 [00:00<?, ? examples/s]

  rouge = load_metric("rouge")


rouge1:  0.39796053226809097
rouge2:  0.14236819313001944
rougeL:  0.22174633165223157


# google/bigbird-pegasus-large-arxiv (block_size=16, num_random_blocks=2)

In [3]:
from transformers import BigBirdPegasusForConditionalGeneration, AutoTokenizer

In [4]:
# google/bigbird-pegasus-large-arxiv baseline, loaded with block_size=16 and
# num_random_blocks=2 for this run; weights go to the GPU without a .half() cast.
tokenizer = AutoTokenizer.from_pretrained('google/bigbird-pegasus-large-arxiv')
model = BigBirdPegasusForConditionalGeneration.from_pretrained('google/bigbird-pegasus-large-arxiv', block_size=16, num_random_blocks=2).to("cuda")

In [5]:
def generate_answer_bigbird(batch, max_input_length=4096, max_output_length=512):
    """Summarise a batch of articles with the module-level BigBird-Pegasus ``model`` and ``tokenizer``.

    Args:
        batch: Mapping whose "article" entry holds the texts to summarise
            (the batched form handed over by ``Dataset.map(..., batched=True)``).
        max_input_length: Token length every article is padded/truncated to.
            Defaults to the previously hard-coded 4096.
        max_output_length: ``max_length`` forwarded to ``model.generate``.
            Defaults to the previously hard-coded 512.

    Returns:
        The same ``batch`` with a "predicted_abstract" entry holding the decoded summaries.
    """
    inputs_dict = tokenizer(
        batch["article"],
        padding="max_length",
        max_length=max_input_length,
        return_tensors="pt",
        truncation=True,
    )
    # Follow whatever device the model lives on instead of assuming "cuda".
    device = model.device
    input_ids = inputs_dict.input_ids.to(device)
    # Inputs are padded up to max_input_length, so pass the tokenizer's mask
    # explicitly rather than leaving generate() to work out which tokens are padding.
    attention_mask = inputs_dict.attention_mask.to(device)
    predicted_abstract_ids = model.generate(
        input_ids,
        attention_mask=attention_mask,
        max_length=max_output_length,
    )
    batch["predicted_abstract"] = tokenizer.batch_decode(predicted_abstract_ids, skip_special_tokens=True)
    return batch

In [6]:
# Summarise the whole test split, then score the predictions against the reference abstracts.
result = data_test.map(generate_answer_bigbird, batched=True, batch_size=1)

rouge = load_metric("rouge")
score = rouge.compute(
    predictions=result["predicted_abstract"],
    references=result["abstract"],
    rouge_types=["rouge1", "rouge2", "rougeL"],
)

# Report the mid F-measure of each ROUGE variant.
for rouge_type in ("rouge1", "rouge2", "rougeL"):
    print(f"{rouge_type}: ", score[rouge_type].mid.fmeasure)

Map:   0%|          | 0/1145 [00:00<?, ? examples/s]

To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor'). (Triggered internally at  ..\aten\src\ATen\native\BinaryOps.cpp:467.)
  return torch.floor_divide(self, other)
  rouge = load_metric("rouge")


rouge1:  0.30521526451025494
rouge2:  0.09321141661319818
rougeL:  0.19663637267315034


# google/bigbird-pegasus-large-arxiv (block_size=32, num_random_blocks=4)

In [3]:
from transformers import BigBirdPegasusForConditionalGeneration, AutoTokenizer

In [7]:
# Same google/bigbird-pegasus-large-arxiv baseline, this time loaded with
# block_size=32 and num_random_blocks=4; weights go to the GPU without a .half() cast.
tokenizer = AutoTokenizer.from_pretrained('google/bigbird-pegasus-large-arxiv')
model = BigBirdPegasusForConditionalGeneration.from_pretrained('google/bigbird-pegasus-large-arxiv', block_size=32, num_random_blocks=4).to("cuda")

In [8]:
def generate_answer_bigbird(batch, max_input_length=4096, max_output_length=512):
    """Summarise a batch of articles with the module-level BigBird-Pegasus ``model`` and ``tokenizer``.

    Args:
        batch: Mapping whose "article" entry holds the texts to summarise
            (the batched form handed over by ``Dataset.map(..., batched=True)``).
        max_input_length: Token length every article is padded/truncated to.
            Defaults to the previously hard-coded 4096.
        max_output_length: ``max_length`` forwarded to ``model.generate``.
            Defaults to the previously hard-coded 512.

    Returns:
        The same ``batch`` with a "predicted_abstract" entry holding the decoded summaries.
    """
    inputs_dict = tokenizer(
        batch["article"],
        padding="max_length",
        max_length=max_input_length,
        return_tensors="pt",
        truncation=True,
    )
    # Follow whatever device the model lives on instead of assuming "cuda".
    device = model.device
    input_ids = inputs_dict.input_ids.to(device)
    # Inputs are padded up to max_input_length, so pass the tokenizer's mask
    # explicitly rather than leaving generate() to work out which tokens are padding.
    attention_mask = inputs_dict.attention_mask.to(device)
    predicted_abstract_ids = model.generate(
        input_ids,
        attention_mask=attention_mask,
        max_length=max_output_length,
    )
    batch["predicted_abstract"] = tokenizer.batch_decode(predicted_abstract_ids, skip_special_tokens=True)
    return batch

In [9]:
# Summarise the whole test split, then score the predictions against the reference abstracts.
result = data_test.map(generate_answer_bigbird, batched=True, batch_size=1)

rouge = load_metric("rouge")
score = rouge.compute(
    predictions=result["predicted_abstract"],
    references=result["abstract"],
    rouge_types=["rouge1", "rouge2", "rougeL"],
)

# Report the mid F-measure of each ROUGE variant.
for rouge_type in ("rouge1", "rouge2", "rougeL"):
    print(f"{rouge_type}: ", score[rouge_type].mid.fmeasure)

Map:   0%|          | 0/1145 [00:00<?, ? examples/s]

rouge1:  0.402266896664768
rouge2:  0.1477705948116557
rougeL:  0.23141766776089584


# LED_7k_epoch_3.25

In [4]:
# Tokenizer for the LED_7k_epoch_3.25 run, loaded from "checkpoint-3+1000" (presumably a local checkpoint directory).
tokenizer = LEDTokenizer.from_pretrained("checkpoint-3+1000")

In [5]:
# Matching weights on the GPU in half precision.
model = LEDForConditionalGeneration.from_pretrained("checkpoint-3+1000").to("cuda").half()

In [6]:
# Summarise the whole test split, then score the predictions against the reference abstracts.
result = data_test.map(generate_answer, batched=True, batch_size=4)

rouge = load_metric("rouge")
score = rouge.compute(
    predictions=result["predicted_abstract"],
    references=result["abstract"],
    rouge_types=["rouge1", "rouge2", "rougeL"],
)

# Report the mid F-measure of each ROUGE variant.
for rouge_type in ("rouge1", "rouge2", "rougeL"):
    print(f"{rouge_type}: ", score[rouge_type].mid.fmeasure)

Map:   0%|          | 0/1145 [00:00<?, ? examples/s]

  rouge = load_metric("rouge")


rouge1:  0.4347604554981861
rouge2:  0.16313799982262556
rougeL:  0.2455494779638553


# LED_7k_epoch_3.5

In [11]:
# Tokenizer for the LED_7k_epoch_3.5 run, loaded from "checkpoint-3+2000" (presumably a local checkpoint directory).
tokenizer = LEDTokenizer.from_pretrained("checkpoint-3+2000")

In [12]:
# Matching weights on the GPU in half precision.
model = LEDForConditionalGeneration.from_pretrained("checkpoint-3+2000").to("cuda").half()

In [13]:
# Summarise the whole test split, then score the predictions against the reference abstracts.
result = data_test.map(generate_answer, batched=True, batch_size=4)

rouge = load_metric("rouge")
score = rouge.compute(
    predictions=result["predicted_abstract"],
    references=result["abstract"],
    rouge_types=["rouge1", "rouge2", "rougeL"],
)

# Report the mid F-measure of each ROUGE variant.
for rouge_type in ("rouge1", "rouge2", "rougeL"):
    print(f"{rouge_type}: ", score[rouge_type].mid.fmeasure)

Map:   0%|          | 0/1145 [00:00<?, ? examples/s]

rouge1:  0.4339370198531878
rouge2:  0.16169393454842412
rougeL:  0.24416880991799178


# LED_7k_epoch_3.75

In [14]:
# Tokenizer for the LED_7k_epoch_3.75 run, loaded from "checkpoint-3+3000" (presumably a local checkpoint directory).
tokenizer = LEDTokenizer.from_pretrained("checkpoint-3+3000")

In [15]:
# Matching weights on the GPU in half precision.
model = LEDForConditionalGeneration.from_pretrained("checkpoint-3+3000").to("cuda").half()

In [16]:
# Summarise the whole test split, then score the predictions against the reference abstracts.
result = data_test.map(generate_answer, batched=True, batch_size=4)

rouge = load_metric("rouge")
score = rouge.compute(
    predictions=result["predicted_abstract"],
    references=result["abstract"],
    rouge_types=["rouge1", "rouge2", "rougeL"],
)

# Report the mid F-measure of each ROUGE variant.
for rouge_type in ("rouge1", "rouge2", "rougeL"):
    print(f"{rouge_type}: ", score[rouge_type].mid.fmeasure)

Map:   0%|          | 0/1145 [00:00<?, ? examples/s]

rouge1:  0.43616285781798514
rouge2:  0.16268108779101098
rougeL:  0.24411254082207115


# LED_7k_epoch_4

In [17]:
# Tokenizer saved under "LED_7k_epoch_4" (presumably a local checkpoint directory).
tokenizer = LEDTokenizer.from_pretrained("LED_7k_epoch_4")

In [18]:
# Matching weights on the GPU in half precision.
model = LEDForConditionalGeneration.from_pretrained("LED_7k_epoch_4").to("cuda").half()

In [19]:
# Summarise the whole test split, then score the predictions against the reference abstracts.
result = data_test.map(generate_answer, batched=True, batch_size=4)

rouge = load_metric("rouge")
score = rouge.compute(
    predictions=result["predicted_abstract"],
    references=result["abstract"],
    rouge_types=["rouge1", "rouge2", "rougeL"],
)

# Report the mid F-measure of each ROUGE variant.
for rouge_type in ("rouge1", "rouge2", "rougeL"):
    print(f"{rouge_type}: ", score[rouge_type].mid.fmeasure)

Map:   0%|          | 0/1145 [00:00<?, ? examples/s]

rouge1:  0.4358844669094102
rouge2:  0.16334316623445835
rougeL:  0.24502806847584777


# LED_7k_epoch_4.5

In [7]:
# Tokenizer saved under "LED_7k_epoch_4.5" (presumably a local checkpoint directory).
tokenizer = LEDTokenizer.from_pretrained("LED_7k_epoch_4.5")

In [8]:
# Matching weights on the GPU in half precision.
model = LEDForConditionalGeneration.from_pretrained("LED_7k_epoch_4.5").to("cuda").half()

In [9]:
# Summarise the whole test split, then score the predictions against the reference abstracts.
result = data_test.map(generate_answer, batched=True, batch_size=4)

rouge = load_metric("rouge")
score = rouge.compute(
    predictions=result["predicted_abstract"],
    references=result["abstract"],
    rouge_types=["rouge1", "rouge2", "rougeL"],
)

# Report the mid F-measure of each ROUGE variant.
for rouge_type in ("rouge1", "rouge2", "rougeL"):
    print(f"{rouge_type}: ", score[rouge_type].mid.fmeasure)

Map:   0%|          | 0/1145 [00:00<?, ? examples/s]

rouge1:  0.4363935623673012
rouge2:  0.1632467180044928
rougeL:  0.24383670081099068


# LED_7k_epoch_5

In [34]:
# Tokenizer saved under "LED_7k_epoch_5" (presumably a local checkpoint directory).
tokenizer = LEDTokenizer.from_pretrained("LED_7k_epoch_5")

In [35]:
# Matching weights on the GPU in half precision.
model = LEDForConditionalGeneration.from_pretrained("LED_7k_epoch_5").to("cuda").half()

In [6]:
# Summarise the whole test split, then score the predictions against the reference abstracts.
result = data_test.map(generate_answer, batched=True, batch_size=4)

rouge = load_metric("rouge")
score = rouge.compute(
    predictions=result["predicted_abstract"],
    references=result["abstract"],
    rouge_types=["rouge1", "rouge2", "rougeL"],
)

# Report the mid F-measure of each ROUGE variant.
for rouge_type in ("rouge1", "rouge2", "rougeL"):
    print(f"{rouge_type}: ", score[rouge_type].mid.fmeasure)

Map:   0%|          | 0/1145 [00:00<?, ? examples/s]

  rouge = load_metric("rouge")


rouge1:  0.4420843777399375
rouge2:  0.16586350110140066
rougeL:  0.24705680106987288


# LED_16k_epoch_5

In [4]:
# NOTE(review): the section heading says LED_16k_epoch_5, but this loads the same
# "LED_7k_epoch_5" path as the previous section — confirm the intended checkpoint / input length.
tokenizer = LEDTokenizer.from_pretrained("LED_7k_epoch_5")

In [5]:
# Weights on the GPU in half precision.
# NOTE(review): path is "LED_7k_epoch_5" although the section heading says LED_16k_epoch_5 — confirm.
model = LEDForConditionalGeneration.from_pretrained("LED_7k_epoch_5").to("cuda").half()

In [6]:
# Summarise the whole test split one article at a time, then score the predictions
# against the reference abstracts.
result = data_test.map(generate_answer, batched=True, batch_size=1)

rouge = load_metric("rouge")
score = rouge.compute(
    predictions=result["predicted_abstract"],
    references=result["abstract"],
    rouge_types=["rouge1", "rouge2", "rougeL"],
)

# Report the mid F-measure of each ROUGE variant.
for rouge_type in ("rouge1", "rouge2", "rougeL"):
    print(f"{rouge_type}: ", score[rouge_type].mid.fmeasure)

Map:   0%|          | 0/1145 [00:00<?, ? examples/s]

  rouge = load_metric("rouge")


rouge1:  0.4418645455988307
rouge2:  0.16757827735014008
rougeL:  0.24790280154672484


# LED_7k_epoch_5.25

In [13]:
# Tokenizer saved under "LED_7k_epoch_5.25" (presumably a local checkpoint directory).
tokenizer = LEDTokenizer.from_pretrained("LED_7k_epoch_5.25")

In [14]:
# Matching weights on the GPU in half precision.
model = LEDForConditionalGeneration.from_pretrained("LED_7k_epoch_5.25").to("cuda").half()

In [15]:
# Summarise the whole test split, then score the predictions against the reference abstracts.
result = data_test.map(generate_answer, batched=True, batch_size=4)

rouge = load_metric("rouge")
score = rouge.compute(
    predictions=result["predicted_abstract"],
    references=result["abstract"],
    rouge_types=["rouge1", "rouge2", "rougeL"],
)

# Report the mid F-measure of each ROUGE variant.
for rouge_type in ("rouge1", "rouge2", "rougeL"):
    print(f"{rouge_type}: ", score[rouge_type].mid.fmeasure)

Map:   0%|          | 0/1145 [00:00<?, ? examples/s]

rouge1:  0.43585895521476786
rouge2:  0.1605719477937792
rougeL:  0.2447807082040388


# LED_7k_epoch_5.5

In [10]:
# Tokenizer saved under "LED_7k_epoch_5.5" (presumably a local checkpoint directory).
tokenizer = LEDTokenizer.from_pretrained("LED_7k_epoch_5.5")

In [11]:
# Matching weights on the GPU in half precision.
model = LEDForConditionalGeneration.from_pretrained("LED_7k_epoch_5.5").to("cuda").half()

In [12]:
# Summarise the whole test split, then score the predictions against the reference abstracts.
result = data_test.map(generate_answer, batched=True, batch_size=4)

rouge = load_metric("rouge")
score = rouge.compute(
    predictions=result["predicted_abstract"],
    references=result["abstract"],
    rouge_types=["rouge1", "rouge2", "rougeL"],
)

# Report the mid F-measure of each ROUGE variant.
for rouge_type in ("rouge1", "rouge2", "rougeL"):
    print(f"{rouge_type}: ", score[rouge_type].mid.fmeasure)

Map:   0%|          | 0/1145 [00:00<?, ? examples/s]

rouge1:  0.4394120014536335
rouge2:  0.16441785815924614
rougeL:  0.24521893008188836


# LED_7k_epoch_6

In [26]:
# Tokenizer saved under "LED_7k_epoch_6" (presumably a local checkpoint directory).
tokenizer = LEDTokenizer.from_pretrained("LED_7k_epoch_6")

In [27]:
# Matching weights on the GPU in half precision.
model = LEDForConditionalGeneration.from_pretrained("LED_7k_epoch_6").to("cuda").half()

In [28]:
# Summarise the whole test split, then score the predictions against the reference abstracts.
result = data_test.map(generate_answer, batched=True, batch_size=4)

rouge = load_metric("rouge")
score = rouge.compute(
    predictions=result["predicted_abstract"],
    references=result["abstract"],
    rouge_types=["rouge1", "rouge2", "rougeL"],
)

# Report the mid F-measure of each ROUGE variant.
for rouge_type in ("rouge1", "rouge2", "rougeL"):
    print(f"{rouge_type}: ", score[rouge_type].mid.fmeasure)

Map:   0%|          | 0/1145 [00:00<?, ? examples/s]

rouge1:  0.4392286662569279
rouge2:  0.16368503033729087
rougeL:  0.24642040678686372
