In [1]:
# Importar las librerías necesarias
!pip install pytube
!pip install rouge_score
!pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121
!pip install git+https://github.com/m-bain/whisperx.git --upgrade
!pip install datasets
!pip install sentencepiece
!pip install accelerate -U
!pip install bert-extractive-summarizer
!pip install bert-score

import concurrent.futures
import bert_score
from nltk.translate.bleu_score import sentence_bleu
from nltk.translate import meteor_score
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import CountVectorizer
from nltk.tokenize import word_tokenize
import pandas as pd
from summarizer import Summarizer
from datasets import load_dataset
import sentencepiece
from accelerate import Accelerator
from transformers import PegasusForConditionalGeneration, PegasusTokenizer, Trainer, TrainingArguments
from transformers import AutoTokenizer, AutoModelWithLMHead, AutoModelForSeq2SeqLM
from rouge_score import rouge_scorer
import json
from pytube import YouTube
import whisperx
import gc
import torch
from concurrent.futures import ThreadPoolExecutor
from transformers import MarianMTModel, MarianTokenizer
from transformers import PegasusForConditionalGeneration, PegasusTokenizer
from transformers import AutoTokenizer, AutoModelWithLMHead, AutoModelForSeq2SeqLM
from rouge_score import rouge_scorer
import nltk
import re
from nltk.translate import meteor_score
nltk.download('wordnet')


def initialize_model_and_tokenizer(model_name):

    # Cargar el tokenizador y el modelo
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    if (model_name=="tuner007/pegasus_summarizer" or model_name=="microsoft/prophetnet-large-uncased"):
      model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
    else:
      model = AutoModelWithLMHead.from_pretrained(model_name)

    # Mover el modelo a la GPU si está disponible
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = model.to(device)

    return model, tokenizer, device

def sumary_final(inputs):
  summary_ids = model.generate(inputs, max_length=150, min_length=100, length_penalty=0.1, num_beams=16, no_repeat_ngram_size=4)
  return summary_ids

def transcribe_and_translate(url, source_lang='es', target_lang='en', device='cuda'):
    # Crear un objeto YouTube
    yt = YouTube(url)
    # Obtener el stream de audio de mayor calidad
    audio_stream = yt.streams.get_audio_only()
    # Descargar el audio
    audio_file = audio_stream.download()
    # Transcribir el audio usando whisperx
    model = whisperx.load_model("large-v2", device, compute_type="float16")
    audio = whisperx.load_audio(audio_file)
    result = model.transcribe(audio, batch_size=16)
    # Alinear el texto con el audio
    model_a, metadata = whisperx.load_align_model(language_code=source_lang, device=device)
    result = whisperx.align(result["segments"], model_a, metadata, audio, device, return_char_alignments=False)
    # Traducir el texto al inglés usando MarianMT
    modelo_nombre = f'Helsinki-NLP/opus-mt-{source_lang}-{target_lang}'
    modelo = MarianMTModel.from_pretrained(modelo_nombre).to(device)
    tokenizador = MarianTokenizer.from_pretrained(modelo_nombre)

    def traducir_segmento(segmento):
        id, texto = segmento
        # Codificar el texto y generar la traducción
        texto_tokenizado = tokenizador.prepare_seq2seq_batch([texto], return_tensors='pt').to(device)
        traduccion_ids = modelo.generate(**texto_tokenizado)
        traduccion = tokenizador.decode(traduccion_ids[0], skip_special_tokens=True)
        return id, traduccion

    # Codificar el texto y generar la traducción
    with ThreadPoolExecutor() as executor:
        segmentos = [(i, diccionario['text']) for i, diccionario in enumerate(result["segments"])]
        traducciones = list(executor.map(traducir_segmento, segmentos))
        # Ordenar las traducciones por id y juntarlas
        texto_traducido = " ".join(traduccion for id, traduccion in sorted(traducciones))
        # Devolver el texto traducido
    return texto_traducido


def divide_texto(texto, tokenizer, longitud_max=512):

    nltk.download('punkt', quiet=True)

    # Divide el texto en oraciones utilizando NLTK
    oraciones = nltk.tokenize.sent_tokenize(texto)

    trozos = []
    trozo_actual = ''
    for i, parrafo in enumerate(oraciones):
        if len(tokenizer.encode(trozo_actual + '\n' + parrafo)) <= longitud_max:
            trozo_actual += '\n' + parrafo
        else:
            trozos.append((i, trozo_actual))
            trozo_actual = parrafo
    trozos.append((i, trozo_actual))
    return trozos

def resumir_texto(trozo, model, tokenizer, device):
    id, texto = trozo
    # Codificar el texto y generar el resumen
    inputs = tokenizer.encode("summarize: " + texto, return_tensors='pt', max_length=512, truncation=True, padding='longest').to(device)
    summary_ids = model.generate(inputs, max_length=100, min_length=20, length_penalty=3, num_beams=4, early_stopping=True, no_repeat_ngram_size=3)
    resumen = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
    return id, resumen

def resumir_texto_final(trozo, model, tokenizer, device):
    id, texto = trozo
    # Codificar el texto y generar el resumen
    inputs = tokenizer.encode("summarize: " + texto, return_tensors='pt', truncation=True, padding='longest').to(device)
    summary_ids = sumary_final(inputs)
    resumen = tokenizer.decode(summary_ids[0], skip_special_tokens=True)

    return id, resumen


def resumir_texto_paralelo(texto, model, tokenizer, device, max_length, print_option="no"):
    # Dividir el texto en trozos
    if len(tokenizer.encode(texto)) <= max_length:
        return resumir_texto_final((None, texto), model, tokenizer, device)[1]

    trozos = divide_texto(texto, tokenizer, max_length)

    # Si el número de tokens es menor que max_length, llama a resumir_texto_final


    resumenes = {}

    # Crea un pool de trabajadores para ejecutar resumir_texto en paralelo
    with concurrent.futures.ThreadPoolExecutor() as executor:
        # Mapea resumir_texto a cada trozo de texto
        future_to_trozo = {executor.submit(resumir_texto, trozo, model, tokenizer, device): trozo for trozo in trozos}

        for i, future in enumerate(concurrent.futures.as_completed(future_to_trozo)):
            trozo = future_to_trozo[future]
            try:
                id, resumen = future.result()
            except Exception as exc:
                print(f'El trozo {trozo} generó una excepción: {exc}')
            else:
                resumenes[id] = resumen
                porcentaje_completado = ((i + 1) / len(trozos)) * 100
                if (print_option!="no"):
                  print(f'Proceso completado: {i + 1}. Porcentaje completado: {porcentaje_completado}%')
                  print(f'Resumen: {resumen}')

    # Ordenar los resúmenes por id y juntarlos
    texto_resumen = ' '.join(resumen for id, resumen in sorted(resumenes.items()))

    # Si los tokens de texto_resumen son más que max_length, llama a resumir_texto_paralelo de forma recursiva
    texto_resumen = resumir_texto_paralelo(texto_resumen, model, tokenizer, device, max_length, print_option)

    return texto_resumen

def generar_resumen_extractivo(texto, ratio, algoritmo="pagerank"):
    # Inicializa el modelo
    model = Summarizer()

    # Genera el resumen
    resumen = model(texto, ratio=ratio, algorithm=algoritmo)

    return resumen


Looking in indexes: https://download.pytorch.org/whl/cu121
Collecting git+https://github.com/m-bain/whisperx.git
  Cloning https://github.com/m-bain/whisperx.git to /tmp/pip-req-build-91_s7y4w
  Running command git clone --filter=blob:none --quiet https://github.com/m-bain/whisperx.git /tmp/pip-req-build-91_s7y4w
  Resolved https://github.com/m-bain/whisperx.git to commit f2da2f858e99e4211fe4f64b5f2938b007827e17
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting datasets
  Using cached datasets-2.18.0-py3-none-any.whl (510 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Using cached dill-0.3.8-py3-none-any.whl (116 kB)
Collecting xxhash (from datasets)
  Using cached xxhash-3.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (194 kB)
Collecting multiprocess (from datasets)
  Using cached multiprocess-0.70.16-py310-none-any.whl (134 kB)
Installing collected packages: xxhash, dill, multiprocess, datasets
Successfully installed datasets-2.18.0 dill-0.3.8 mult

  torchaudio.set_audio_backend("soundfile")
[nltk_data] Downloading package wordnet to /root/nltk_data...


In [2]:
# URL del video de YouTube para el test
url = "https://www.youtube.com/watch?v=vgbMR0lDQBI"  # Reemplaza esto con la URL de tu video
url = "https://www.youtube.com/watch?v=nubNmOQLgxQ"
url = "https://www.youtube.com/watch?v=efoTZzqOrI8"
url = "https://www.youtube.com/watch?v=l3xxCR3LGaI"


# Longitud máxima del resumen para el test
max_length = 400  # Reemplaza esto con la longitud máxima que desees

# Transcribir y traducir el video al inglés
device ='cuda' if torch.cuda.is_available() else 'cpu'
translated_text = transcribe_and_translate(url, device=device )

vocabulary.txt:   0%|          | 0.00/460k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/2.80k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.20M [00:00<?, ?B/s]

model.bin:   0%|          | 0.00/3.09G [00:00<?, ?B/s]

No language specified, language will be first be detected for each audio file (increases inference time).


100%|█████████████████████████████████████| 16.9M/16.9M [00:01<00:00, 11.3MiB/s]
INFO:pytorch_lightning.utilities.migration.utils:Lightning automatically upgraded your loaded checkpoint from v1.5.4 to v2.2.1. To apply the upgrade to your files permanently, run `python -m pytorch_lightning.utilities.upgrade_checkpoint ../root/.cache/torch/whisperx-vad-segmentation.bin`


Model was trained with pyannote.audio 0.0.1, yours is 3.1.1. Bad things might happen unless you revert pyannote.audio to 0.x.
Model was trained with torch 1.10.0+cu102, yours is 2.2.1+cu121. Bad things might happen unless you revert torch to 1.x.
Detected language: es (0.97) in first 30s of audio...


Downloading: "https://download.pytorch.org/torchaudio/models/wav2vec2_voxpopuli_base_10k_asr_es.pt" to /root/.cache/torch/hub/checkpoints/wav2vec2_voxpopuli_base_10k_asr_es.pt
100%|██████████| 360M/360M [00:01<00:00, 246MB/s]


config.json:   0%|          | 0.00/1.44k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/312M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/293 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/44.0 [00:00<?, ?B/s]

source.spm:   0%|          | 0.00/826k [00:00<?, ?B/s]

target.spm:   0%|          | 0.00/802k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.59M [00:00<?, ?B/s]

In [4]:
!pip install deep_translator
from deep_translator import GoogleTranslator

# Traducir de español a inglés
def traductor(text, source='en',target='es'):
  traductor = GoogleTranslator(source=source, target=target)
  resultado = traductor.translate(text)
  return resultado


Collecting deep_translator
  Downloading deep_translator-1.11.4-py3-none-any.whl (42 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/42.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.3/42.3 kB[0m [31m1.9 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: deep_translator
Successfully installed deep_translator-1.11.4


In [5]:
from concurrent.futures import ThreadPoolExecutor

model_names = ["google-t5/t5-base", "tuner007/pegasus_summarizer", "facebook/bart-large-cnn", "microsoft/prophetnet-large-uncased"]
text = translated_text
reduced_text = generar_resumen_extractivo(text, ratio=0.3)

# Para cada modelo
for model_name in model_names:
    # Carga el tokenizador y el modelo
    model, tokenizer, device = initialize_model_and_tokenizer(model_name)

    #resumen 1 model
    _, summary_original = resumir_texto_final([_, translated_text], model, tokenizer, device)

    #resume pipeline
    summary_pipeline = resumir_texto_paralelo(text, model, tokenizer, device, max_length=400, print_option="no")

    #resumen 1 model extractive
    _, summary_original_extracted = resumir_texto_final([_, reduced_text], model, tokenizer, device)

    #resume pipeline
    summary_pipeline_extracted = resumir_texto_paralelo(reduced_text, model, tokenizer, device, max_length=400, print_option="no")

    print(f"Model: {model_name}")

    print("_________________________________________________________________\n\n")

    print(f"\n Generated Summary without the pipeline: {summary_original}")

    print(f"\n Generated Summary with the pipeline: {summary_pipeline}")

    print(f"\n Generated Summary with extractive summarization: {summary_original_extracted}")

    print(f"\n Generated Summary with pipeline and extractive summarization: {summary_pipeline_extracted}")

    print("_________________________________________________________________\n\n")


    print(f"\n Generated Summary without the pipeline: {traductor(summary_original)}")

    print(f"\n Generated Summary with the pipeline: {traductor(summary_pipeline)}")

    print(f"\n Generated Summary with extractive summarization: {traductor(summary_original_extracted)}")

    print(f"\n Generated Summary with pipeline and extractive summarization: {traductor(summary_pipeline_extracted)}")

    print("_________________________________________________________________\n\n")
    del model
    del tokenizer
    torch.cuda.empty_cache()

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.34G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.39M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/892M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

Token indices sequence length is longer than the specified maximum sequence length for this model (3001 > 512). Running this sequence through the model will result in indexing errors


Model: google-t5/t5-base
_________________________________________________________________



 Generated Summary without the pipeline: diabetes is a disease that can be incredibly complex and for which there are many types. today at hiperactin we talk about diabetes. this video is sponsored by the Universitat Politècnica de València. if you want to support science channels and vote on topics from upcoming videos, you can join Patreon and contribute to the cause, cnn.com's nina dos santos says.

 Generated Summary with the pipeline: the topic was voted by the channel's patrons in an ultra-fought poll. diabetes is a disease that can be incredibly complex and for which there are many types. there are also more types besides type 1 diabetes. follow a healthy diet or perform physical activity to prevent the disease. if you have type 1 diabetes, follow a doctor's advice on how to prevent it from developing into a type 2 diabetes, say experts.

 Generated Summary with extractive summarization

Token indices sequence length is longer than the specified maximum sequence length for this model (2755 > 1024). Running this sequence through the model will result in indexing errors


Model: tuner007/pegasus_summarizer
_________________________________________________________________



 Generated Summary without the pipeline: Diabetes is a disease that can be incredibly complex and for which there are many types, each with its particularities. There's type 1 diabetes, type 2 diabetes, modi diabetes, gestational diabetes, neonatal diabetes... What characterizes diabetes if there are so many types? How can a sugar-absorbing disease lead to such serious problems as limb amputation, kidney failure, loss of vision, or even heart attack? This video is sponsored by the Universitat Politcnica de Valncia.n

 Generated Summary with the pipeline: A new study has found that people with obesity lose the receptors for insulin, so they are unable to respond to this hormone and therefore to capture glucose. The study found that this insulin resistance and subsequent development of type 2 diabetes is so gradual that it makes it difficult to diagnose and also increases long-term com

config.json:   0%|          | 0.00/1.58k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

Token indices sequence length is longer than the specified maximum sequence length for this model (2804 > 1024). Running this sequence through the model will result in indexing errors


Model: facebook/bart-large-cnn
_________________________________________________________________



 Generated Summary without the pipeline: Insulin is a hormone that secretes the pancreas when it detects that blood glucose levels are elevated. The function of insulin is to induce body tissues to capture glucose to use it as energy, especially some tissues such as muscles, fat or fat tissue, and the liver. Insulin also intervenes in protein metabolism, because it stimulates cells to absorb amino acids from the diet and use them to synthesize their proteins. This video is sponsored by the Universitat Politècnica de València.

 Generated Summary with the pipeline: Insulin is a hormone that secretes the pancreas when it detects that blood glucose levels are elevated. The function of insulin is to induce body tissues to capture glucose to use it as energy. Type 1 diabetes is the least common of both and accounts for 5-10% of cases. For confidential support call the Samaritans on 08457 90 9

tokenizer_config.json:   0%|          | 0.00/141 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.40k [00:00<?, ?B/s]

prophetnet.tokenizer:   0%|          | 0.00/232k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/90.0 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.57G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/293 [00:00<?, ?B/s]

Token indices sequence length is longer than the specified maximum sequence length for this model (2882 > 512). Running this sequence through the model will result in indexing errors


Model: microsoft/prophetnet-large-uncased
_________________________________________________________________



 Generated Summary without the pipeline: and i have to tell you that these words do you rather little justice. these words do not do you any justice. these are words that come to mind when i say diabetes... these words come to mind... words like sugar, insulin, glucose, glucose... and so on... there are so many different types of diabetes, each with its own characteristics... what characterizes diabetes? what characterizes it? there's so much going on here...

 Generated Summary with the pipeline: here is what i mean : i say that : here is : i say : diabetes is an incredibly complex disease that can do you little good : that's why i say that. here is : diabetes is incredibly complex disease : here's : and here is : here : i mean : here : here is what happens then, what happens then what happens? well, if such is the case then, then what then? if such as the case is then, then,

In [3]:
#Carga el modelo
print(translated_text)
model, tokenizer, device = initialize_model_and_tokenizer("tuner007/pegasus_summarizer")

summary = resumir_texto_paralelo(translated_text, model, tokenizer, device, max_length=400, print_option="yes")
_, resumen_1_try = resumir_texto_final([_, translated_text], model, tokenizer, device)
# Imprimir el resumen final
print("\n",summary)
print("\n",resumen_1_try)

If I ask you what words come to your mind when I say diabetes, you probably answer me things like sugar, insulin, glucose... but I have to tell you that these words do you rather little justice. Diabetes is a disease that can be incredibly complex and for which there are many types, each with its particularities. There's type 1 diabetes, type 2 diabetes, modi diabetes, gestational diabetes, neonatal diabetes... What characterizes diabetes if there are so many types? How can a sugar-absorbing disease lead to such serious problems as limb amputation, kidney failure, loss of vision, or even heart attack? Is there real reason to call it the 21st century epidemic? Today at Hiperactin we talk about diabetes. This video is sponsored by the Universitat Politècnica de València. In addition, this topic was voted by the channel's patrons in an ultra-fought poll as always, so if you also want to support this channel, if you want to support science channels, your favorite broadcasters and vote on t

tokenizer_config.json:   0%|          | 0.00/1.61k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/1.91M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/1.34k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.42k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/2.28G [00:00<?, ?B/s]

Token indices sequence length is longer than the specified maximum sequence length for this model (2755 > 1024). Running this sequence through the model will result in indexing errors


Proceso completado: 1. Porcentaje completado: 12.5%
Resumen: A new study has found that people with obesity lose the receptors for insulin, so they are unable to respond to this hormone and therefore to capture glucose. The study found that this insulin resistance and subsequent development of type 2 diabetes is so gradual that it makes it difficult to diagnose and also increases long-term complications.
Proceso completado: 2. Porcentaje completado: 25.0%
Resumen: Insulin stimulates cells to absorb the amino acids from the diet and use them to synthesize their proteins, which at this point in the channel should know that they are the molecules that perform virtually all the functions of a cell and allow it to function and be alive. Insulin is a hormone that is part of a very complex system.
Proceso completado: 3. Porcentaje completado: 37.5%
Resumen: Insulin is a hormone that secretes the pancreas when it detects that blood glucose levels are elevated. When the pancreas secretes it, in

In [None]:

# Define los modelos que quieres probar
model_names = ["tuner007/pegasus_summarizer", "facebook/bart-large-cnn", "t5-base", "microsoft/prophetnet-large-uncased"]

# Carga el conjunto de datos booksum
dataset = load_dataset('kmfoda/booksum')

# Inicializa el evaluador de ROUGE
scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)

# Para cada modelo
for model_name in model_names:
    # Carga el tokenizador y el modelo
    model, tokenizer, device = initialize_model_and_tokenizer(model_name)

    # Solo toma el primer ejemplo del conjunto de datos
    example = dataset['train'][0]

    # Define el texto que quieres resumir
    text = example['chapter']
    dictionary = json.loads(example['summary'])
    reference_summary = dictionary["summary"]
    reduced_text = generar_resumen_extractivo(text, ratio=0.3)

    #resumen 1 model
    _, summary_original = resumir_texto_final([_, text], model, tokenizer, device)

    #resume pipeline
    summary_pipeline = resumir_texto_paralelo(text, model, tokenizer, device, max_length=400, print_option="no")

    #resumen 1 model extractive
    _, summary_original_extracted = resumir_texto_final([_, reduced_text], model, tokenizer, device)

    #resume pipeline
    summary_pipeline_extracted = resumir_texto_paralelo(reduced_text, model, tokenizer, device, max_length=400, print_option="no")

    # Calcula las puntuaciones de ROUGE
    scores_original = scorer.score(summary_original,reference_summary)
    scores_pipeline = scorer.score(summary_pipeline,reference_summary)
    scores_original_extracted = scorer.score(summary_original_extracted,reference_summary)
    scores_pipeline_extracted = scorer.score(summary_pipeline_extracted,reference_summary)

    # Calcula las puntuaciones
    scores = {
        "model_name": [],
        "Model": [],
        "ROUGE1-Precision": [],
        "ROUGE1-F1": [],
        "ROUGE2-Precision": [],
        "ROUGE2-F1": [],
        "ROUGEL-Precision": [],
        "ROUGEL-F1": [],
        "BLEU": [],
        "METEOR": [],
        "Cosine Similarity": [],
        "BERTScore": [],
    }

    summaries = [summary_original, summary_pipeline, summary_original_extracted, summary_pipeline_extracted]
    names = ["Original", "Pipeline", "Original Extracted", "Pipeline Extracted"]

    for name, summary in zip(names, summaries):
        scores["model_name"].append(model_name)
        scores["Model"].append(name)
        rouge = scorer.score(summary, reference_summary)
        scores["ROUGE1-Precision"].append(rouge['rouge1'].precision)  # Añade la precisión de ROUGE1
        scores["ROUGE1-F1"].append(rouge['rouge1'].fmeasure)  # Añade la puntuación F1 de ROUGE1
        scores["ROUGE2-Precision"].append(rouge['rouge2'].precision)  # Añade la precisión de ROUGE2
        scores["ROUGE2-F1"].append(rouge['rouge2'].fmeasure)  # Añade la puntuación F1 de ROUGE2
        scores["ROUGEL-Precision"].append(rouge['rougeL'].precision)  # Añade la precisión de ROUGEL
        scores["ROUGEL-F1"].append(rouge['rougeL'].fmeasure)  # Añade la puntuación F1 de ROUGEL
        scores["BLEU"].append(sentence_bleu(reference_summary, summary))
        scores["METEOR"].append(meteor_score.single_meteor_score(reference_summary.split(), summary.split())) # Para la similitud del coseno, primero convertimos los textos a vectores
        vectorizer = CountVectorizer().fit_transform([summary, reference_summary])
        vectors = vectorizer.toarray()
        csim = cosine_similarity(vectors)
        scores["Cosine Similarity"].append(csim[0,1])  # Obtenemos la similitud del coseno del primer texto con el segundo

        # BERTScore
        #P, R, F1 = bert_score.score([summary], [reference_summary], lang='en', verbose=True)
        #print(f"BERTScore: P={P.mean()}, R={R.mean()}, F1={F1.mean()}")
        scores["BERTScore"].append(1)#P.mean())
    # Presenta los resultados en una tabla
    df = pd.DataFrame(scores)
    display(df)

    print(f"Model: {model_name}")

    print(f"Reference Summary: {reference_summary}")
    print("_________________________________________________________________\n\n")

    print(f"\n Generated Summary without the pipeline: {summary_original}")

    print(f"\n Generated Summary with the pipeline: {summary_pipeline}")

    print(f"\n Generated Summary with extractive summarization: {summary_original_extracted}")

    print(f"\n Generated Summary with pipeline and extractive summarization: {summary_pipeline_extracted}")

    print("_________________________________________________________________\n\n")


OutOfMemoryError: CUDA out of memory. Tried to allocate 2.00 MiB. GPU 0 has a total capacty of 14.75 GiB of which 1.06 MiB is free. Process 9001 has 14.74 GiB memory in use. Of the allocated memory 14.44 GiB is allocated by PyTorch, and 137.93 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [None]:
import pandas as pd
import torch
from torch.utils.data import DataLoader
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelWithLMHead

# Define los modelos que quieres probar
model_names = ["tuner007/pegasus_summarizer"]#, "facebook/bart-large-cnn", "t5-base", "microsoft/prophetnet-large-uncased"]

# Carga el conjunto de datos booksum
dataset = load_dataset('kmfoda/booksum')

# Inicializa el evaluador de ROUGE
scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)

# Inicializa las puntuaciones
scores = {
    "model_name": [],
    "Model": [],
    "ROUGE1-Precision": [],
    "ROUGE1-F1": [],
    "ROUGE2-Precision": [],
    "ROUGE2-F1": [],
    "ROUGEL-Precision": [],
    "ROUGEL-F1": [],
    "BLEU": [],
    "METEOR": [],
    "Cosine Similarity": [],
    "BERTScore": [],
}
results = {
    "model_name": [],
    "Model": [],
    "ROUGE1-Precision": [],
    "ROUGE1-F1": [],
    "ROUGE2-Precision": [],
    "ROUGE2-F1": [],
    "ROUGEL-Precision": [],
    "ROUGEL-F1": [],
    "BLEU": [],
    "METEOR": [],
    "Cosine Similarity": [],
    "BERTScore": [],
}

# Para cada modelo
for model_name in model_names:
    # Carga el tokenizador y el modelo solo una vez por modelo
    model, tokenizer, device = initialize_model_and_tokenizer(model_name)

    # Toma los primeros 100 ejemplos del conjunto de datos
    for i in range(0, 10, 10):  # Procesa los ejemplos en batches de 10

        texts = [dataset['train'][j]['chapter'] for j in range(i, min(i+10, 10))]

        reference_summaries = [json.loads(dataset['train'][j]['summary'])["summary"] for j in range(i, min(i+10, 100))]

        summaries = [ _ , summary_original = resumir_texto_final([_, text], model, tokenizer, device) for text in texts]

        for text, reference_summary, summary in zip(texts, reference_summaries, summaries):
            scores["model_name"].append(model_name)
            scores["Model"].append("Model")
            rouge = scorer.score(summary, reference_summary)
            scores["ROUGE1-Precision"].append(rouge['rouge1'].precision)
            scores["ROUGE1-F1"].append(rouge['rouge1'].fmeasure)
            scores["ROUGE2-Precision"].append(rouge['rouge2'].precision)
            scores["ROUGE2-F1"].append(rouge['rouge2'].fmeasure)
            scores["ROUGEL-Precision"].append(rouge['rougeL'].precision)
            scores["ROUGEL-F1"].append(rouge['rougeL'].fmeasure)
            scores["BLEU"].append(sentence_bleu(reference_summary, summary))
            scores["METEOR"].append(meteor_score.single_meteor_score(reference_summary.split(), summary.split())) # Para la similitud del coseno, primero convertimos los textos a vectores
            vectorizer = CountVectorizer().fit_transform([summary, reference_summary])
            vectors = vectorizer.toarray()
            csim = cosine_similarity(vectors)
            scores["Cosine Similarity"].append(csim[0,1])  # Obtenemos la similitud del coseno del primer texto con el segundo

            # BERTScore
            #P, R, F1 = bert_score.score([summary], [reference_summary], lang='en', verbose=True)
            scores["BERTScore"].append(1) #P.mean())

# Calcula las medias de las puntuaciones
for key in scores:
    if key not in ["model_name", "Model"]:
        results[key] = sum(scores[key]) / len(scores[key])

# Presenta los resultados en una tabla
df = pd.DataFrame(scores)
display(df)


In [None]:
 import numpy as np

# Define los modelos que quieres probar
model_names = ["tuner007/pegasus_summarizer", "facebook/bart-large-cnn", "t5-base", "microsoft/prophetnet-large-uncased"]

# Carga el conjunto de datos booksum
dataset = load_dataset('kmfoda/booksum')

# Inicializa el evaluador de ROUGE
scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)

# Para cada modelo
for model_name in model_names:
    # Carga el tokenizador y el modelo
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    if (model_name=="tuner007/pegasus_summarizer" or model_name=="microsoft/prophetnet-large-uncased"):
      model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
    else:
      model = AutoModelWithLMHead.from_pretrained(model_name)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)

    # Inicializa las listas para almacenar las puntuaciones
    rouge1_precision_scores = []
    rouge1_recall_scores = []
    rouge1_fmeasure_scores = []
    rouge2_precision_scores = []
    rouge2_recall_scores = []
    rouge2_fmeasure_scores = []
    rougeL_precision_scores = []
    rougeL_recall_scores = []
    rougeL_fmeasure_scores = []

    # Solo toma los primeros 10 ejemplos del conjunto de datos
    for i in range(3):
        # Define el texto que quieres resumir
        text = dataset['train'][i]['chapter']
        dictionary = json.loads(dataset['train'][i]['summary'])
        reference_summary = dictionary["summary"]

        summary = resumir_texto_paralelo(text, model, tokenizer, device, max_length=400, print_option="no")

        # Calcula las puntuaciones de ROUGE
        scores = scorer.score(summary,reference_summary)

        # Almacena las puntuaciones de ROUGE
        rouge1_precision_scores.append(scores['rouge1'].precision)
        rouge1_recall_scores.append(scores['rouge1'].recall)
        rouge1_fmeasure_scores.append(scores['rouge1'].fmeasure)
        rouge2_precision_scores.append(scores['rouge2'].precision)
        rouge2_recall_scores.append(scores['rouge2'].recall)
        rouge2_fmeasure_scores.append(scores['rouge2'].fmeasure)
        rougeL_precision_scores.append(scores['rougeL'].precision)
        rougeL_recall_scores.append(scores['rougeL'].recall)
        rougeL_fmeasure_scores.append(scores['rougeL'].fmeasure)

    # Calcula la media de las puntuaciones de ROUGE
    mean_rouge1_precision = np.mean(rouge1_precision_scores)
    mean_rouge1_recall = np.mean(rouge1_recall_scores)
    mean_rouge1_fmeasure = np.mean(rouge1_fmeasure_scores)
    mean_rouge2_precision = np.mean(rouge2_precision_scores)
    mean_rouge2_recall = np.mean(rouge2_recall_scores)
    mean_rouge2_fmeasure = np.mean(rouge2_fmeasure_scores)
    mean_rougeL_precision = np.mean(rougeL_precision_scores)
    mean_rougeL_recall = np.mean(rougeL_recall_scores)
    mean_rougeL_fmeasure = np.mean(rougeL_fmeasure_scores)

    print(f"Model: {model_name}")
    print(f"Mean ROUGE-1 Precision: {mean_rouge1_precision}")
    print(f"Mean ROUGE-1 Recall: {mean_rouge1_recall}")
    print(f"Mean ROUGE-1 F-measure: {mean_rouge1_fmeasure}")
    print("__________________\n\n")
    print(f"Mean ROUGE-2 Precision: {mean_rouge2_precision}")
    print(f"Mean ROUGE-2 Recall: {mean_rouge2_recall}")
    print(f"Mean ROUGE-2 F-measure: {mean_rouge2_fmeasure}")
    print("__________________\n\n")
    print(f"Mean ROUGE-L Precision: {mean_rougeL_precision}")
    print(f"Mean ROUGE-L Recall: {mean_rougeL_recall}")
    print(f"Mean ROUGE-L F-measure: {mean_rougeL_fmeasure}")
    print("_________________________________________________________________\n\n")


Token indices sequence length is longer than the specified maximum sequence length for this model (8550 > 1024). Running this sequence through the model will result in indexing errors


Model: tuner007/pegasus_summarizer
Mean ROUGE-1 Precision: 0.3069334288899506
Mean ROUGE-1 Recall: 0.35754230598351383
Mean ROUGE-1 F-measure: 0.3239347260215229
__________________


Mean ROUGE-2 Precision: 0.044415364722664864
Mean ROUGE-2 Recall: 0.05317180786541056
Mean ROUGE-2 F-measure: 0.04739512605354188
__________________


Mean ROUGE-L Precision: 0.14288501353718744
Mean ROUGE-L Recall: 0.1678443750465318
Mean ROUGE-L F-measure: 0.15146931502605057
_________________________________________________________________




Token indices sequence length is longer than the specified maximum sequence length for this model (9758 > 1024). Running this sequence through the model will result in indexing errors


KeyboardInterrupt: 