In [None]:
import random
import torch
import numpy as np

def set_seed(seed_value=42):
    """Seed every random number generator this notebook relies on.

    Python's ``random`` module, NumPy's global generator and PyTorch (CPU and
    all CUDA devices) are seeded with the same value, after which cuDNN is
    switched to deterministic mode with auto-tuning disabled.

    Args:
        seed_value (int): Seed handed to each generator. Defaults to 42.
    """
    # Same order as always: stdlib, NumPy, torch CPU, torch CUDA.
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed_value)

    # Ask cuDNN for deterministic kernels and turn off its benchmark mode.
    cudnn_backend = torch.backends.cudnn
    cudnn_backend.deterministic = True
    cudnn_backend.benchmark = False

# Seed all generators once, before any model is loaded; any integer works here.
set_seed(seed_value=42)

In [None]:
#Load model directly
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Baseline: the public BART summarization checkpoint and its tokenizer.
model_plain = AutoModelForSeq2SeqLM.from_pretrained("facebook/bart-large-cnn")
tokenizer_plain = AutoTokenizer.from_pretrained("facebook/bart-large-cnn")

# Fine-tuned variant, loaded from a local directory.
model_path = "./fine_tuned_BART_summarization"
model_fineTune = AutoModelForSeq2SeqLM.from_pretrained(model_path)
tokenizer_fineTune = AutoTokenizer.from_pretrained(model_path)

# Use the GPU when one is available, otherwise stay on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Assign the result so the cell does not end on a bare expression
# (which would dump the full module repr into the notebook output).
model_plain = model_plain.to(device)
model_fineTune = model_fineTune.to(device)

# NOTE: the tokenizers' pad/eos tokens are deliberately left as loaded.
# Both checkpoints ship their own special tokens; overwriting them with
# "[PAD]" / "[EOS]" (an idiom for decoder-only models that lack a pad token)
# points the tokenizer at strings the model was never trained with and stops
# the real padding token from being treated as special when decoding.






In [None]:
def clean_generated_text(output):
    """Collapse every run of whitespace in ``output`` into a single space.

    Leading and trailing whitespace is dropped as a consequence of splitting
    on whitespace and re-joining the pieces.

    Args:
        output (str): Text to normalize.

    Returns:
        str: The whitespace-normalized text.
    """
    words = output.split()
    return " ".join(words)

def get_summary_from_llm(rawText, model, tokenizer, max_input_length=None, verbose=True):
    """Summarize one abstract with a seq2seq model and return the decoded text.

    Args:
        rawText (str): Abstract to summarize. Anything that is not a non-blank
            string (for example the float NaN pandas yields for an empty CSV
            cell) produces an empty summary without calling the model.
        model: Model exposing ``device`` and ``generate`` and, optionally,
            ``config.max_position_embeddings``.
        tokenizer: Callable tokenizer whose result provides ``input_ids`` and
            ``attention_mask``, and which exposes ``decode``.
        max_input_length (int, optional): Token budget the prompt is truncated
            to. When omitted, ``model.config.max_position_embeddings`` is used
            if the model has it; otherwise no explicit limit is passed.
        verbose (bool): Print the summary before returning it. Defaults to
            True, which matches the function's previous behaviour.

    Returns:
        str: The stripped summary, or ``""`` when ``rawText`` is missing/blank.
    """
    # Missing cells arrive as NaN; formatting them into the prompt would make
    # the model summarize the literal text "nan".
    if not isinstance(rawText, str) or not rawText.strip():
        return ""

    # Define the prompt (whitespace kept exactly as before so the model input
    # for a given abstract does not change).
    prompt = f"""Summarize the following research abstract. Focus on main contributions.\n\n
   {rawText} \n\n
    Summary: """

    # truncation=True is a no-op unless a maximum length is known, so fall back
    # to the model's positional limit when the caller did not supply one.
    if max_input_length is None:
        model_config = getattr(model, "config", None)
        max_input_length = getattr(model_config, "max_position_embeddings", None)

    # Tokenize the input
    inputs = tokenizer(
        prompt,
        add_special_tokens=True,  # Ensure model-specific tokens are added
        return_tensors="pt",
        padding=False,  # Single sequence, nothing to pad against
        truncation=True,
        max_length=max_input_length,
        return_attention_mask=True,
    )

    # Move tensors to the model's device
    input_ids = inputs['input_ids'].to(model.device)
    attention_mask = inputs['attention_mask'].to(model.device)

    # Generate output. Decoding is kept deterministic: `temperature` only has
    # an effect when sampling, so it is no longer passed.
    output_ids = model.generate(
        input_ids=input_ids,
        attention_mask=attention_mask,
        max_new_tokens=300,
        min_new_tokens=10,
        do_sample=False,
    )

    # Decode the generated output
    generated_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)

    # Strip the prompt if the model echoed it back verbatim. For an
    # encoder-decoder model the output normally does not contain the prompt,
    # in which case this leaves the text untouched.
    output_cleaned = generated_text.replace(prompt, "").strip()

    if verbose:
        print(output_cleaned)
    return output_cleaned


In [None]:
import pandas as pd
# Read the translated-abstract column from the ES->EN and JP->EN paper tables,
# in that order.
esAbstract, jpAbstract = (
    pd.read_csv(csv_path)['abstract_translated']
    for csv_path in ('csvFiles/papers_EStoEN.csv', 'csvFiles/papers_JPtoEN.csv')
)

In [None]:
# Summaries from the baseline checkpoint, one per abstract, in input order.
BART_summaryList_ES = [
    get_summary_from_llm(es_text, model_plain, tokenizer_plain)
    for es_text in esAbstract
]

BART_summaryList_JP = [
    get_summary_from_llm(jp_text, model_plain, tokenizer_plain)
    for jp_text in jpAbstract
]


Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


The paper presents a case of urinary balantidiasis in a patient having chronic obstructive pulmonary disease. The results showed an average reduction of 54% in the new planned routes, compared to a sequential route.
The paper presents a case of urinary balantidiasis in a patient having chronic obstructive pulmonary disease who was on steroids for a long time. The computational program for the modeling of the fault by overturning in two dimensions (2D), called volteoLab, allows to interpret the fault mode type overturning.
The paper presents a case of urinary balantidiasis in a patient having chronic obstructive pulmonary disease who was on steroids for a long time. The study deals with the identification of the development of spatial visual skills in tasks of representation of revolutionary solids.
The paper presents a case of urinary balantidiasis in a patient having chronic obstructive pulmonary disease who was on steroids for a long time. This article presents a description of decis

In [None]:
# Summaries from the fine-tuned checkpoint, one per abstract, in input order.
ft_BART_summaryList_ES = [
    get_summary_from_llm(es_text, model_fineTune, tokenizer_fineTune)
    for es_text in esAbstract
]

ft_BART_summaryList_JP = [
    get_summary_from_llm(jp_text, model_fineTune, tokenizer_fineTune)
    for jp_text in jpAbstract
]


Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


of the study, the researchers developed a genetic algorithm based on an ant colony to generate, theoretically, the verification routes for the monitoring and early detection of forest fires in the State of Mexico by means of unmanned aerial devices.
iaiglia coli is the causative ciliated protozoan protozoan protozoan disease. The study describes a new approach to model the fault by overturning in two dimensions (2D)
ine-related disease in a study on the development of spatial visual skills in students studying integral calculus. The study examined the development of spatial visual skills in the representation of revolutionary solids and conversion between registers to calculate their volume.
ine decision trees to determine whether a room is occupied or not. This article demonstrates empirically that it is possible to determine whether a room is occupied or not, using the variables temperature, humidity, luminosity, luminosity, CO2 level and humidity radius.
ine-disease is a rare zoonot

In [None]:
# One column per model: baseline first, fine-tuned second.
summary_columns = ("BART", "BART_ft")
resultDictES = dict(zip(summary_columns, (BART_summaryList_ES, ft_BART_summaryList_ES)))
resultDictJP = dict(zip(summary_columns, (BART_summaryList_JP, ft_BART_summaryList_JP)))

# Write each language's comparison table to its own CSV (ES first, then JP).
for csv_name, summaries in (
    ("summarization_BART_ES.csv", resultDictES),
    ("summarization_BART_JP.csv", resultDictJP),
):
    pd.DataFrame(summaries).to_csv(csv_name)