In [None]:
import random
import torch
import numpy as np

def set_seed(seed_value=42):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy's global generator, and PyTorch
    (via ``torch.manual_seed`` and ``torch.cuda.manual_seed_all``), then
    sets the cuDNN ``deterministic``/``benchmark`` flags.

    Args:
        seed_value (int): Seed handed to each generator. Defaults to 42.
    """
    # Every seeding call takes the same single argument, so apply them in turn.
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed_value)

    # cuDNN flags: request deterministic kernels and turn off auto-tuning.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

# Fix the seed once, up front; 42 is arbitrary and any other integer works.
set_seed(seed_value=42)

In [None]:
# Load the baseline and the fine-tuned BART summarization pipelines.
from transformers import pipeline

# Use the first GPU when CUDA is available; otherwise fall back to CPU (-1)
# so the notebook still runs on machines without a GPU.
pipelineDevice = 0 if torch.cuda.is_available() else -1

summarizer = pipeline("summarization", model="facebook/bart-large-cnn", device=pipelineDevice)

# Local directory holding the fine-tuned checkpoint.
ftModelPath = './fine_tuned_BART_summarization'
ftSummarizer = pipeline("summarization", model=ftModelPath, device=pipelineDevice)



In [None]:
def get_summary_from_lm(rawText, model, max_ratio=0.2, min_ratio=0.02):
  """Summarize ``rawText`` with ``model`` and return the summary text.

  The ``max_length`` and ``min_length`` passed to ``model`` are fractions of
  the input length. When ``model`` exposes a ``tokenizer`` attribute, the
  input length is measured in tokens, so the bounds use the same unit as the
  generation limits and no longer exceed the input length. Without a
  tokenizer, the character count is used instead.

  Args:
    rawText (str): Text to summarize.
    model: Callable invoked as
      ``model(text, max_length=..., min_length=..., do_sample=False)`` that
      returns a sequence whose first item has a ``'summary_text'`` key.
    max_ratio (float): Fraction of the input length used as ``max_length``.
    min_ratio (float): Fraction of the input length used as ``min_length``.

  Returns:
    The ``'summary_text'`` value of the first item returned by ``model``.
  """
  tokenizer = getattr(model, "tokenizer", None)
  if tokenizer is not None:
    # Measure the input in tokens, the unit the length limits are expressed in.
    input_length = len(tokenizer(rawText, truncation=True)["input_ids"])
  else:
    input_length = len(rawText)

  # Never request a zero-length summary, and keep min_length <= max_length.
  max_length = max(1, round(input_length * max_ratio))
  min_length = min(round(input_length * min_ratio), max_length)

  output = model(rawText, max_length=max_length, min_length=min_length, do_sample=False)
  return output[0]['summary_text']


In [None]:
import pandas as pd

# Read the 'abstract_translated' column from each CSV, Spanish file first,
# then the Japanese one.
esAbstract, jpAbstract = (
    pd.read_csv(csvName)['abstract_translated']
    for csvName in ('papers_EStoEN.csv', 'papers_JPtoEN.csv')
)

In [None]:
# Summaries from the baseline pipeline (`summarizer`), one per abstract.
BART_summaryList_ES = [get_summary_from_lm(text, summarizer) for text in esAbstract]

BART_summaryList_JP = [get_summary_from_lm(text, summarizer) for text in jpAbstract]


Your max_length is set to 259, but your input_length is only 233. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=116)
Your max_length is set to 241, but your input_length is only 202. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=101)
Your max_length is set to 74, but your input_length is only 72. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=36)
Your max_length is set to 181, but your input_length is only 167. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=83)


In [None]:
# Summaries from the fine-tuned pipeline (`ftSummarizer`), one per abstract.
ft_BART_summaryList_ES = [get_summary_from_lm(text, ftSummarizer) for text in esAbstract]

ft_BART_summaryList_JP = [get_summary_from_lm(text, ftSummarizer) for text in jpAbstract]


Your max_length is set to 259, but your input_length is only 233. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=116)
Your max_length is set to 241, but your input_length is only 202. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=101)
Your max_length is set to 74, but your input_length is only 72. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=36)
Your max_length is set to 181, but your input_length is only 167. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=83)


In [None]:
# One column per model: "BART" holds the baseline pipeline's summaries and
# "BART_ft" holds the fine-tuned pipeline's summaries.
resultDictES = dict(BART=BART_summaryList_ES, BART_ft=ft_BART_summaryList_ES)
resultDictJP = dict(BART=BART_summaryList_JP, BART_ft=ft_BART_summaryList_JP)

# Write one CSV per source set.
pd.DataFrame.from_dict(resultDictES).to_csv(path_or_buf="summarization_BART_pipeline_ES.csv")
pd.DataFrame.from_dict(resultDictJP).to_csv(path_or_buf="summarization_BART_pipeline_JP.csv")