In [14]:
import os
from BookSpliter import split_book
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

In [15]:
# Load the Pegasus summarization model and its matching tokenizer.
# NOTE(review): 'pegasus_summaryflow_model' looks like a local checkpoint directory
# resolved relative to the notebook's working directory — confirm before running elsewhere.
model_checkpoint = 'pegasus_summaryflow_model'
# `model` and `tokenizer` are module-level globals that summarize_book() reads directly,
# so this cell must run before any summarization cell.
model = AutoModelForSeq2SeqLM.from_pretrained(model_checkpoint)
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)

In [16]:
# Summarize the book level by level until it is short enough
def summarize_book(book, max_paragraphs=10):
    """Summarize ``book`` repeatedly until it splits into few enough paragraphs.

    Each pass splits the text with ``split_book``, summarizes every paragraph
    with the module-level ``model``/``tokenizer``, and joins the summaries
    with newlines to form the input of the next pass.

    Args:
        book: Full text to summarize.
        max_paragraphs: Stop as soon as the text splits into at most this
            many paragraphs (defaults to 10, the original threshold).

    Returns:
        The paragraphs of the final level joined with newlines.

    Notes:
        The loop also stops when a pass fails to reduce the number of
        paragraphs. Without that guard a text whose summaries split into as
        many paragraphs as before would be re-summarized forever.
    """
    paragraphs = split_book(book)
    while len(paragraphs) > max_paragraphs:
        summary_list = []
        for paragraph in paragraphs:
            # Tokenize the input text; input beyond 1024 tokens is truncated
            inputs = tokenizer(paragraph, return_tensors="pt", max_length=1024, truncation=True)
            # Generate the summary on the same device as the model
            summary_ids = model.generate(inputs.input_ids.to(model.device))
            # Decode the summary tokens back to text
            summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
            summary_list.append(summary)

        # Combine all summaries into the next, shorter version of the book
        summarized_book = "\n".join(summary_list)
        print('A level is summarized successfully!!')
        print('Summarized book : ')
        print(summarized_book)

        # Split once and reuse the result both for the progress check and
        # as the input of the next pass.
        next_paragraphs = split_book(summarized_book)
        made_progress = len(next_paragraphs) < len(paragraphs)
        paragraphs = next_paragraphs
        if not made_progress:
            # Another pass would not get any closer to the threshold.
            print('Summarization stopped: paragraph count is no longer shrinking.')
            break

    return "\n".join(paragraphs)


In [17]:
def save_summary(book_name, summary, summaries_folder=r"D:\Gethub\SummaryFlow\Summaries"):
    """Write ``summary`` to ``<summaries_folder>/<book_name> summary.txt``.

    Args:
        book_name: Title used as the prefix of the output file name.
        summary: Text to write.
        summaries_folder: Directory that receives the file. It is created
            (including missing parents) when it does not exist. Defaults to
            the project's original Summaries folder, so existing
            two-argument calls behave as before.
    """
    # Create the target folder if it doesn't already exist
    os.makedirs(summaries_folder, exist_ok=True)

    # Generate the file path for the summary text file
    summary_file_path = os.path.join(summaries_folder, book_name + " summary.txt")

    # Write as UTF-8 so non-ASCII characters survive regardless of OS locale
    with open(summary_file_path, 'w', encoding='utf-8') as f:
        f.write(summary)

In [18]:
# Try to summarize a known book

# Path of the plain-text book to summarize
book_path = r"D:\Gethub\SummaryFlow\WholeNovels\Debt of Honor.txt"

# Read the entire book from the text file into a string.
# The encoding is explicit so the result does not depend on the OS locale and
# matches the UTF-8 used when the summary is written.
# NOTE(review): assumes the novel files are stored as UTF-8 — confirm.
with open(book_path, 'r', encoding='utf-8') as file:
    book = file.read()

# Summarize the book and print it
final_summary = summarize_book(book)
print("Final Book Summary : ")
print(final_summary)

# Save the summary under the name of the book that was actually summarized
# (derived from the file name, so it cannot drift from book_path).
book_name = os.path.splitext(os.path.basename(book_path))[0]
save_summary(book_name, final_summary)


Splited successfully !!


KeyboardInterrupt: 