In [None]:
# Load the whole novel from the Novels folder into memory as one string.
# `text` is consumed by the summarization cell further down.
file_path = 'Novels/novel.txt'
with open(file_path, encoding='utf-8', mode='r') as file:
    text = file.read()

In [None]:
# Report whether a CUDA-capable GPU is visible to PyTorch.
import torch

if not torch.cuda.is_available():
    print("GPU is not available.")
else:
    # Device index 0 is the first CUDA device.
    print(f"GPU is available: {torch.cuda.get_device_name(0)}")


In [None]:
from transformers import T5Tokenizer, T5ForConditionalGeneration
from tqdm.auto import tqdm  # Import tqdm for progress bars

# Load the pretrained T5 checkpoint: the tokenizer first, then the
# sequence-to-sequence model, both resolved from the same checkpoint name.
model_name = 't5-small'
tokenizer, model = (
    T5Tokenizer.from_pretrained(model_name),
    T5ForConditionalGeneration.from_pretrained(model_name),
)

# Function to chunk `text`
def chunk_text(text, chunk_size=400):
    """Split ``text`` into consecutive, non-overlapping character chunks.

    Chunking is done by character count, not by words or tokens, so a chunk
    boundary can fall in the middle of a word.

    Args:
        text: The string to split.
        chunk_size: Maximum number of characters per chunk. Must be positive.

    Returns:
        A list of substrings that cover ``text`` in order. Every chunk holds
        exactly ``chunk_size`` characters except possibly the last one, which
        holds the remainder. An empty ``text`` yields an empty list.

    Raises:
        ValueError: If ``chunk_size`` is zero or negative.
    """
    # A negative step would make range() empty and silently drop all of the
    # text; a zero step raises an opaque range() error. Fail loudly instead.
    if chunk_size <= 0:
        raise ValueError(f"chunk_size must be a positive integer, got {chunk_size!r}")
    return [text[start:start + chunk_size] for start in range(0, len(text), chunk_size)]

# `text` must already be defined: it is loaded from 'Novels/novel.txt' in the
# first cell of this notebook, so run that cell before this one.

# Chunk the text
chunks = chunk_text(text)

# Run generation on the GPU when one is available, otherwise on the CPU.
# `torch` is imported by the GPU-check cell earlier in the notebook.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Summarize each chunk with progress updates
summaries = []
for chunk in tqdm(chunks, desc="Summarizing"):
    # A whitespace-only chunk (e.g. trailing newlines at the end of the file)
    # has nothing to summarize; with min_length=40 the model would still be
    # pushed to emit text for it, so skip it.
    if not chunk.strip():
        continue
    # Prepend with 'summarize: ' and ensure the input is within model limits.
    # The encoded tensor must live on the same device as the model.
    inputs = tokenizer.encode(
        "summarize: " + chunk, return_tensors="pt", max_length=512, truncation=True
    ).to(device)
    summary_ids = model.generate(
        inputs,
        max_length=150,
        min_length=40,
        length_penalty=2.0,
        num_beams=4,
        early_stopping=True,
    )
    # generate() returns a batch; there is one sequence per call here.
    summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
    summaries.append(summary)

# Combine summaries
final_summary = " ".join(summaries)
print(final_summary)

