# In [1]:
# model.py
from transformers import pipeline
import nltk
from nltk.tokenize import sent_tokenize
import torch

# Download the NLTK sentence-tokenizer data used by sent_tokenize.
# Newer NLTK releases (3.9+) load the tokenizer from 'punkt_tab', while older
# releases use the legacy 'punkt' package; fetching both keeps sent_tokenize
# working regardless of the installed NLTK version.
nltk.download('punkt', quiet=True)
nltk.download('punkt_tab', quiet=True)

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the pre-trained summarization model (T5) once at import time so every
# call to the summarizer reuses the same pipeline instance.
summarizer = pipeline("summarization", model="t5-small", device=device)

def preprocess_text(text, max_chars=1024):
    """Normalize *text* into space-joined sentences within a character budget.

    The text is split into sentences and re-joined with single spaces. Whole
    sentences are kept for as long as the joined result stays within
    ``max_chars`` characters, so the output does not end in the middle of a
    sentence. If the very first sentence is already longer than the budget,
    it is hard-truncated to ``max_chars`` characters.

    Args:
        text: Raw input text. ``None``, empty, or whitespace-only input
            yields an empty string.
        max_chars: Maximum length of the returned string, in characters
            (not model tokens). Must be non-negative.

    Returns:
        The preprocessed text, at most ``max_chars`` characters long.

    Raises:
        ValueError: If ``max_chars`` is negative.
    """
    if max_chars < 0:
        raise ValueError("max_chars must be non-negative")

    # Nothing to tokenize: skip the tokenizer entirely.
    if not text or not text.strip():
        return ""

    sentences = sent_tokenize(text)
    if not sentences:
        return ""

    kept = []
    used = 0
    for sentence in sentences:
        # Account for the single space that joins this sentence to the last.
        cost = len(sentence) + (1 if kept else 0)
        if used + cost > max_chars:
            break
        kept.append(sentence)
        used += cost

    if not kept:
        # A single over-long sentence: a hard cut is the only option left.
        return sentences[0][:max_chars]

    return " ".join(kept)

def generate_summary(text, max_length=150, min_length=50):
    """Return an abstractive summary of *text* from the module's summarizer.

    Args:
        text: The text to summarize. ``None``, empty, or whitespace-only
            input returns an empty string without invoking the model.
        max_length: Upper bound on the generated summary length, passed to
            the summarization pipeline.
        min_length: Lower bound on the generated summary length, passed to
            the summarization pipeline.

    Returns:
        The generated summary string, or ``""`` when there is nothing to
        summarize.
    """
    # Guard first: with no input the model would still be forced to emit at
    # least ``min_length`` tokens of made-up text.
    if not text or not text.strip():
        return ""

    # Preprocess the input text
    preprocessed_text = preprocess_text(text)

    # Generate summary. ``truncation=True`` lets the tokenizer clip inputs
    # that exceed the model's maximum input length; the character budget in
    # preprocess_text does not bound the token count on its own.
    summary = summarizer(
        preprocessed_text,
        max_length=max_length,
        min_length=min_length,
        do_sample=False,
        truncation=True,
    )

    return summary[0]['summary_text']

# Demo: summarize a short passage about AI when the file is run as a script.
if __name__ == "__main__":
    sample_text = """
    Artificial intelligence (AI) is intelligence demonstrated by machines, as opposed to natural intelligence displayed by animals including humans. AI research has been defined as the field of study of intelligent agents, which refers to any system that perceives its environment and takes actions that maximize its chance of achieving its goals.
    The term "artificial intelligence" had previously been used to describe machines that mimic and display "human" cognitive skills that are associated with the human mind, such as "learning" and "problem-solving". This definition has since been rejected by major AI researchers who now describe AI in terms of rationality and acting rationally, which does not limit how intelligence can be articulated.
    """

    # Summarize the passage and print the result in one step.
    print("Summary:", generate_summary(sample_text))