<a href="https://colab.research.google.com/github/SAKTHIKISHORE007/csa4736/blob/main/Untitled18.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import torch
from transformers import T5Tokenizer, T5ForConditionalGeneration
import pandas as pd
from sklearn.model_selection import train_test_split

class TextSummarizer:
    """Fine-tune a T5 checkpoint on text/summary pairs and generate summaries.

    The tokenizer and model are loaded once in ``__init__``; the model is
    moved to CUDA when it is available and otherwise stays on the CPU.

    NOTE(review): texts are passed to the model exactly as given. The public
    T5 checkpoints are normally prompted with a task prefix such as
    ``"summarize: "`` - callers who want that should prepend it themselves.
    """

    # Label value that the T5 loss skips; padded label positions are set to
    # this so padding does not contribute to the training loss.
    _IGNORE_INDEX = -100

    def __init__(self, model_name='t5-small'):
        """Load the tokenizer and model for ``model_name`` and pick a device.

        Args:
            model_name: Checkpoint name or path handed to ``from_pretrained``.
        """
        self.tokenizer = T5Tokenizer.from_pretrained(model_name)
        self.model = T5ForConditionalGeneration.from_pretrained(model_name)
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.model.to(self.device)

    @staticmethod
    def _as_batch(values):
        """Return ``values`` unchanged if it is a ``str``, otherwise as a list.

        The tokenizer only accepts strings and plain lists/tuples, so other
        iterables (pandas Series, numpy arrays, ...) are materialised first.
        """
        return values if isinstance(values, str) else list(values)

    @staticmethod
    def _mask_padding(label_ids, attention_mask):
        """Return a copy of ``label_ids`` with padded positions set to -100.

        Positions where ``attention_mask`` is 0 are padding. ``label_ids`` is
        not modified.
        """
        return label_ids.masked_fill(attention_mask == 0, TextSummarizer._IGNORE_INDEX)

    def preprocess_data(self, texts, summaries, max_input_length=512, max_target_length=150):
        """Tokenize source texts and target summaries into padded tensors.

        Args:
            texts: A string or an iterable of strings to summarize.
            summaries: The reference summary/summaries, aligned with ``texts``.
            max_input_length: Inputs are truncated to this many tokens.
            max_target_length: Summaries are truncated to this many tokens.

        Returns:
            ``(inputs, targets)``: the two tokenizer outputs (PyTorch tensors,
            padded to the longest sequence in each batch). The target ids are
            returned unmasked; ``train`` masks padding before using them as
            labels.
        """
        inputs = self.tokenizer(
            self._as_batch(texts),
            max_length=max_input_length,
            padding=True,
            truncation=True,
            return_tensors='pt'
        )

        targets = self.tokenizer(
            self._as_batch(summaries),
            max_length=max_target_length,
            padding=True,
            truncation=True,
            return_tensors='pt'
        )

        return inputs, targets

    def _build_batch(self, texts, summaries):
        """Tokenize text/summary pairs into model keyword arguments on ``self.device``."""
        inputs, targets = self.preprocess_data(texts, summaries)
        labels = self._mask_padding(targets['input_ids'], targets['attention_mask'])
        return {
            'input_ids': inputs['input_ids'].to(self.device),
            'attention_mask': inputs['attention_mask'].to(self.device),
            'labels': labels.to(self.device),
        }

    def train(self, train_texts, train_summaries, val_texts=None, val_summaries=None, epochs=3):
        """Fine-tune the model with one full-batch AdamW step per epoch.

        Args:
            train_texts: Iterable of source texts.
            train_summaries: Iterable of reference summaries, aligned with
                ``train_texts``.
            val_texts: Optional validation texts. When ``None``, 20% of the
                training pairs are held out with ``train_test_split`` and the
                remaining 80% are used for training.
            val_summaries: Reference summaries for ``val_texts``. If
                ``val_texts`` is given without summaries, validation is skipped.
            epochs: Number of optimizer steps over the whole training batch.

        Prints the training loss after every epoch, followed by the validation
        loss when a validation set is available.
        """
        if val_texts is None:
            # No validation set supplied: hold out part of the training data.
            train_texts, val_texts, train_summaries, val_summaries = train_test_split(
                train_texts, train_summaries, test_size=0.2
            )

        # Tokenize once and move to the device once, outside the epoch loop.
        train_batch = self._build_batch(train_texts, train_summaries)
        val_batch = None
        if val_summaries is not None:
            val_batch = self._build_batch(val_texts, val_summaries)

        optimizer = torch.optim.AdamW(self.model.parameters(), lr=1e-4)

        for epoch in range(epochs):
            self.model.train()
            optimizer.zero_grad()

            loss = self.model(**train_batch).loss
            loss.backward()
            optimizer.step()

            message = f"Epoch {epoch+1}/{epochs}, Loss: {loss.item()}"
            if val_batch is not None:
                # Evaluate without dropout and without building a graph.
                self.model.eval()
                with torch.no_grad():
                    val_loss = self.model(**val_batch).loss
                message += f", Val Loss: {val_loss.item()}"
            print(message)

    def generate_summary(self, text, max_length=150, num_return_sequences=1):
        """Generate one or more summaries for ``text``.

        Args:
            text: A string, or an iterable of strings to summarize as a batch.
            max_length: Maximum length (in tokens) of each generated summary.
            num_return_sequences: Number of summaries to return per input.

        Returns:
            A list of decoded summary strings with special tokens removed.
        """
        inputs = self.tokenizer(
            self._as_batch(text),
            max_length=512,
            padding=True,  # lets a batch of differently sized texts be stacked
            truncation=True,
            return_tensors='pt'
        ).to(self.device)

        # Returning several sequences needs at least that many beams; keep the
        # checkpoint's own beam count when it is already large enough.
        generation_config = getattr(self.model, 'generation_config', None)
        default_beams = getattr(generation_config, 'num_beams', None) or 1
        num_beams = max(default_beams, num_return_sequences)

        # train() leaves the model in training mode; switch dropout off here.
        self.model.eval()
        summary_ids = self.model.generate(
            input_ids=inputs['input_ids'],
            attention_mask=inputs['attention_mask'],
            num_beams=num_beams,
            num_return_sequences=num_return_sequences,
            max_length=max_length,
            # early_stopping is a beam-search option; only request it then.
            early_stopping=num_beams > 1
        )

        return [
            self.tokenizer.decode(sequence, skip_special_tokens=True)
            for sequence in summary_ids
        ]

def main():
    """Demo: summarize a short sample paragraph and print text and summary."""
    # Sample long text
    sample_text = """
    Artificial Intelligence is transforming multiple industries
    by providing advanced computational capabilities. Machine learning
    algorithms can now process vast amounts of data, recognize patterns,
    and make intelligent decisions across various domains including
    healthcare, finance, and technology sectors.
    """

    # Build the summarizer with its default checkpoint and run it once.
    demo = TextSummarizer()
    generated = demo.generate_summary(sample_text)

    print("Original Text:", sample_text)
    print("Generated Summary:", generated[0])

# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

Original Text: 
    Artificial Intelligence is transforming multiple industries 
    by providing advanced computational capabilities. Machine learning 
    algorithms can now process vast amounts of data, recognize patterns, 
    and make intelligent decisions across various domains including 
    healthcare, finance, and technology sectors.
    
Generated Summary: Intelligence is transforming multiple industries by providing advanced computational capabilities. Machine learning algorithms can process vast amounts of data, recognize patterns, and make intelligent decisions across various domains including healthcare, finance, and technology sectors.
