# Summariser AI

# Install the generative AI packages

In [None]:
# Install required packages in Google Colab
# The lines starting with "!" are notebook shell escapes, so this cell is only
# valid inside a notebook (Colab/Jupyter), not in a plain Python script.
# NOTE(review): the summarizer cell also imports `openai`, which is not
# installed here - confirm it is already available in the target environment.

!pip install google-generativeai
!pip install transformers
!pip install torch
!pip install nltk

# These messages are printed unconditionally; they do not check pip's exit status.
print("✅ All packages installed successfully!")
print("Now you can run the main summarizer code.")

Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5

# Multi-model text summariser

In [None]:
import heapq
import os
import re
import time
import warnings
from typing import Dict, List, Optional

import google.generativeai as genai
import nltk
import openai
from nltk import FreqDist
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize, sent_tokenize
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
warnings.filterwarnings("ignore")

# Download required NLTK data.
# nltk.download() signals a failed download through its boolean return value
# rather than by raising, so each result is checked before claiming success.
_NLTK_RESOURCES = ('punkt', 'punkt_tab', 'stopwords')
try:
    _nltk_failed = [
        resource
        for resource in _NLTK_RESOURCES
        if not nltk.download(resource, quiet=True)
    ]
    if _nltk_failed:
        print(f"⚠️ NLTK download warning: could not download {', '.join(_nltk_failed)}")
    else:
        print("✅ NLTK data downloaded successfully")
except Exception as e:
    print(f"⚠️ NLTK download warning: {e}")
    # Try alternative downloads (best effort; report instead of hiding a failure)
    try:
        nltk.download('all', quiet=True)
    except Exception as fallback_error:
        print(f"⚠️ NLTK fallback download failed: {fallback_error}")

class TextSummarizer:
    """Summarize text with Gemini, a HuggingFace BART pipeline and a
    frequency-based extractive method, and compare the results.

    Every ``*_summarize`` method returns a string: either the summary or a
    human-readable error/unavailability message. They do not raise.
    """

    # Gemini model names tried in order until one can be constructed.
    _GEMINI_MODEL_CANDIDATES = (
        'gemini-1.5-flash',
        'gemini-1.0-pro',
        'models/gemini-1.5-flash',
    )

    # Instruction text per summary type; the input text is appended after ":\n\n".
    _GEMINI_INSTRUCTIONS = {
        "concise": "Provide a concise summary of the following text in 2-3 sentences",
        "detailed": "Provide a detailed summary of the following text, covering all main points",
        "bullet_points": "Summarize the following text as bullet points highlighting key information",
        "executive": "Provide an executive summary of the following text suitable for business purposes",
    }

    # Words per BART chunk. BART's input limit is counted in tokens, and a word
    # usually maps to more than one token, so the word budget is kept well
    # below 1024 (truncation=True is still passed as a safety net).
    _BART_CHUNK_WORDS = 600

    def __init__(self, gemini_api_key: Optional[str] = None):
        """Set up the Gemini client (when a key is available) and load BART.

        Args:
            gemini_api_key: Gemini API key. When omitted, the key is read from
                the ``GEMINI_API_KEY`` or ``GOOGLE_API_KEY`` environment
                variable. Secrets must not be committed to source; without a
                key, Gemini summarization is skipped.
        """
        self.gemini_api_key = (
            gemini_api_key
            or os.environ.get("GEMINI_API_KEY")
            or os.environ.get("GOOGLE_API_KEY")
        )
        self.gemini_model = self._load_gemini_model()

        # Initialize other models
        self.huggingface_summarizer = None
        self.load_huggingface_model()

    def _load_gemini_model(self):
        """Return the first constructible Gemini model, or None if unavailable."""
        if not self.gemini_api_key:
            print("⚠️ No Gemini API key found. Set GEMINI_API_KEY or pass "
                  "gemini_api_key=... Will skip Gemini summarization.")
            return None

        genai.configure(api_key=self.gemini_api_key)
        for model_name in self._GEMINI_MODEL_CANDIDATES:
            try:
                return genai.GenerativeModel(model_name)
            except Exception:
                # Try the next candidate name.
                continue
        print("⚠️ Could not load Gemini model. Will skip Gemini summarization.")
        return None

    def load_huggingface_model(self):
        """Load the HuggingFace BART summarization pipeline.

        On failure the error is printed and ``huggingface_summarizer`` is
        left unchanged (``None`` after construction).
        """
        try:
            self.huggingface_summarizer = pipeline(
                "summarization",
                model="facebook/bart-large-cnn",
                tokenizer="facebook/bart-large-cnn"
            )
            print("✅ HuggingFace BART model loaded successfully")
        except Exception as e:
            print(f"❌ Failed to load HuggingFace model: {e}")

    def gemini_summarize(self, text: str, summary_type: str = "concise") -> str:
        """Summarize text using Google Gemini.

        Args:
            text: Text to summarize.
            summary_type: One of "concise", "detailed", "bullet_points" or
                "executive"; unknown values fall back to "concise".

        Returns:
            The summary, or a message describing why none was produced.
        """
        if not self.gemini_model:
            return "⚠️ Gemini model not available"

        try:
            instruction = self._GEMINI_INSTRUCTIONS.get(
                summary_type, self._GEMINI_INSTRUCTIONS["concise"]
            )
            response = self.gemini_model.generate_content(f"{instruction}:\n\n{text}")
            return response.text.strip()
        except Exception as e:
            message = str(e)
            lowered = message.lower()
            if "403" in message or "disabled" in lowered:
                return "⚠️ Gemini API not enabled. Please enable it at: https://console.developers.google.com/apis/api/generativelanguage.googleapis.com/overview"
            if "404" in message or "not found" in lowered:
                return "⚠️ Model not found. Try updating google-generativeai: pip install --upgrade google-generativeai"
            return f"Error with Gemini summarization: {e}"

    def huggingface_summarize(self, text: str, max_length: int = 150, min_length: int = 50) -> str:
        """Summarize text using the HuggingFace BART pipeline.

        Long inputs are split into word chunks; each chunk is summarized with
        a proportional share of the length budget and the partial summaries
        are joined with spaces.

        Args:
            text: Text to summarize.
            max_length: Upper bound on the total summary length (in tokens).
            min_length: Lower bound on the total summary length (in tokens).

        Returns:
            The summary, "" for blank input, or an error/unavailability message.
        """
        try:
            if not self.huggingface_summarizer:
                return "HuggingFace model not available"
            if not text.strip():
                return ""

            if len(text.split()) > self._BART_CHUNK_WORDS:
                chunks = self._chunk_text(text, self._BART_CHUNK_WORDS)
            else:
                chunks = [text]

            # Share the budget between chunks, keeping it valid: at least one
            # token of output, and never min_length > max_length.
            chunk_max = max(1, max_length // len(chunks))
            chunk_min = min(max(0, min_length // len(chunks)), chunk_max)

            summaries = [
                self.huggingface_summarizer(
                    chunk,
                    max_length=chunk_max,
                    min_length=chunk_min,
                    do_sample=False,
                    truncation=True
                )[0]['summary_text']
                for chunk in chunks
            ]
            return " ".join(summaries)
        except Exception as e:
            return f"Error with HuggingFace summarization: {e}"

    def extractive_summarize(self, text: str, num_sentences: int = 3) -> str:
        """Create an extractive summary using word-frequency sentence scores.

        Each sentence is scored by the mean corpus frequency of its
        non-stopword alphanumeric tokens; the ``num_sentences`` best ones are
        returned in their original order.

        Args:
            text: Text to summarize.
            num_sentences: Number of sentences to keep.

        Returns:
            The summary, the unchanged text when it has at most
            ``num_sentences`` sentences, or an error message.
        """
        try:
            # Tokenize into sentences
            sentences = sent_tokenize(text)
            if len(sentences) <= num_sentences:
                return text

            # Remove stopwords and calculate word frequencies
            stop_words = set(stopwords.words('english'))
            freq_dist = FreqDist(
                word
                for word in word_tokenize(text.lower())
                if word.isalnum() and word not in stop_words
            )

            # Score by sentence position, not by sentence text, so repeated
            # sentences are ranked separately and the result never exceeds
            # num_sentences.
            sentence_scores = {}
            for position, sentence in enumerate(sentences):
                counts = [
                    freq_dist[word]
                    for word in word_tokenize(sentence.lower())
                    if word in freq_dist
                ]
                if counts:
                    sentence_scores[position] = sum(counts) / len(counts)

            # Get top sentences and return them in original order
            top_positions = heapq.nlargest(
                num_sentences, sentence_scores, key=sentence_scores.get
            )
            return " ".join(sentences[position] for position in sorted(top_positions))
        except Exception as e:
            return f"Error with extractive summarization: {e}"

    def _chunk_text(self, text: str, max_words: int) -> List[str]:
        """Split text into whitespace-normalized chunks of at most max_words words.

        Raises:
            ValueError: If ``max_words`` is not positive.
        """
        if max_words <= 0:
            raise ValueError("max_words must be a positive integer")
        words = text.split()
        return [
            " ".join(words[start:start + max_words])
            for start in range(0, len(words), max_words)
        ]

    def compare_models(self, text: str) -> Dict[str, str]:
        """Run every summarization approach on text.

        Returns:
            Mapping of display name to that approach's summary (or message).
        """
        print("🔄 Generating summaries with different models...")

        results = {}

        # Gemini summaries
        print("📝 Generating Gemini summaries...")
        results["Gemini (Concise)"] = self.gemini_summarize(text, "concise")
        results["Gemini (Detailed)"] = self.gemini_summarize(text, "detailed")
        results["Gemini (Bullet Points)"] = self.gemini_summarize(text, "bullet_points")

        # HuggingFace summary
        print("🤗 Generating HuggingFace BART summary...")
        results["HuggingFace BART"] = self.huggingface_summarize(text)

        # Extractive summary
        print("📊 Generating Extractive summary...")
        results["Extractive (Frequency-based)"] = self.extractive_summarize(text)

        return results

    def analyze_summaries(self, original_text: str, summaries: Dict[str, str]) -> Dict[str, Dict]:
        """Compute word count, sentence count and compression for each summary.

        Args:
            original_text: The text the summaries were generated from.
            summaries: Mapping of model name to summary text.

        Returns:
            Mapping of model name to a dict with "word_count",
            "sentence_count", "compression_ratio" (e.g. "81.3%") and "summary".
            The compression ratio is "0.0%" when the original has no words.
        """
        analysis = {}
        original_word_count = len(original_text.split())

        for model_name, summary in summaries.items():
            word_count = len(summary.split())
            sentence_count = len(sent_tokenize(summary))
            if original_word_count:
                compression_ratio = round((1 - word_count / original_word_count) * 100, 1)
            else:
                # Nothing to compress; avoid dividing by zero.
                compression_ratio = 0.0

            analysis[model_name] = {
                "word_count": word_count,
                "sentence_count": sentence_count,
                "compression_ratio": f"{compression_ratio}%",
                "summary": summary
            }

        return analysis

def main():
    """Run the interactive summarizer demo.

    Builds a TextSummarizer, asks whether to use custom text (read line by
    line until the first empty line) or the built-in sample, runs every
    summarization approach, and prints the summaries with their metrics.
    Returns early, without summarizing, when the custom text is empty.
    """
    # Initialize summarizer
    summarizer = TextSummarizer()

    # Sample text for testing
    sample_text = """
    Artificial Intelligence (AI) has become one of the most transformative technologies of the 21st century,
    revolutionizing industries from healthcare to finance, transportation to entertainment. At its core, AI
    refers to the simulation of human intelligence in machines that are programmed to think and learn like humans.
    These systems can perform tasks that typically require human intelligence, such as visual perception,
    speech recognition, decision-making, and language translation.

    The development of AI can be traced back to the 1950s when computer scientists first began exploring
    the possibility of creating machines that could mimic human thought processes. Early pioneers like
    Alan Turing proposed the famous Turing Test as a measure of machine intelligence. However, it wasn't
    until recent decades, with the advent of powerful computers and vast amounts of data, that AI has
    reached its current level of sophistication.

    Machine Learning, a subset of AI, has been particularly instrumental in recent breakthroughs.
    This approach allows systems to automatically learn and improve from experience without being
    explicitly programmed for every scenario. Deep Learning, which uses neural networks with multiple
    layers, has enabled remarkable achievements in image recognition, natural language processing,
    and game playing, with systems like AlphaGo defeating world champions in complex strategy games.

    The applications of AI are vast and growing. In healthcare, AI systems can analyze medical images
    to detect diseases earlier and more accurately than human doctors in some cases. In autonomous
    vehicles, AI processes sensor data to navigate safely through traffic. In finance, AI algorithms
    detect fraudulent transactions and make investment decisions. Virtual assistants like Siri and
    Alexa use AI to understand and respond to natural language queries.

    However, the rapid advancement of AI also raises important ethical and societal questions.
    Concerns about job displacement, privacy, algorithmic bias, and the potential for misuse of
    AI technologies have sparked debates among policymakers, researchers, and the public. As AI
    systems become more powerful and ubiquitous, ensuring they are developed and deployed
    responsibly becomes increasingly critical for society's benefit.
    """

    print("🚀 Multi-Model Text Summarizer")
    print("=" * 50)

    # Option to use custom text
    use_custom = input("Would you like to use custom text? (y/n): ").lower().strip()
    if use_custom == 'y':
        print("\nEnter your text (press Enter twice when finished):")
        # Read lines until the first empty one.
        text_to_summarize = "\n".join(iter(input, ""))
        if not text_to_summarize.strip():
            # Without any words there is nothing to summarize, and the
            # compression metric would otherwise divide by zero.
            print("\n⚠️ No text entered; nothing to summarize.")
            return
    else:
        text_to_summarize = sample_text
        print("\nUsing sample text about Artificial Intelligence...")

    print(f"\n📄 Original text length: {len(text_to_summarize.split())} words")
    print("-" * 50)

    # Generate summaries
    start_time = time.time()
    summaries = summarizer.compare_models(text_to_summarize)
    end_time = time.time()

    # Analyze results
    analysis = summarizer.analyze_summaries(text_to_summarize, summaries)

    # Display results
    print(f"\n⏱️  Total processing time: {end_time - start_time:.2f} seconds")
    print("\n" + "=" * 80)
    print("📊 SUMMARY COMPARISON RESULTS")
    print("=" * 80)

    for model_name, data in analysis.items():
        print(f"\n🔹 {model_name}")
        print(f"   Words: {data['word_count']} | Sentences: {data['sentence_count']} | Compression: {data['compression_ratio']}")
        print(f"   Summary: {data['summary']}")
        print("-" * 80)

    # Performance comparison table
    print("\n📈 PERFORMANCE METRICS")
    print("-" * 60)
    print(f"{'Model':<25} {'Words':<8} {'Sentences':<10} {'Compression':<12}")
    print("-" * 60)
    for model_name, data in analysis.items():
        print(f"{model_name:<25} {data['word_count']:<8} {data['sentence_count']:<10} {data['compression_ratio']:<12}")

    print("\n✅ Analysis complete!")

# Entry point: run the interactive demo only when this code is executed
# directly (script or notebook cell), not when it is imported.
if __name__ == "__main__":
    # Install required packages (run these in terminal if needed):
    # pip install google-generativeai transformers torch nltk
    # NOTE(review): `openai` is also imported at the top of this cell but is
    # not in the list above - confirm whether it needs installing.

    main()

✅ NLTK data downloaded successfully


Device set to use cpu


✅ HuggingFace BART model loaded successfully
🚀 Multi-Model Text Summarizer
Would you like to use custom text? (y/n): n

Using sample text about Artificial Intelligence...

📄 Original text length: 326 words
--------------------------------------------------
🔄 Generating summaries with different models...
📝 Generating Gemini summaries...
🤗 Generating HuggingFace BART summary...
📊 Generating Extractive summary...

⏱️  Total processing time: 52.74 seconds

📊 SUMMARY COMPARISON RESULTS

🔹 Gemini (Concise)
   Words: 61 | Sentences: 3 | Compression: 81.3%
   Summary: AI, simulating human intelligence in machines, is rapidly transforming numerous industries through advancements in machine and deep learning.  Its applications range from healthcare diagnostics to autonomous vehicles and financial analysis, but ethical concerns regarding job displacement, bias, and misuse necessitate responsible development and deployment.  The field's evolution from its 1950s origins has been propelled by increa