<a href="https://colab.research.google.com/github/Gojo-Satoru-git/GEN-AI/blob/main/NEWSsummerizer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import json
import os

import requests
from bs4 import BeautifulSoup

# OpenRouter API setup.
# The key is read from the OPENROUTER_API_KEY environment variable so that no
# credential lives in the source file; it is an empty string when unset.
API_KEY = os.environ.get("OPENROUTER_API_KEY", "")
# Chat-completions endpoint: the request payload built in this file
# (model + messages) is the chat-completions schema, which is served here
# rather than at the bare /api/v1 base URL.
API_URL = "https://openrouter.ai/api/v1/chat/completions"
MODEL_ID = "mistralai/mistral-7b-instruct"  # You can change the model if needed

def fetch_news_from_url(url, timeout=10):
    """
    Fetches and extracts news text from a given URL.

    :param url: Address of the page to download.
    :param timeout: Seconds to wait for the server before giving up.
    :return: Text of every non-empty <p> element joined by single spaces, or
             None if the request failed or no paragraph text was found.
    """
    try:
        # Without a timeout, requests waits forever on an unresponsive host.
        response = requests.get(
            url,
            headers={"User-Agent": "Mozilla/5.0"},
            timeout=timeout,
        )
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"❌ Error fetching news: {e}")
        return None

    soup = BeautifulSoup(response.text, "html.parser")

    # Extract article text from <p> tags, dropping paragraphs that are empty
    # or whitespace-only so a page without real text yields None rather than
    # a blank string that would still be sent for summarization.
    stripped = (p.get_text().strip() for p in soup.find_all("p"))
    news_text = " ".join(chunk for chunk in stripped if chunk)

    return news_text or None

def summarize_text(text, summary_type="extractive", max_length=150, timeout=30):
    """
    Requests a summary of ``text`` from the configured chat-completions API.

    :param text: Article text to summarize.
    :param summary_type: "extractive" or "abstractive".
    :param max_length: Target length of the summary, in words.
    :param timeout: Seconds to wait for the API before giving up.
    :return: The summary text on success; otherwise a string starting with
             "❌ Error" that describes what went wrong.
    :raises ValueError: If ``summary_type`` is not one of the two allowed values.
    """
    if summary_type not in ("extractive", "abstractive"):
        raise ValueError("Invalid summary type. Choose 'extractive' or 'abstractive'.")

    prompt = f"Summarize the following news article in a {summary_type} manner within {max_length} words:\n{text}"

    headers = {
        "Authorization": f"Bearer {API_KEY}",
        "Content-Type": "application/json"
    }

    data = {
        "model": MODEL_ID,
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.5,
        # The prompt budgets in words, but the API caps tokens. A word usually
        # needs more than one token, so leave headroom to avoid cutting the
        # summary off mid-sentence.
        "max_tokens": max_length * 2,
    }

    try:
        response = requests.post(API_URL, headers=headers, json=data, timeout=timeout)
    except requests.exceptions.RequestException as e:
        # Network failures are reported the same way as HTTP failures below.
        return f"❌ Error in summarization: {e}"

    if response.status_code != 200:
        return f"❌ Error in summarization: {response.text}"

    try:
        result = response.json()
    except ValueError:
        # ValueError is the common base of the JSON decode errors raised by
        # the different requests / json backends.
        return "❌ Error: Response is not in JSON format!"

    try:
        return result["choices"][0]["message"]["content"].strip()
    except (KeyError, IndexError, TypeError, AttributeError):
        # A 200 response can still carry a body without a usable completion
        # (e.g. an error object); surface it instead of crashing.
        return f"❌ Error in summarization: {response.text}"
def generate_summary(input_text=None, url=None, summary_type="extractive", max_length=150, output_format="json"):
    """
    Produces a summary from either a news URL or raw text.

    When ``url`` is given it takes precedence: the page is downloaded and its
    text replaces ``input_text``.

    :param input_text: Raw text input (optional)
    :param url: URL of the news article (optional)
    :param summary_type: "extractive" or "abstractive"
    :param max_length: Length of the summary
    :param output_format: "json" for a JSON string; any other value returns
                          the plain summary text
    :return: Summary in the chosen format, or an error message string when no
             usable input is available
    """
    article = input_text

    if url:
        # URL wins over any raw text that was also supplied.
        article = fetch_news_from_url(url)
        if not article:
            return "❌ Failed to extract news content."
    elif not article:
        return "❌ No valid input provided for summarization."

    summary = summarize_text(article, summary_type, max_length)

    if output_format != "json":
        return summary

    payload = {
        "summary": summary,
        "summary_type": summary_type,
        "max_length": max_length,
    }
    return json.dumps(payload, indent=4)

# Demo: summarize one article when the file is run as a script
if __name__ == "__main__":
    # Sample article address (swap in any real news article)
    news_url = "https://www.nbcnews.com/business/autos/trump-auto-tariffs-which-companies-how-much-when-what-to-know-rcna198223"

    # Request an extractive summary, returned as a JSON string
    summary_result = generate_summary(
        url=news_url,
        summary_type="extractive",
        max_length=200,
        output_format="json",
    )
    print(summary_result)
