In [34]:
!pip install nest_asyncio  # Install if not installed

import nest_asyncio
nest_asyncio.apply()



In [36]:
import os
import logging
import asyncio
import aiohttp
import json
import spacy
import torch
from typing import List, Dict
from transformers import pipeline
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# ✅ API Configuration
NEWS_API_KEY = "pub_713126568809cbb479e396fcef2b6c9d38644"
NEWS_API_URL = "https://newsdata.io/api/1/news"

# ✅ Countries
COUNTRIES = {
    "India": "in", "USA": "us", "UK": "gb", "Canada": "ca", "Australia": "au",
    "Germany": "de", "France": "fr", "China": "cn", "Japan": "jp"
}

# ✅ NLP Initialization
sentiment_analyzer = SentimentIntensityAnalyzer()
nlp = spacy.load("en_core_web_sm")

# ✅ Fetch News Asynchronously
class NewsFetcher:
    async def fetch_articles(self, session: aiohttp.ClientSession, query: str, country: str) -> List[Dict]:
        params = {
            "apikey": NEWS_API_KEY,
            "q": query,
            "country": country.lower(),
            "language": "en",
            "size": 10
        }
        try:
            async with session.get(NEWS_API_URL, params=params) as response:
                if response.status == 200:
                    data = await response.json()
                    return data.get("results", [])
                logging.error(f"Error {response.status}: {await response.text()}")
        except Exception as e:
            logging.error(f"Network error: {str(e)}")
        return []

# ✅ Process News Articles
class ArticleProcessor:
    def process_article(self, article: Dict) -> Dict:
      description = article.get("description")  # Get description without default first
      if not description:  # Check for None or empty string
          return None

      return {
          "title": article.get("title", "Untitled Article"),
          "source": article.get("source_id", "Unknown source"),
          "description": description,
          "url": article.get("link", "#"),
          "published_date": article.get("pubDate", ""),
          "analysis": self.analyze_content(description)
      }

    def analyze_content(self, text: str) -> Dict:
        doc = nlp(text)
        return {
            "sentiment": sentiment_analyzer.polarity_scores(text),
            "entities": {ent.text: ent.label_ for ent in doc.ents},
            "keywords": [chunk.text for chunk in doc.noun_chunks]
        }

# ✅ Main Execution
async def main():
    # 🔹 User Input for Topic & Country
    topic = input("\n🔎 Enter a topic to search for news: ").strip()

    print("\n🌍 **Select a Location:**")
    for i, (country, code) in enumerate(COUNTRIES.items(), start=1):
        print(f"{i}. {country}")

    location_index = input("\nEnter location number: ").strip()
    chosen_location = list(COUNTRIES.values())[int(location_index) - 1] if location_index.isdigit() else "in"

    # 🔹 Initialize Fetcher & Processor
    fetcher = NewsFetcher()
    processor = ArticleProcessor()

    async with aiohttp.ClientSession() as session:
        articles = await fetcher.fetch_articles(session, topic, chosen_location)

    processed_articles = [processor.process_article(a) for a in articles if a]
    results = [p for p in processed_articles if p is not None]

    # 🔹 Save Results to JSON
    with open("news_analysis.json", "w", encoding="utf-8") as f:
        json.dump(results, f, indent=2, ensure_ascii=False)

    # 🔹 Display Results
    if results:
        print("\n📰 **Latest News Articles:**\n")
        for i, article in enumerate(results, start=1):
            print(f"🔹 **{i}. {article['title']}**")
            print(f"📖 **Description:** {article['description']}")
            print(f"🔗 [Read More]({article['url']})")
            print(f"📅 Published on: {article['published_date']}")
            print(f"📌 **Sentiment Analysis:** {article['analysis']['sentiment']}\n")
            print("-" * 80)
    else:
        print("\n❌ No news articles found.")

if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)

    loop = asyncio.get_event_loop()
    loop.run_until_complete(main())  # ✅ Run the async function properly



🔎 Enter a topic to search for news: Union Budget

🌍 **Select a Location:**
1. India
2. USA
3. UK
4. Canada
5. Australia
6. Germany
7. France
8. China
9. Japan

Enter location number: 1

📰 **Latest News Articles:**

🔹 **1. Andhra Pradesh to Play Key Role in Viksit Bharat, Says Petroleum Minister Hardeep Singh Puri**
📖 **Description:** Andhra Pradesh is full of opportunities in the petroleum sector and will play a key role in India’s march towards the goal of Viksit Bharat, said Hardeep Singh Puri, Minister for Petroleum and Natur...
🔗 [Read More](http://www.en.etemaaddaily.com/world/national/andhra-pradesh-to-play-key-role-in-viksit-bharat-says-petroleum-minister-hardeep-singh-puri:170843)
📅 Published on: 2025-02-22 05:21:49
📌 **Sentiment Analysis:** {'neg': 0.0, 'neu': 0.865, 'pos': 0.135, 'compound': 0.6124}

--------------------------------------------------------------------------------
🔹 **2. Budget Proposals On Agriculture, Fiscal Consolidation Commitment Positive For Price Stabi

In [38]:
import json
import requests
from bs4 import BeautifulSoup
from transformers import pipeline

# Load JSON file
with open("news_analysis.json", "r", encoding="utf-8") as f:
    news_data = json.load(f)

# Extract URLs
urls = [entry["url"] for entry in news_data]

# Function to extract text from a webpage
def extract_text_from_url(url):
    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")

        # Extract the main article text (modify if needed)
        paragraphs = soup.find_all("p")
        text = " ".join([para.get_text() for para in paragraphs])
        return text[:2000]  # Limit to 2000 characters for summarization
    except Exception as e:
        return f"Error fetching article: {str(e)}"

# Load the first summarization model
summarizer1 = pipeline("summarization", model="facebook/bart-large-cnn")

# First round of summarization
first_summaries = []
for url in urls:
    article_text = extract_text_from_url(url)
    if "Error" in article_text:
        first_summaries.append(article_text)
    else:
        summary = summarizer1(article_text, do_sample=False)
        first_summaries.append(summary[0]["summary_text"])

# Load the second summarization model (same or different)
summarizer2 = pipeline("summarization", model="facebook/bart-large-cnn")

# Second round of summarization
final_summary = summarizer2(" ".join(first_summaries), max_length=300, min_length=100, do_sample=False)

# Print final summary
print("\nFinal Summarized Output:\n")
print(final_summary[0]["summary_text"])


Device set to use cpu
Your max_length is set to 142, but your input_length is only 21. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=10)
Your max_length is set to 142, but your input_length is only 3. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=1)
Device set to use cpu



Final Summarized Output:

Budget 2025 prioritises non-inflationary growth through careful fiscal management. Government’s entire borrowing to be channelled exclusively into capital expenditure. Apple has announced the release timeline for its upcoming software update, iOS 18.4. This update, set to arrive in April, will bring significant improvements, including the expansion of Apple Intelligence to more languages. CNN.com will feature iReporter photos in a weekly Travel Snapshots gallery. Please submit your best shots of the U.S. for next week.


In [None]:

import requests
import time
from transformers import pipeline

# ✅ News API Key & URL
NEWS_API_KEY = "pub_713126568809cbb479e396fcef2b6c9d38644"
NEWS_API_URL = "https://newsdata.io/api/1/news"

# ✅ Load Summarization Model (BART Large CNN)
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

# ✅ Categories & Locations
CATEGORIES = ["top", "politics", "business", "technology", "sports", "health",
              "entertainment", "science", "environment", "food", "education"]

COUNTRIES = {
    "India": "in", "USA": "us", "UK": "gb", "Canada": "ca", "Australia": "au",
    "Germany": "de", "France": "fr", "China": "cn", "Japan": "jp"
}

# ✅ Fetch news articles
def fetch_news(category, country):
    params = {
        "apikey": NEWS_API_KEY,
        "category": category.lower(),
        "country": country.lower(),
        "language": "en",
        "size": 5  # Fetching 5 articles for citations
    }
    response = requests.get(NEWS_API_URL, params=params)
    data = response.json()

    if response.status_code == 200 and "results" in data:
        return data["results"][:5]  # Get only the top 5 articles
    else:
        print(f"❌ API Error: {data.get('message', 'Unknown error')}")
        return []

# ✅ Get summary using BART
def get_summary(text):
    try:
        summary = summarizer(text, max_length=150, min_length=50, do_sample=False)
        return summary[0]["summary_text"]
    except Exception as e:
        print(f"❌ Error summarizing: {e}")
        return "No summary available."

# ✅ Display menu for category selection
print("\n📌 **Select a News Category:**")
for i, category in enumerate(CATEGORIES, start=1):
    print(f"{i}. {category.capitalize()}")

category_index = input("\nEnter category number: ").strip()
chosen_category = CATEGORIES[int(category_index) - 1] if category_index.isdigit() else "top"

# ✅ Display menu for location selection
print("\n🌍 **Select a Location:**")
for i, (country, code) in enumerate(COUNTRIES.items(), start=1):
    print(f"{i}. {country}")

location_index = input("\nEnter location number: ").strip()
chosen_location = list(COUNTRIES.values())[int(location_index) - 1] if location_index.isdigit() else "in"

# 🔥 Fetch and process news
news_articles = fetch_news(chosen_category, chosen_location)
citations = []
summaries = []

if news_articles:
    for article in news_articles:
        title = article.get("title", "No title available")
        description = article.get("description", "No content available")
        url = article.get("link", "No URL available")

        if description != "No content available":
            summary = get_summary(description)
            summaries.append(summary)
            citations.append(f"{title} - {url}")
            time.sleep(1)  # Avoid overwhelming the summarizer

    # ✅ Combine summaries into a coherent overview
    final_summary = " ".join(summaries)[:500] + "..."

    # ✅ Output
    print("\n📰 **Citations:**")
    for cite in citations:
        print(f"- {cite}")

    print("\n📖 **Summary of all perspectives:**")
    print(final_summary)
else:
    print("❌ No news articles found.")