In [1]:
!pip install scrapy requests beautifulsoup4 newspaper3k

Collecting scrapy
  Downloading Scrapy-2.12.0-py2.py3-none-any.whl.metadata (5.3 kB)
Collecting newspaper3k
  Downloading newspaper3k-0.2.8-py3-none-any.whl.metadata (11 kB)
Collecting Twisted>=21.7.0 (from scrapy)
  Downloading twisted-24.11.0-py3-none-any.whl.metadata (20 kB)
Collecting cssselect>=0.9.1 (from scrapy)
  Downloading cssselect-1.2.0-py2.py3-none-any.whl.metadata (2.2 kB)
Collecting itemloaders>=1.0.1 (from scrapy)
  Downloading itemloaders-1.3.2-py3-none-any.whl.metadata (3.9 kB)
Collecting parsel>=1.5.0 (from scrapy)
  Downloading parsel-1.10.0-py2.py3-none-any.whl.metadata (11 kB)
Collecting queuelib>=1.4.2 (from scrapy)
  Downloading queuelib-1.7.0-py2.py3-none-any.whl.metadata (5.7 kB)
Collecting service-identity>=18.1.0 (from scrapy)
  Downloading service_identity-24.2.0-py3-none-any.whl.metadata (5.1 kB)
Collecting w3lib>=1.17.0 (from scrapy)
  Downloading w3lib-2.3.1-py3-none-any.whl.metadata (2.3 kB)
Collecting zope.interface>=5.1.0 (from scrapy)
  Downloading z

In [None]:
!pip install lxml_html_clean

Collecting lxml_html_clean
  Downloading lxml_html_clean-0.4.1-py3-none-any.whl.metadata (2.4 kB)
Downloading lxml_html_clean-0.4.1-py3-none-any.whl (14 kB)
Installing collected packages: lxml_html_clean
Successfully installed lxml_html_clean-0.4.1


In [None]:
!pip install anthropic

Collecting anthropic
  Downloading anthropic-0.47.1-py3-none-any.whl.metadata (24 kB)
Downloading anthropic-0.47.1-py3-none-any.whl (239 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m239.5/239.5 kB[0m [31m8.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: anthropic
Successfully installed anthropic-0.47.1


In [None]:
from newspaper import Article
import requests
from bs4 import BeautifulSoup
import anthropic
import json
from typing import Dict, List, Optional
from datetime import datetime
from collections import Counter
import os



# Scrapes the article from the web
def scrape_article(url):
    """Fetch one article with newspaper's ``Article`` and return its parsed fields.

    Args:
        url: Address of the article to download.

    Returns:
        dict with the keys ``title``, ``text`` and ``publish_date`` (read from the
        parsed ``Article`` object) plus ``url`` (the argument, passed through).
    """
    page = Article(url)
    # Same order as always: fetch first, then parse.
    page.download()
    page.parse()

    record = {field: getattr(page, field) for field in ('title', 'text', 'publish_date')}
    record['url'] = url
    return record

In [None]:
# Smoke-test the scraper on a single NYT article; the returned dict is the cell's output.
scrape_article("https://www.nytimes.com/2024/12/26/world/asia/south-korea-impeach-president-han.html")

{'title': 'South Korean Lawmakers Impeach Acting President as Crisis Deepens',
 'text': 'South Korea’s leadership crisis deepened on Friday after lawmakers voted to oust a second head of state, the acting president, in less than two weeks.\n\nThe move prolonged the political vacuum that has gripped South Korea since President Yoon Suk Yeol shocked the country this month by briefly putting it under military rule for the first time in decades.\n\nLawmakers impeached and suspended Mr. Yoon on Dec. 14 over the martial law move, and Prime Minister Han Duck-soo stepped in as acting president. But Mr. Han’s tenure would also prove short-lived, as opposition lawmakers voted on Friday to impeach Mr. Han, as well.\n\nThis was the first time South Korea had impeached an interim leader. It meant that South Korea continued to be without a strong elected leader who could take charge of the government and military in one of Washington’s most important allies, at a time when the country is grappling w

In [None]:
import requests
import json
from datetime import datetime

# Uses NYT API to get the top articles of the day
def get_top_stories(section, api_key, timeout=10):
    """Fetch the NYT Top Stories listing for one section.

    Args:
        section: Section slug interpolated into the endpoint path (e.g. 'world').
        api_key: NYT API key, sent as the ``api-key`` query parameter.
        timeout: Seconds to wait on the server before giving up. ``requests``
            applies no timeout by default, so a stalled connection would
            otherwise block the notebook indefinitely.

    Returns:
        The ``results`` entry of the JSON payload when the response status is
        200; ``None`` for any other status code.

    Raises:
        requests.exceptions.RequestException: on connection errors or when the
            timeout elapses.
    """
    url = f"https://api.nytimes.com/svc/topstories/v2/{section}.json"
    params = {'api-key': api_key}

    response = requests.get(url, params=params, timeout=timeout)
    if response.status_code == 200:
        return response.json()['results']
    return None



In [None]:
def get_all_articles(api_key: Optional[str] = None,
                     sections: Optional[List[str]] = None) -> Dict[str, List[Dict]]:
    """Collect the NYT top stories for several sections.

    Args:
        api_key: NYT API key. When omitted, the ``NYT_API_KEY`` environment
            variable is used; if that is unset the key is requested with
            ``getpass`` so it is not echoed into the notebook output.
        sections: Section slugs to fetch. Defaults to world, technology,
            science, health and business.

    Returns:
        Dict mapping each section that returned stories to a list of dicts with
        the keys ``title``, ``url``, ``published_date``, ``abstract`` and
        ``section``. Sections whose fetch returned nothing are left out.
    """
    import getpass  # local import: only needed for the interactive fallback

    if api_key is None:
        api_key = os.environ.get("NYT_API_KEY") or getpass.getpass("Your news api key here:")
    if sections is None:
        sections = ['world', 'technology', 'science', 'health', 'business']

    all_articles = {}
    for section in sections:
        articles = get_top_stories(section, api_key)
        if not articles:
            # None (non-200 response) or an empty list: skip this section.
            continue
        all_articles[section] = [{
            'title': article['title'],
            'url': article['url'],
            'published_date': article['published_date'],
            'abstract': article['abstract'],
            'section': section
        } for article in articles]

    return all_articles



In [None]:
# Fetch the top stories for every section once and keep them in a notebook-level
# dict; run() below reads this variable.
all_articles = get_all_articles()

In [None]:

class NewsAnalyzer:
    """Summarize sectioned news articles with the Anthropic Messages API.

    Typical flow: ``analyze_news_data`` builds per-section statistics and
    themes, then ``generate_summary_report`` turns that analysis into a
    narrative report.
    """

    DEFAULT_MODEL = "claude-3-5-sonnet-20241022"

    # Persona for the summary report. It is passed as the system prompt so the
    # model actually receives it.
    REPORT_SYSTEM_PROMPT = (
        "You are an expert on world news from Bloomberg. You are keen to spot any new "
        "conflicts, risks and opportunities for current countries and companies. You are "
        "also keen to spot new supply and demand changes based on the headlines and "
        "themes. You also give critical advice to entrepreneurs. Cite specific cases to "
        "support your advice, do not provide any generic ones."
    )

    def __init__(self, api_key: str, model: str = DEFAULT_MODEL):
        """
        Initialize the NewsAnalyzer with Claude API credentials.

        Args:
            api_key: Anthropic API key used to build the client.
            model: Model identifier sent with every request.
        """
        self.client = anthropic.Client(api_key=api_key)
        self.model = model

    @staticmethod
    def _strip_bullet(line: str) -> str:
        """Remove a leading list marker ("•", "- ", "* ", "– ", "— ") from one line."""
        text = line.strip("• ").strip()
        # Markdown-style markers count only when followed by whitespace, so
        # emphasis such as "**Themes**" is left intact.
        if len(text) > 1 and text[0] in "-*–—" and text[1].isspace():
            text = text[2:].lstrip()
        return text

    def _extract_key_themes(self, articles: List[Dict]) -> List[str]:
        """Extract main themes from a list of articles using Claude.

        Args:
            articles: Article dicts; ``title`` and ``abstract`` are read, with
                missing keys treated as empty strings.

        Returns:
            The non-empty lines of the model's reply with list markers removed.
            An empty ``articles`` list returns ``[]`` without calling the API.
        """
        if not articles:
            return []

        # Every article is included in the prompt (no truncation).
        articles_text = "\n".join([
            f"Title: {article.get('title', '')}\n"
            f"Abstract: {article.get('abstract', '')}\n"
            for article in articles
        ])

        prompt = f"""Analyze these news articles and identify 3-4 key one-word themes. First describe each item in bullet points succinctly, then focus on major trends and patterns:

        {articles_text}

        Please provide the themes in a bullet-point format."""

        response = self.client.messages.create(
            model=self.model,
            max_tokens=300,
            messages=[{
                "role": "user",
                "content": prompt
            }]
        )

        # One entry per non-empty reply line, with bullet markers stripped.
        cleaned = (self._strip_bullet(line) for line in response.content[0].text.split("\n"))
        return [theme for theme in cleaned if theme]

    def analyze_news_data(self, news_data: Dict[str, List[Dict]]) -> Dict:
        """
        Analyze news data and generate insights using Claude API.

        Args:
            news_data (Dict[str, List[Dict]]): Dictionary with sections as keys and lists of articles as values
                Each article should have 'title', 'abstract', 'published_date', and 'url' fields

        Returns:
            Dict: ``{"timestamp": <ISO time of the run>, "sections": {...}}`` where
            each non-empty section maps to:
                - ``article_count``: number of articles
                - ``recent_headlines``: the title of every article in the section
                - ``key_themes``: themes returned by Claude
                - ``date_range``: smallest and largest ``published_date`` values, or
                  ``None`` for both when no article carries a date
        """
        analysis = {
            "timestamp": datetime.now().isoformat(),
            "sections": {}
        }

        for section, articles in news_data.items():
            if not articles:
                continue

            # Collect dates once. min()/max() raise ValueError on an empty
            # sequence, so a section with no dated articles is handled explicitly.
            # NOTE(review): dates are compared as given (strings sort
            # lexicographically) - assumes a consistent ISO-8601 format; confirm.
            dates = [
                article["published_date"]
                for article in articles
                if article.get("published_date")
            ]

            section_analysis = {
                "article_count": len(articles),
                "recent_headlines": [article["title"] for article in articles],
                "key_themes": self._extract_key_themes(articles),
                "date_range": {
                    "start": min(dates) if dates else None,
                    "end": max(dates) if dates else None
                }
            }

            # Add to main analysis
            analysis["sections"][section] = section_analysis

        return analysis

    def generate_summary_report(self, analysis: Dict) -> str:
        """
        Generate a human-readable summary report from the analysis.

        The expert persona in ``REPORT_SYSTEM_PROMPT`` is sent as the system
        prompt; the analysis itself is serialized to JSON inside the user message.

        Args:
            analysis (Dict): Analysis output from analyze_news_data()

        Returns:
            str: Formatted summary report
        """
        prompt = f"""Based on this news analysis data, create a concise summary report highlighting the most important insights:

        {json.dumps(analysis, indent=2)}

        Format the report with sections for:
        1. Overall Coverage Summary
        2. Key Themes by Section
        3. Notable Recent Headlines
        4. Your expert reflection of the current world conditions based on today's news.
        5. Hidden trendes and opportunties for companies.
        """

        response = self.client.messages.create(
            model=self.model,
            max_tokens=1000,
            system=self.REPORT_SYSTEM_PROMPT,
            messages=[{
                "role": "user",
                "content": prompt
            }]
        )

        return response.content[0].text



In [None]:
# Example usage
def run(api_key: Optional[str] = None, news_data: Optional[Dict[str, List[Dict]]] = None):
    """Analyze the collected articles and print the generated report.

    Args:
        api_key: Anthropic API key. When omitted, the ``ANTHROPIC_API_KEY``
            environment variable is used; if that is unset the key is requested
            with ``getpass`` so it is not echoed into the notebook output.
        news_data: Mapping of section name to article dicts. Defaults to the
            notebook-level ``all_articles`` variable.

    Returns:
        None. The report is printed.
    """
    import getpass  # local import: only needed for the interactive fallback

    if api_key is None:
        api_key = os.environ.get("ANTHROPIC_API_KEY") or getpass.getpass("your claude key here:")
    if news_data is None:
        # Resolved at call time, so the cell that builds all_articles must have run.
        news_data = all_articles

    # Initialize analyzer
    analyzer = NewsAnalyzer(api_key=api_key)

    # Run analysis
    analysis = analyzer.analyze_news_data(news_data)

    # Generate report
    report = analyzer.generate_summary_report(analysis)
    print(report)

In [None]:
# Run the full pipeline on `all_articles`; the report is printed as this cell's output.
run()

NEWS ANALYSIS REPORT
Date: February 24, 2025

1. OVERALL COVERAGE SUMMARY
Total articles analyzed: 151 across 5 major sections
- World: 36 articles (24%)
- Business: 34 articles (22%)
- Technology: 29 articles (19%)
- Science: 26 articles (17%)
- Health: 26 articles (17%)

2. KEY THEMES BY SECTION
World:
- Global power realignment (US-Russia-China dynamics)
- European political transitions
- Ongoing conflicts (Ukraine, Gaza)

Technology:
- AI industry power struggles
- Regulatory pressures on tech giants
- Cryptocurrency market volatility
- Corporate restructuring

Science/Health:
- Government agency disruptions
- Public health challenges
- Research funding concerns
- Scientific advancement despite obstacles

Business:
- Media industry transformation
- Energy sector transitions
- Corporate leadership changes
- Market uncertainty

3. NOTABLE RECENT HEADLINES
Most Significant:
- "Three Years Into War in Ukraine, Trump Ushers in New World for Putin"
- "OpenAI Uncovers Evidence of A.I.-Pow