In [1]:
print("AI news digest")

AI news digest


In [None]:
# 1
import requests 
import os 
from datetime import datetime, timedelta, timezone 
from dotenv import load_dotenv 

# load environment variables from .env file 
load_dotenv()

class NewsFetcher:
    def __init__(self, api_key=None):
        self.api_key = api_key or os.getenv("NEWSAPI_KEY")
        if not self.api_key:
            raise ValueError("NEWSAPI_KEY not found in environment variables or .env file")
        self.base_url = "https://newsapi.org/v2/everything"

    def fetch_articles(self, query: str, num_articles: int = 5, days_back: int = 1, sources: str="", language: str = "en") -> list:
        """ 
        Fetch recent news articles based on user query 
        Returns list of artciles with: title, url, content, source, and publishedAt
        """
        # calculate date range 
        to_date = datetime.now(timezone.utc)
        from_date = to_date - timedelta(days=days_back)

        params = {
            "q": query, 
            "pageSize": num_articles, 
            "from": from_date.strftime("%Y-%m-%d"),
            "to": to_date.strftime("%Y-%m-%d"),
            "language": language,
            "sortBy": "relevancy",
            "apiKey": self.api_key # Make sure this is included 
        }

        if sources:
            params["sources"] = sources 

        try:
            response = requests.get(self.base_url, params=params)
            # Check for 401 specifically 
            if response.status_code == 401:
                print("401 Unauthorized: Check your API key")
                print(f"Key key: {self.api_key[:3]}...{self.api_key[-3:]}")
                print("Verify your key at https://newsapi_org/account")
                return []
            
            response.raise_for_status()
            data = response.json()

            if data['status'] == "ok":
                return [ 
                    {
                        "title": article['title'],
                        "url": article["url"],
                        "content": article["content"] or article["description"] or "",
                        "source": article["source"]["name"],
                        "published": article["publishedAt"]
                    }
                    for article in data["articles"]
                ]

            else:
                print(f"API Error: {data.get('message', 'Unknown error')}")
                return []


        except requests.exceptions.RequestException as e:
            print(f"Request failed: {str(e)}")
            return [] 


In [2]:
try:

    fetcher = NewsFetcher()
    articles = fetcher.fetch_articles(
        query="AI startups",
        num_articles=5, 
        days_back=1
    )

    print(f"Found {len(articles)} articles:")
    for i, article in enumerate(articles, 1):
        print(f"\nArticle {i}: {article['title']}")
        print(f"Source: {article['source']}")
        print(f"URL: {article['url']}")
        if article['content']:
            print(f"Preview: {article['content'][:100]}...")

except ValueError as e:
    print(f"Configuration error: {str(e)}")
    print("Please create a .env file with NEWSAPI_KEY=your_api_key")
    

    

Found 5 articles:

Article 1: AI is raising the bar for sales — and Microsoft's layoffs prove the 'relationship guy' is out, says a software investor
Source: Business Insider
URL: https://www.businessinsider.com/microsoft-layoffs-salespeople-relationship-guy-ai-solution-engineer-investor-2025-7
Preview: Microsoft began culling less than 4% of its workforce, or about 9,000 employees, earlier this month,...

Article 2: Shopify has quietly set boundaries for ‘buy-for-me’ AI bots on merchant sites
Source: Digiday
URL: http://digiday.com/marketing/shopify-has-quietly-set-boundaries-for-buy-for-me-ai-bots-on-merchant-sites/
Preview: Shopify is drawing a line in the sand on agentic AI a type of bot that autonomously completes tasks ...

Article 3: Amazon challenges Microsoft with Kiro, its new AI-powered IDE
Source: Neowin
URL: https://www.neowin.net/news/amazon-challenges-microsoft-with-kiro-its-new-ai-powered-ide/
Preview: The market for AI tools aimed at developers has become one of the mo

In [None]:
import os 
from langchain_groq import ChatGroq 
from langchain.prompts import PromptTemplate 
from langchain.schema import StrOutputParser
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_core.runnables import RunnablePassthrough 
from dotenv import load_dotenv 

# Load environment variables 
load_dotenv()

class ArticleSummarizer:
    def __init__(self, model_name="llama3-70b-8192"):
        """  
        Initialize Groq summarizer 
        Available model: "llama3-70b-8192", "llama3-8b-8192", "mixtral-8x7b-32768"
        """
        self.groq_api_key = os.getenv("GROQ_API_KEY")
        if not self.groq_api_key:
            raise ValueError("GROQ_aPI_KEY not found in .env file")
        
        self.model = ChatGroq(
            temperature=0.3,
            model_name=model_name,
            api_key=self.groq_api_key
        )

        # Configure text splitter for long articles 
        self.text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=12000, # Adjust based on model context window  
            chunk_overlap=500,
            length_function=len 
        )

        # Define our summarization prompt 
        self.summary_prompt = PromptTemplate.from_template(
            """  You are an expert new analyst. Create a concise summary (1- paragraphs) of the following article:
            
            **Article Content:** 
            {content}
            
            **Summary Requirements:** 
            1. Identify the core insight or main claim 
            2. Extract all important named entities (people, organizations, locations)
            3. Highlight key facts and figures 
            4. Maintain neutral, objective tone 

            **Output Format:** 
            - First paragraph: Core insight and key facts 
            - Second paragraph: Named entities and their significance 

            **Summary:**
            """
        )

        # create summarization chain 
        self.summary_chain = (
            {"content": RunnablePassthrough()}
            | self.summary_prompt 
            | self.model
            | StrOutputParser() 
        )
    
    def summarize(self, article_content: str) -> str:
        """  
        Summarize article content handling long text 
        """
        # skip summarization if content is too short
        if len(article_content) < 300:
            return article_content 
        
        # Handle long article with chunking 
        if len(article_content) > 8000:
            chunks = self.text_splitter.split_text(article_content)
            summaries = []

            # Summarize each chunk 
            for i, chunk in enumerate(chunks):
                chunk_summary = self.summary_chain.invoke(chunk)
                summaries.append(f"Part {i+1}: {chunk_summary}")

            # Combine chunk summaries 
            combined_content = "\n\n".join(summaries)
            return self.summary_chain.invoke(combined_content)
        
        # Direct summarization for normal-length articles 
        return self.summary_chain.invoke(article_content)
    

In [4]:
# Initialize components 
fetcher = NewsFetcher()
summarizer = ArticleSummarizer(model_name="mixtral-8x7b-32768")  # Faster model 

# Get articles on a topic 
articles = fetcher.fetch_articles(
    query="AI in healthcare",
    num_articles=3, 
    days_back=2
)

# Summarize each article 
for i, article in enumerate(articles, 1):
    print(f"\n{'='*50}")
    print(f"Article {i}: {article['title']}")
    print(f"Souce: {article['source']}")
    print(f"URL: {article['url']}")

    # Summarize content 
    summary =summarizer.summarize(article['content'])
    print(f"\nSummary")
    print(summary)
    print(f"{'='*50}\n")
    


Article 1: ‘Saturday Night Live’ VFX Workers Ratify First Union Contract Via IATSE
Souce: Deadline
URL: http://deadline.com/2025/07/saturday-night-live-vfx-workers-ratify-first-contract-iatse-1236456965/

Summary
Visual effects workers at Saturday Night Liveare now officially operating under a union contract.
The 15-member group voted unanimously in July to ratify its first union contract since organizing wi… [+1185 chars]


Article 2: Swiss AI For Public Good: A ProSocial AI Blueprint For The World?
Souce: Forbes
URL: https://www.forbes.com/sites/corneliawalther/2025/07/14/swiss-ai-for-public-good-a-prosocial-ai-blueprint-for-the-world/

Summary
Cow in Swiss Montains
getty
Current Artificial intelligence development resembles a high-stakes race between tech giants. But Switzerland has chosen a different path. The Swiss AI Initiative's fort… [+10100 chars]


Article 3: HealthMutual, MediConCen partner to develop AI
Souce: Lifeinsuranceinternational.com
URL: https://www.lifeinsurancein

In [6]:
# 2
import os 
import requests 
from langchain_groq import ChatGroq 
from langchain.prompts import PromptTemplate 
from langchain.schema import StrOutputParser 
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_core.runnables import RunnablePassthrough
from dotenv import load_dotenv
from bs4 import BeautifulSoup
#from newspaper import Article, ArticleException
import re 
import nltk 
import logging 
#from nltk.corpus import stopwords 
#from nltk.tokenize import sent_tokenize 

# Configure logging 
logging.basicConfig(level=logging.INFO, format='%(asctime)s -%(levelname)s - %(message)s')

# download required NLTK data 
#nltk.download('punkt')
#nltk.download('stopwords')

# load environment variables 
load_dotenv()

class FullTextExtractor:
    """Robust text extraction without newspaper library"""
    @staticmethod
    def extract_text(url: str) -> str:
        try:
            headers = {
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
                'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
                'Accept-Language': 'en-US,en;q=0.5',
                'Connection': 'keep-alive',
                'Referer': 'https://www.google.com/'
            }

            # Try to bypass paywalls for specific sites
            if "businessinsider.com" in url:
                headers['Referer'] = 'https://www.facebook.com/'
                headers['Cookie'] = 'bounceClientVisit=1; bounceClientFirstVisit=1'
            
            response = requests.get(url, headers=headers, timeout=15)
            response.raise_for_status()
            
            soup = BeautifulSoup(response.text, 'lxml')

            # Remove unwanted elements
            for element in soup(['script', 'style', 'nav', 'footer', 'aside', 'form', 'header', 
                                 'iframe', 'button', 'svg', 'figure', 'noscript', 'img', 'link']):
                element.decompose()

            # Find main content using common selectors
            selectors = [
                'article', 
                'div.article-body',
                'div.post-content',
                'div.story-content',
                'div.entry-content',
                'div.content-wrapper',
                'div.main-content',
                'section.main',
                'div.article-content',
                'div#article-body',
                'div.article-text',
                'div.post-body'
            ]
            
            article_body = None
            for selector in selectors:
                article_body = soup.select_one(selector)
                if article_body:
                    break

            # Fallback to body if no specific content found
            if not article_body:
                article_body = soup.body

            # Extract text with paragraph structure
            text = ""
            for element in article_body.find_all(['p', 'h1', 'h2', 'h3', 'h4', 'blockquote']):
                if element.name == 'p':
                    text += element.get_text().strip() + "\n\n"
                elif element.name == 'blockquote':
                    text += f"> {element.get_text().strip()}\n\n"
                else:  # Headings
                    text += f"\n\n{element.get_text().strip().upper()}\n\n"

            # Clean and compress text
            text = re.sub(r'\n{3,}', '\n\n', text)  # Remove excessive newlines
            text = re.sub(r'\[\+[0-9,]+\s*chars?\]', '', text)  # Remove truncation markers
            return text.strip()
                
        except Exception as e:
            logging.error(f"Extraction failed for {url}: {str(e)}")
            return ""

class ArticleSummarizer:
    def __init__(self, model_name="llama3-70b-8192"):
        self.groq_api_key = os.getenv("GROQ_API_KEY")
        if not self.groq_api_key:
            raise ValueError("GROQ_API_KEY not found in .env file")
        
        self.model = ChatGroq(
            temperature=0.3,
            model_name=model_name,
            api_key=self.groq_api_key
        )

        self.text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=8000,
            chunk_overlap=300,
            length_function=len 
        )

        self.summary_prompt = PromptTemplate.from_template(
            """
            Create a professional 2-paragraph news summary from the following article content.
            Follow these guidelines:
            1. Omit any introductory phrases like "Here is a summary"
            2. First paragraph: Core innovation/event and key facts 
            3. Second paragraph: Key entities and business implications 
            4. Include specific numbers and metrics when available 
            5. Maintain jouranlistic tone

            Example structure:
            [Company] has [achievement] using [technology]. The development [specific impact]... 
            Key players include [names] from [organizations]. This could [business implication]...

            Article Content:
            {content}

            Professional Summary: 

            """ 
        )

        self.summary_chain = (
            {"content": RunnablePassthrough()}
            | self.summary_prompt
            | self.model
            |StrOutputParser()
        )

    def summarize(self, article: dict) -> str:
        """Robust summarization with multiple fallbacks"""
        # Get full article content 
        full_text = FullTextExtractor.extract_text(article['url'])

        # Use snippet if full text extraction failed 
        if not full_text.strip() or len(full_text) < 300:
            logging.warning(f"Using snippet for {article['url']}")
            full_text = self.clean_snippet(article['content'])
            if len(full_text) < 100:
                return "Summary unavailable: Could not retrieve content"
            
        # Clean and prepare text 
        clean_text = self.preprocess_text(full_text)
        logging.info(f"Processing text: {len(clean_text)} characters")
        
        # Handle long articles with chunking 
        if len(clean_text) > 8000:
            chunks = self.text_splitter.split_text(clean_text)
            chunk_summaries = []

            for i, chunk in enumerate(chunks, 1):
                logging.info(f"Summarizing chunk {i}/{len(chunks)}")
                chunk_summaries.append(self.summarize_chunk(chunk))
                
            combined_content = "\n\n".join(chunk_summaries)
            return self.summarize_chunk(combined_content)
        
        return self.summarize_chunk(clean_text)
     
    def summarize_chunk(self, text: str) -> str:
        """Handle single chunk summarization with error recovery"""
        try:
            return self.summary_chain.invoke(text)
        except Exception as e:
            logging.error(f"Summarization error: {str(e)}")
            return "Summary generation failed" 
        
    def preprocess_text(self, text: str) -> str:
        """Clean text before processing"""
        # Remove common boilerplate
        patterns = [
            r"Sign up for.*newsletters",
            r"Subscribe to.*channel",
            r"Follow us on.*",
            r"Download our.*app",
            r"Read more:.*",
            r"Continue reading.*",
            r"Advertisement",
            r"Recommended for you",
            r"Related:.*",
            r"Please enter your email",
            r"Already have an account\? Log in",
            r"Create a free account",
            r"© Copyright.*"
        ]
        
        
        for pattern in patterns:
            text = re.sub(pattern, "", text, flags=re.IGNORECASE)
        
        return text.strip()
        
    def clean_snippet(self, snippet: str) -> str:
        """Clean NewsAPI snippets"""
        # Remove truncation markers
        snippet = re.sub(r'\[\+[0-9,]+\s*chars?\]', '', snippet)
        # Remove HTML tags
        snippet = re.sub(r'<[^>]+>', '', snippet)
        return snippet


In [7]:
# Initialize components 
fetcher = NewsFetcher()

summarizer = ArticleSummarizer(model_name="llama3-70b-8192")

# Get articles 
articles = fetcher.fetch_articles(
    query="Stock Market",
    num_articles=3,
    days_back=2
)

# Process each article 
for i, article in enumerate(articles, 1):
    print(f"\n{'='*50}")
    print(f"Article {i}: {article['title']}")
    print(f"Source: {article['source']}")
    print(f"URL: {article['url']}")

    try:
        # get enhanced summary 
        summary = summarizer.summarize(article)
        print(f"\n📝 Summary:")
        print(summary)
    except Exception as e:
        print(f"❌ Summarization failed: {str(e)}")

    print(f"{'='*50}\n")



Article 1: Tesla stock needs 'Elon Musk magic,' expert says
Source: Yahoo Entertainment
URL: https://finance.yahoo.com/video/tesla-stock-needs-elon-musk-100024661.html


2025-07-16 11:52:52,835 -INFO - Processing text: 3501 characters
2025-07-16 11:52:54,285 -INFO - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"



📝 Summary:
Tesla's stock valuation is heavily reliant on "Elon Musk magic," according to Tufts University Fletcher School of Law and Diplomacy adjunct associate professor Gautam Mukunda. Musk's recent announcement to launch a new third party has sparked concerns among investors and analysts, who fear that the CEO's distraction from the company's core business could negatively impact the stock. Mukunda argues that Tesla's current valuation is attributed to Musk's ability to drive innovation and growth, and that his involvement is crucial to the company's success.

Key players, including Musk and investors, are closely watching the situation unfold. If Musk is not fully focused on Tesla, the company's valuation could take a hit. Mukunda suggests that investors who are ignoring the drama and focusing solely on Tesla's electric vehicle sales may need to reevaluate their strategy, as the company's dominance in the electric car market is being challenged by rivals producing high-quality car

2025-07-16 11:52:55,311 -INFO - Processing text: 6384 characters
2025-07-16 11:52:56,489 -INFO - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"



📝 Summary:
Here is a 2-paragraph professional news summary:

Roku, Robert Half, and Ligand Pharmaceuticals are among the top 10 undervalued stocks in the US market, with estimated discounts of up to 49.4% based on cash flows. The US market has remained flat over the past week, but has shown an impressive 11% increase over the past year with earnings forecasted to grow by 15% annually. Identifying undervalued stocks can offer potential value opportunities for investors looking to capitalize on future growth prospects.

Key players include Mr. Cooper Group, Apollo Global Management, and Lazard, which are trading at discounts of up to 25.8% to their estimated fair values. These companies have strong growth prospects, with forecasted annual earnings growth rates ranging from 18.93% to 24.8%. Despite some challenges, these companies have the potential to outperform the broader US market, making them attractive opportunities for investors seeking value and growth.


Article 3: Rigetti Compu

2025-07-16 11:52:57,608 -INFO - Processing text: 2923 characters
2025-07-16 11:52:58,521 -INFO - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"



📝 Summary:
Rigetti Computing (NASDAQCM:RGTI) has completed a $350 million equity offering, coinciding with a 41% share price increase over the last quarter. This significant valuation change is attributed to the company's strategic realignment under growth benchmarks, collaborative efforts in quantum technology, and successful capital raising initiatives.

Key players include Rigetti Computing, which has achieved a total return of over 900% in the past year, outperforming the broader US market and the US Semiconductor industry. The company's transition to growth-oriented benchmarks may positively impact its revenue and earnings forecasts, with analysts expecting potential revenue growth despite remaining unprofitable in the near future. With the current share price below the consensus analyst target, a potential uplift of around 21.8% is forecasted.



In [11]:
import os
import requests
from langchain_groq import ChatGroq
from langchain.prompts import PromptTemplate
from langchain.schema import StrOutputParser
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_core.runnables import RunnablePassthrough
from dotenv import load_dotenv
from bs4 import BeautifulSoup
import re
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import sent_tokenize

# Download required NLTK data
nltk.download('punkt')
nltk.download('stopwords')

# Load environment variables
load_dotenv()

class FullTextExtractor:
    """Improved full-text extraction with better content detection"""
    @staticmethod
    def extract_text(url: str) -> str:
        try:
            headers = {
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
                'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
                'Accept-Language': 'en-US,en;q=0.5',
                'Connection': 'keep-alive'
            }
            response = requests.get(url, headers=headers, timeout=15)
            response.raise_for_status()
            
            soup = BeautifulSoup(response.text, 'html.parser')
            
            # Remove unwanted elements
            for element in soup(['script', 'style', 'nav', 'footer', 'aside', 'form', 'header', 
                                 'iframe', 'button', 'svg', 'figure', 'noscript']):
                element.decompose()
                
            # Find main content using common selectors
            selectors = [
                'article', 
                'div.article-body',
                'div.post-content',
                'div.story-content',
                'div.entry-content',
                'div.content-wrapper',
                'div.main-content',
                'section.main'
            ]
            
            article_body = None
            for selector in selectors:
                article_body = soup.select_one(selector)
                if article_body:
                    break
                    
            # Fallback to body if no specific content found
            if not article_body:
                article_body = soup.body
                
            # Extract text with paragraph structure
            text = ""
            for element in article_body.find_all(['p', 'h1', 'h2', 'h3']):
                if element.name == 'p':
                    text += element.get_text().strip() + "\n\n"
                else:  # Headings
                    text += f"\n\n{element.get_text().strip().upper()}\n\n"
            
            # Clean and compress text
            text = re.sub(r'\n{3,}', '\n\n', text)  # Remove excessive newlines
            return text.strip()
                
        except Exception as e:
            print(f"Error extracting text from {url}: {str(e)}")
            return ""

class ArticleSummarizer:
    def __init__(self, model_name="llama3-70b-8192"):
        self.groq_api_key = os.getenv("GROQ_API_KEY")
        if not self.groq_api_key:
            raise ValueError("GROQ_API_KEY not found in .env file")
            
        self.model = ChatGroq(
            temperature=0.3,
            model_name=model_name,
            api_key=self.groq_api_key
        )
        
        self.text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=8000,
            chunk_overlap=300,
            length_function=len
        )
        
        self.summary_prompt = PromptTemplate.from_template(
            """You are an expert news analyst. Create a concise 2-paragraph summary of the article below.
            Focus on these key elements:
            1. The core innovation, event, or main claim
            2. Key people/organizations involved
            3. Significant data points or metrics
            4. Business or industry implications
            
            Structure your summary as:
            - First paragraph: Core insight and key facts
            - Second paragraph: Named entities and significance
            
            If the article content is not available, return only: "Summary unavailable"
            
            Article Content:
            {content}
            
            Summary:"""
        )
        
        self.summary_chain = (
            {"content": RunnablePassthrough()} 
            | self.summary_prompt
            | self.model
            | StrOutputParser()
        )
    
    def summarize(self, article: dict) -> str:
        """Summarize article by extracting full text first"""
        # Get full article content
        full_text = FullTextExtractor.extract_text(article['url'])
        
        # Use snippet if full text extraction failed
        if not full_text.strip():
            print(f"⚠️ Extraction failed for: {article['url']}")
            print("Attempting to use NewsAPI snippet...")
            full_text = article['content']
        
        # If we still don't have text, return error
        if not full_text.strip():
            return "Summary unavailable: Could not retrieve article content"
            
        print(f"\nExtracted content length: {len(full_text)} characters")
        
        # Pre-process text to remove boilerplate
        full_text = self.remove_boilerplate(full_text)
        
        # Summarize with Groq LLM
        if len(full_text) > 7000:
            print("Article too long, chunking...")
            chunks = self.text_splitter.split_text(full_text)
            chunk_summaries = []
            
            for i, chunk in enumerate(chunks, 1):
                print(f"Summarizing chunk {i}/{len(chunks)}...")
                chunk_summaries.append(self.summary_chain.invoke(chunk))
                
            combined_content = "\n\n".join(chunk_summaries)
            print("Creating final summary from chunks...")
            return self.summary_chain.invoke(combined_content)
        
        return self.summary_chain.invoke(full_text)
    
    def remove_boilerplate(self, text: str) -> str:
        """Remove common boilerplate text"""
        patterns = [
            r"Sign up for.*newsletters",
            r"Subscribe to.*channel",
            r"Follow us on.*",
            r"Download our.*app",
            r"Read more:.*",
            r"Continue reading.*",
            r"\[.*chars\]",
            r"Advertisement",
            r"Recommended for you",
            r"Related:.*"
        ]
        
        for pattern in patterns:
            text = re.sub(pattern, "", text, flags=re.IGNORECASE)
        
        return text

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\HP\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\HP\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [12]:
# Initialize components
fetcher = NewsFetcher()
summarizer = ArticleSummarizer(model_name="llama3-70b-8192")
    
# Get articles
articles = fetcher.fetch_articles(
    query="AI Startups",
    num_articles=3,
    days_back=2
)
    
# Process each article
for i, article in enumerate(articles, 1):
    print(f"\n{'='*50}")
    print(f"Article {i}: {article['title']}")
    print(f"Source: {article['source']}")
    print(f"URL: {article['url']}")
        
    try:
        # Get enhanced summary
        summary = summarizer.summarize(article)
        print(f"\n📝 Summary:")
        print(summary)
    except Exception as e:
        print(f"❌ Summarization failed: {str(e)}")
        
    print(f"{'='*50}\n")


Article 1: Perplexity's engineers use 2 AI coding tools, and they've cut development time from days to hours
Source: Business Insider
URL: https://www.businessinsider.com/perplexity-engineers-ai-tools-cut-development-time-days-hours-2025-7

Extracted content length: 62 characters

📝 Summary:
Summary unavailable


Article 2: How Google Killed OpenAI’s $3 Billion Deal Without an Acquisition
Source: Gizmodo.com
URL: https://gizmodo.com/how-google-killed-openais-3-billion-deal-without-an-acquisition-2000628693

Extracted content length: 6112 characters

📝 Summary:
Here is a 2-paragraph summary of the article:

Google has dealt a significant blow to OpenAI by poaching key talent from Windsurf, a startup that had a reported $3 billion acquisition deal with OpenAI. Instead, Google is paying $2.4 billion to hire top Windsurf employees, including the CEO, and take a non-exclusive license to its technology. This move solidifies a rising trend in Silicon Valley's AI arms race, known as the "non-

In [None]:
# 3 
class SentimentAnalyzer:
    def __init__(self, model_name="llama3-70b-8192"):
        self.groq_api_key = os.getenv("GROQ_API_KEY")
        if not self.groq_api_key:
            raise ValueError("GROQ_API_KEY not found in .env file")
        
        self.model = ChatGroq(
            temperature=0.1,  # Lower temperature for classification
            model_name=model_name,
            api_key=self.groq_api_key
        )
        self.sentiment_prompt = PromptTemplate.from_template(
            """Classify the sentiment of the following news summary as POSITIVE, NEGATIVE, or NEUTRAL.
            Consider these guidelines:
            1. POSITIVE: Describes growth, success, breakthroughs, or favorable outcomes
            2. NEGATIVE: Describes failures, controversies, losses, or unfavorable outcomes
            3. NEUTRAL: Balanced reporting, announcements without clear positive/negative slant
            
            Respond ONLY with one word: POSITIVE, NEGATIVE, or NEUTRAL
            
            News Summary:
            {summary}
            Sentiment:"""
        )
        self.sentiment_chain = (
            {"summary": RunnablePassthrough()} 
            | self.sentiment_prompt
            | self.model
            | StrOutputParser()
        )

    def analyze(self, summary: str) -> str:
        """Analyze sentiment of a news summary"""
        if "unavailable" in summary.lower() or len(summary) < 20:
            return "NEUTRAL"
        
        try:
            sentiment = self.sentiment_chain.invoke(summary)
            # Clean and standardize the output
            sentiment = sentiment.strip().upper()
            if "POSITIVE" in sentiment:
                return "POSITIVE"
            elif "NEGATIVE" in sentiment:
                return "NEGATIVE"
            return "NEUTRAL"
        except Exception as e:
            print(f"Sentiment analysis failed: {str(e)}")
            return "NEUTRAL"



In [12]:
# 4 
class DailyDigestGenerator:
    def __init__(self, topic: str):
        self.topic = topic

    def generate(self, articles: list[dict]) -> str:
        date_str = datetime.now(timezone.utc).strftime("%Y-%m-%d")

        # Count sentiment distribution
        sentiment_counts = {"POSITIVE": 0, "NEGATIVE": 0, "NEUTRAL": 0}
        for article in articles:
            sentiment = article.get('sentiment', 'NEUTRAL')
            sentiment_counts.setdefault(sentiment, 0)
            sentiment_counts[sentiment] += 1

        # Create sentiment summary
        sentiment_summary = (
            f"🔥 {sentiment_counts['POSITIVE']} Positive | "
            f"⚠️ {sentiment_counts['NEUTRAL']} Neutral | "
            f"⚡ {sentiment_counts['NEGATIVE']} Negative"
        )

        # Create key takeaways
        takeaways = []
        for article in articles:
            emoji = (
                "🔥" if article['sentiment'] == "POSITIVE" else
                "⚡" if article['sentiment'] == "NEGATIVE" else
                "⚠️"
            )
            takeaways.append(
                f"{emoji} {article['title']} ({article['sentiment']})\n"
                f"   - {article['summary']}\n"
                f"   - Source: {article['source']}"
            )
        # Generate digest
        lines = [
            f"DAILY NEWS DIGEST: {self.topic.upper()}",
            f"Date: {date_str}",
            f"Articles: {len(articles)}", 
            sentiment_summary,
            "",
            "KEY TAKEAWAYS:"
        ]
        for tk in takeaways:
            lines.append(f"• {tk}")
        lines.append("")
        lines.append("SOURCES:")
        for i, article in enumerate(articles, 1):
            lines.append(f"[{i}] {article['url']}")

        return "\n".join(lines)


        

In [10]:
from datetime import datetime, timezone

class DailyDigestGenerator:
    def __init__(self, topic: str):
        self.topic = topic
        self.date = datetime.now(timezone.utc).strftime("%Y-%m-%d")
        
    def generate(self, articles: list) -> str:
        """Generate daily digest from processed articles"""
        # Count sentiment distribution
        sentiment_counts = {"POSITIVE": 0, "NEGATIVE": 0, "NEUTRAL": 0}
        for article in articles:
            sentiment_counts[article['sentiment']] += 1
        
        # Create sentiment summary
        sentiment_summary = (
            f"🔥 {sentiment_counts['POSITIVE']} Positive | "
            f"⚠️ {sentiment_counts['NEUTRAL']} Neutral | "
            f"⚡ {sentiment_counts['NEGATIVE']} Negative"
        )
        
        # Create key takeaways
        takeaways = []
        for i, article in enumerate(articles, 1):
            emoji = "🔥" if article['sentiment'] == "POSITIVE" else "⚡" if article['sentiment'] == "NEGATIVE" else "⚠️"
            # Truncate summary if too long
            summary = article['summary']
            if len(summary) > 300:
                summary = summary[:297] + "..."
                
            takeaways.append(
                f"{emoji} {article['title']} ({article['sentiment']})\n"
                f"   - {summary}\n"
                f"   - Source: {article['source']}"
            )
        
        # Generate digest
        digest = f"""
DAILY NEWS DIGEST: {self.topic.upper()}
Date: {self.date}
Articles: {len(articles)}
Sentiment: {sentiment_summary}

KEY TAKEAWAYS:
{"".join([f'\n• {tk}' for tk in takeaways])}

SOURCES:
{"".join([f'\n[{i}] {article["url"]}' for i, article in enumerate(articles, 1)])}
"""
        
        return digest

In [None]:
# 5
def main():
    load_dotenv()
    topic = "AI Startups"

    fetcher = NewsFetcher()
    summarizer = ArticleSummarizer()
    sentiment_analyzer = SentimentAnalyzer(model_name="llama3-8b-8192")
    digest_generator = DailyDigestGenerator(topic)

    # Get and process articles
    raw_articles = fetcher.fetch_articles(query=topic, num_articles=5, days_back=1)
    processed_articles = []

    for article in raw_articles:
        summary = summarizer.summarize(article)
        sentiment = sentiment_analyzer.analyze(summary)
        processed_articles.append({
            "title": article['title'],
            "source": article['source'],
            "url": article['url'],
            "summary": summary,
            "sentiment": sentiment
        })

    # Generate and save digest
    digest = digest_generator.generate(processed_articles)
    print(digest)
    filename = f"news_digest_{datetime.now(timezone.utc).strftime('%Y%m%d')}.txt"
    # Open file with utf-8 encoding to support emojis
    with open(filename, "w", encoding="utf-8") as f:
        f.write(digest)
    print(f"\nDigest saved to {filename}")

main()


2025-07-16 12:19:45,049 -INFO - Processing text: 192 characters
2025-07-16 12:19:45,927 -INFO - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
2025-07-16 12:19:46,303 -INFO - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
2025-07-16 12:19:46,708 -INFO - Processing text: 4766 characters
2025-07-16 12:19:47,768 -INFO - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
2025-07-16 12:19:48,114 -INFO - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
2025-07-16 12:19:49,900 -INFO - Processing text: 2088 characters
2025-07-16 12:19:50,960 -INFO - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
2025-07-16 12:19:51,349 -INFO - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
2025-07-16 12:19:51,550 -INFO - Processing text: 4317 characters
2025-07-16 12:19:52,651 -INFO - HTTP Re

DAILY NEWS DIGEST: AI STARTUPS
Date: 2025-07-16
Articles: 5
🔥 3 Positive | ⚠️ 2 Neutral | ⚡ 0 Negative

KEY TAKEAWAYS:
• ⚠️ AI is raising the bar for sales — and Microsoft's layoffs prove the 'relationship guy' is out, says a software investor (NEUTRAL)
   - Here is a professional 2-paragraph news summary:

Microsoft has initiated a workforce reduction, laying off approximately 9,000 employees, which accounts for less than 4% of its total workforce. The majority of those affected are generalist sales representatives. This move is part of the company's efforts to restructure and adapt to changing market conditions.

Key players involved in this decision include Microsoft's top executives, who are driving the company's strategic shift. This significant downsizing is expected to have a ripple effect on the tech industry, potentially influencing the job market and talent acquisition strategies of other major players in the sector.
   - Source: Business Insider
• ⚠️ Shopify has quietly set 

In [29]:
def main():
    # Initialize components
    load_dotenv()
    topic = "AI Startups"
    
    fetcher = NewsFetcher()
    summarizer = ArticleSummarizer()
    sentiment_analyzer = SentimentAnalyzer(model_name="llama3-8b-8192")  # Smaller model for sentiment
    digest_generator = DailyDigestGenerator(topic)
    
    # Get articles
    articles = fetcher.fetch_articles(
        query=topic,
        num_articles=5,
        days_back=1
    )
    
    processed_articles = []
    
    # Process each article
    for i, article in enumerate(articles, 1):
        print(f"\n{'='*50}")
        print(f"Processing Article {i}: {article['title']}")
        print(f"Source: {article['source']}")
        print(f"URL: {article['url']}")
        
        try:
            # Summarize content
            summary = summarizer.summarize(article)
            
            # Analyze sentiment
            sentiment = sentiment_analyzer.analyze(summary)
            
            # Store processed article
            processed_article = {
                "title": article['title'],
                "source": article['source'],
                "url": article['url'],
                "summary": summary,
                "sentiment": sentiment
            }
            processed_articles.append(processed_article)
            
            print(f"\n📝 Summary ({sentiment}):")
            print(summary)
            
        except Exception as e:
            print(f"❌ Processing failed: {str(e)}")
        
        print(f"{'='*50}\n")
    
    # Generate daily digest
    if processed_articles:
        digest = digest_generator.generate(processed_articles)
        print("\n" + "="*60)
        print("DAILY DIGEST REPORT")
        print("="*60)
        print(digest)
        
        # Save to file
        filename = f"news_digest_{datetime.now(timezone.utc).strftime('%Y%m%d')}.txt"
        with open(filename, "w", encoding="utf-8") as f:
            f.write(digest)
        print(f"\nDigest saved to {filename}")
    else:
        print("No articles processed. Digest not generated.")

main()


Processing Article 1: Perplexity's engineers use 2 AI coding tools, and they've cut development time from days to hours
Source: Business Insider
URL: https://www.businessinsider.com/perplexity-engineers-ai-tools-cut-development-time-days-hours-2025-7

Extracted content length: 62 characters

📝 Summary (NEUTRAL):
Summary unavailable


Processing Article 2: I was laid off from my product management job at Microsoft at 25. It may have been the best thing for me.
Source: Business Insider
URL: https://www.businessinsider.com/laid-off-microsoft-big-tech-best-thing-2025-7

Extracted content length: 62 characters

📝 Summary (NEUTRAL):
Summary unavailable


Processing Article 3: AI Or The Human Touch? Striking A Balance In Customer Retention
Source: Forbes
URL: https://www.forbes.com/sites/alisoncoleman/2025/07/14/ai-or-the-human-touch-striking-a-balance-in-customer-retention/

Extracted content length: 7104 characters
Article too long, chunking...
Summarizing chunk 1/1...
Creating final summa

In [15]:
def main():
    # Initialize components
    load_dotenv()
    topic = "AI Startups"

    fetcher = NewsFetcher()
    summarizer = ArticleSummarizer(model_name="mixtral-8x7b-32768")
    sentiment_analyzer = SentimentAnalyzer(model_name="llama3-8b-8192")  # Smaller model for sentiment
    digest_generator = DailyDigestGenerator(topic)

    # Get articles
    articles = fetcher.fetch_articles(
        query=topic,
        num_articles=5,
        days_back=1
    )

    processed_articles = []

    # Process each article
    for i, article in enumerate(articles, 1):
        print(f"\n{'='*50}")
        print(f"Processing Article {i}: {article['title']}")
        print(f"Source: {article['source']}")
        print(f"URL: {article['url']}")

        try:
            # Summarize content
            summary = summarizer.summarize(article)
            
            # Analyze sentiment
            sentiment = sentiment_analyzer.analyze(summary)

            # Store processed article
            processed_article = {
                "title": article['title'],
                "source": article['source'],
                "url": article['url'],
                "summary": summary,
                "sentiment": sentiment
            }
            processed_articles.append(processed_article)
            
            print(f"\n📝 Summary ({sentiment}):")
            print(summary)

        except Exception as e:
            print(f"❌ Processing failed: {str(e)}")

        print(f"{'='*50}\n")

    # Generate daily digest
    digest = digest_generator.generate(processed_articles)
    print("\n" + "="*60)
    print("DAILY DIGEST REPORT")
    print("="*60)
    print(digest)

    # Save to file
    filename = f"news_digest_{datetime.now(timezone.utc).strftime('%Y%m%d')}.txt"
    with open(filename, "w") as f:
        f.write(digest)
    print(f"\nDigest saved to {filename}")

main()


KeyError: 'sentiment'