# Website Summarizer - DeepSeek via Ollama
Summarizes a web page with a local DeepSeek-R1 model served by Ollama, optionally stripping the model's `<think>…</think>` reasoning from the output.

In [16]:
import requests
from bs4 import BeautifulSoup
from IPython.display import Markdown, display
import warnings

# !ollama pull deepseek-r1:1.5b
# Disable warnings (including SSL warnings)
warnings.filterwarnings('ignore')

In [None]:
# Ollama connection settings (tune these to point at another model or server)
MODEL: str = "deepseek-r1:1.5b"                      # model tag passed in the chat payload
API: str = "http://localhost:11434/api/chat"         # chat endpoint URL
HEADERS: dict = {"Content-Type": "application/json"}  # headers sent with the chat request

In [33]:
import re

class WebsiteSummarizer:
    """Summarize a web page with a DeepSeek model served through Ollama.

    The page is fetched with ``requests``, reduced to its text with
    BeautifulSoup, and sent to the chat endpoint. Unless asked otherwise, the
    model's ``<think>...</think>`` reasoning is stripped from the reply.

    Args:
        api_url: URL of the Ollama chat endpoint.
        model: Model name placed in the chat payload.
        headers: HTTP headers sent with the chat request.
        fetch_timeout: Seconds to wait for the website before giving up.
        llm_timeout: Seconds to wait for the model reply. Generous by default
            because generation can be slow; ``None`` waits indefinitely.
        verify_ssl: Whether to verify TLS certificates when fetching the page.
            Defaults to ``False`` to keep the notebook's existing behaviour;
            pass ``True`` for anything beyond a local demo.
    """

    # Matches one complete <think>...</think> block; DOTALL lets it span lines.
    _THINK_BLOCK = re.compile(r'<think>.*?</think>', flags=re.DOTALL)

    def __init__(self, api_url=API, model=MODEL, headers=HEADERS,
                 fetch_timeout=30, llm_timeout=600, verify_ssl=False):
        self.api_url = api_url
        self.model = model
        self.headers = headers
        self.fetch_timeout = fetch_timeout
        self.llm_timeout = llm_timeout
        self.verify_ssl = verify_ssl
        self.user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"

    def _filter_thinking(self, text):
        """Return ``text`` with every ``<think>...</think>`` block removed.

        If nothing is left after stripping (the reply was only thinking),
        the original text is returned unchanged so the caller never gets an
        empty summary.
        """
        cleaned = self._THINK_BLOCK.sub('', text).strip()
        return cleaned if cleaned else text

    def _fetch_website(self, url):
        """Download ``url`` and return ``(title, text)``.

        Returns:
            A tuple of the page title (``"No title"`` when the page has no
            usable ``<title>``) and the page text, one fragment per line.

        Raises:
            requests.HTTPError: If the server answers with an error status.
            requests.Timeout: If the site does not respond within
                ``fetch_timeout`` seconds.
        """
        response = requests.get(
            url,
            headers={"User-Agent": self.user_agent},
            # Without a timeout an unresponsive site would block the cell forever.
            timeout=self.fetch_timeout,
            # NOTE(security): with verify_ssl=False the certificate is not
            # checked, so the fetched content cannot be trusted to come from `url`.
            verify=self.verify_ssl,
        )
        response.raise_for_status()

        soup = BeautifulSoup(response.content, 'html.parser')
        # `soup.title.string` is None for an empty <title> or one with nested
        # children, which made `.strip()` raise AttributeError; get_text() is safe.
        title_tag = soup.title
        title = (title_tag.get_text(strip=True) if title_tag else "") or "No title"

        # Remove elements that carry no readable content
        for tag in soup(["script", "style", "img", "input"]):
            tag.decompose()

        text = soup.get_text(separator="\n", strip=True)
        return title, text

    def summarize(self, url, show_thinking=False):
        """Fetch ``url`` and return the model's markdown summary as a string.

        Args:
            url: Address of the page to summarize.
            show_thinking: When true, return the raw reply including any
                ``<think>`` blocks; otherwise they are filtered out.

        Raises:
            requests.HTTPError: If the website or the chat endpoint answers
                with an error status.
            requests.Timeout: If either request exceeds its timeout.
        """
        title, content = self._fetch_website(url)

        messages = [
            {"role": "system", "content": "Analyze website content and provide a concise markdown summary focusing on key news and announcements."},
            {"role": "user", "content": f"Website: {title}\n\nContent:\n{content}"}
        ]

        # stream=False asks for the whole reply in a single JSON document
        payload = {"model": self.model, "messages": messages, "stream": False}
        response = requests.post(
            self.api_url,
            json=payload,
            headers=self.headers,
            timeout=self.llm_timeout,
        )
        response.raise_for_status()

        result = response.json()['message']['content']
        return result if show_thinking else self._filter_thinking(result)

    def display(self, url, show_thinking=False):
        """Summarize ``url`` and render the result as Markdown in the notebook."""
        summary = self.summarize(url, show_thinking)
        # Inside a method, `display` resolves to the module-level IPython
        # function, not to this method (class scope is not searched).
        display(Markdown(summary))

In [34]:
# Create a summarizer instance with the constructor defaults (API, MODEL, HEADERS).
# The constructor only stores settings; no request is made at this point.
summarizer = WebsiteSummarizer()
print("✅ Website summarizer ready!")

✅ Website summarizer ready!


In [36]:
# Usage example: fetch the page, summarize it with the model and render the
# reply as Markdown. show_thinking defaults to False, so <think> blocks are filtered.
summarizer.display("https://cnn.com")

Here is a concise markdown summary highlighting key news and announcements from CNN Breaking News:

---

### **US Region**

1. **Breaking News:**
   - President Trump's tweets and political emails continue to spark controversy and scrutiny.

2. **International Coverage:**
   - Climate change in the US faces a critical debate, with international experts weighing the urgency of action.
   - Global health crisis from Brazil highlights concerns about pandemics and their impact on populations.

3. ** celebrity Gossip & Moments:**
   - Lali Houghton shares a heartfelt moment during her Adele album performance.
   - Julia Demaree Nikhinson's reaction to recent political moves is widely discussed.

---

### **International Region**

1. **Breaking News:**
   - U.S.-China trade war details are shaping policy discussions, with both sides expressing concerns over trade disputes.

2. **Global Coverage:**
   - Climate change in the US and global health crises from Brazil are key topics of international debate.
   - International news highlights the impact of these global issues on individual lives.

---

### **Asian Region**

1. **Breaking News:**
   - Issues with Japanese nuclear powers pose a serious threat to global security, drawing attention worldwide.

2. **International Coverage:**
   - Global health challenges from Brazil continue to shape international perspectives on pandemics.
   - The U.S.-China trade war is ongoing, influencing global economic policies.

---

### **Additional Highlights**

- **Weather News:** Typhoon Ragasa, a significant weather event, features multiple videos.
- **Celebrity Content:** Fun moments and engaging video highlights add an entertaining touch to the site's content.
- **Video Platforms:** Most videos load quickly, while some may take longer due to compatibility issues.

---

This summary captures the essence of CNN Breaking News, emphasizing its balance of breaking news, international coverage, and celebrity moments.

In [35]:
# Cheat sheet of the three ways to call the summarizer
print(
    "Usage:",
    "summarizer.display(url)              # Clean summary",
    "summarizer.display(url, True)        # With thinking process",
    "text = summarizer.summarize(url)     # Get text only",
    sep="\n",
)

# Smoke test against a small static page; report the failure instead of raising
try:
    summary = summarizer.summarize("https://example.com")
except Exception as e:
    print(f"❌ Error: {e}")
else:
    print(f"\n✅ Success! Summary length: {len(summary)} characters")

Usage:
summarizer.display(url)              # Clean summary
summarizer.display(url, True)        # With thinking process
text = summarizer.summarize(url)     # Get text only

✅ Success! Summary length: 187 characters

✅ Success! Summary length: 187 characters
