# Website summarizer with local Ollama

In [1]:
import os
import requests
from bs4 import BeautifulSoup
from IPython.display import Markdown, display

# Functions for website summarizer

In [14]:
# HTTP headers sent when a page is fetched: a desktop-Chrome User-Agent string.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/132.0.0.0 Safari/537.36"
    ),
}

In [3]:
# A class to represent a Webpage
class Website:
    """
    A downloaded web page reduced to its title and its visible body text.

    Instances expose three attributes set by __init__: `url`, `title` and `text`.
    """

    def __init__(self, url, timeout=30):
        """
        Create this Website object from the given url using the BeautifulSoup library

        Args:
            url: Address of the page to download.
            timeout: Seconds to wait on the server before giving up, passed
                straight to requests.get.

        Raises:
            requests.RequestException: if the request fails, times out, or the
                server answers with an HTTP error status.
        """
        self.url = url
        # Without a timeout requests waits indefinitely on an unresponsive server.
        response = requests.get(url, headers=headers, timeout=timeout)
        # Stop here on 4xx/5xx so an error page is never summarized as the site.
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')

        # <title> may be absent, empty, or contain nested tags (.string is None then).
        title = soup.title.string if soup.title else None
        self.title = title or "No title found"

        body = soup.body
        if body is None:
            # Documents without a <body> have no text to extract.
            self.text = ""
            return
        # Drop elements that carry no readable content before extracting text.
        for irrelevant in body(["script", "style", "img", "input"]):
            irrelevant.decompose()
        self.text = body.get_text(separator="\n", strip=True)

In [4]:
# Text used as the "system" message of every chat request.
system_prompt = (
    "You are an assistant that analyzes the contents of a website "
    "and provides a short summary, ignoring text that might be navigation related. "
    "Respond in markdown."
)

In [5]:
# A function that writes a User Prompt that asks for summaries of websites:

def user_prompt_for(website):
    """
    Build the user message asking for a summary of `website`.

    Args:
        website: Object exposing `title` and `text` attributes.

    Returns:
        A single string: a line naming the title, the summarization
        instructions, a blank line, then the page text.
    """
    intro = f"You are looking at a website titled {website.title}"
    instructions = (
        "\nThe contents of this website is as follows; "
        "please provide a short summary of this website in markdown. "
        "If it includes news or announcements, then summarize these too.\n\n"
    )
    return intro + instructions + website.text

In [6]:
def messages_for(website):
    """
    Assemble the chat messages for summarizing `website`.

    Returns:
        A two-element list of role/content dicts: the system prompt first,
        then the user prompt generated for this website.
    """
    system_message = {"role": "system", "content": system_prompt}
    user_message = {"role": "user", "content": user_prompt_for(website)}
    return [system_message, user_message]
                

## Local Ollama

In [7]:
# Settings for talking to the local Ollama server

# Model name placed in the "model" field of each request payload
MODEL = "llama3.2"

# URL the chat request is POSTed to
OLLAMA_API = "http://localhost:11434/api/chat"

# HTTP headers sent with that POST
HEADERS = {
    "Content-Type": "application/json",
}

In [10]:
def summarize(url):
    """
    Download the page at `url` and return the local Ollama model's summary of it.

    Args:
        url: Address of the page to summarize.

    Returns:
        The text found under 'message' -> 'content' in the JSON reply.

    Raises:
        requests.HTTPError: if the Ollama endpoint answers with an error status.
        requests.RequestException: for other request failures, including
            those raised while fetching the page itself.
    """
    website = Website(url)
    payload = {
        "model": MODEL,
        "messages": messages_for(website),
        "stream": False,  # ask for one complete JSON reply instead of chunks
    }

    response = requests.post(OLLAMA_API, json=payload, headers=HEADERS)
    # An error reply has no 'message' key; raise the HTTP error itself rather
    # than letting the lookup below fail with an uninformative KeyError.
    response.raise_for_status()
    return response.json()['message']['content']
                

In [11]:
def display_summary(url):
    """Summarize the page at `url` and render the result as Markdown in the cell output."""
    display(Markdown(summarize(url)))
    

In [15]:
# Example run: summarize the CNN homepage and render the summary below.
display_summary("https://cnn.com")

This appears to be the CNN website's homepage, which provides a wide range of news, articles, and updates on various topics. The page includes:

1. Headlines: A list of top headlines from around the world, including news about politics, business, entertainment, and more.
2. Photos: A gallery of photos from recent events, including the 2025 Grammy Awards.
3. Videos: Links to CNN's video content, including live TV, podcasts, and special reports.
4. Articles: Links to in-depth articles on various topics, such as politics, business, and entertainment.
5. Blogs: Links to blogs from CNN personalities, including Anderson Cooper, Jake Tapper, and others.
6. Podcasts: A list of current and popular podcasts from CNN, including "The Axe Files," "Chasing Life with Dr. Sanjay Gupta," and more.
7. Newsletters: Links to CNN newsletters, which provide daily updates on news, politics, and other topics.
8. Live TV: A schedule of live TV programming, including CNN shows like "Anderson Cooper 360" and "The Situation Room."
9. News: A list of current events and breaking news stories from around the world.
10. Topics: Links to specific topics, such as business, entertainment, sports, and more.

Overall, this page provides a comprehensive overview of CNN's content and offerings, with links to various articles, videos, podcasts, and live TV shows.