In [1]:
import os
from dotenv import load_dotenv
from openai import OpenAI
from IPython.display import Markdown, display

In [3]:

# Load variables from a .env file into the process environment, then read the key.
load_dotenv(override=True)
api_key = os.getenv('OPENAI_API_KEY')

# Check the key.
# Whitespace is tested BEFORE the prefix: a key with a leading space or tab
# also fails startswith("sk-proj-"), and would otherwise be reported with the
# misleading "doesn't start sk-proj-" message instead of the whitespace one.
if not api_key:
    print("No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!")
elif api_key.strip() != api_key:
    print("An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook")
elif not api_key.startswith("sk-proj-"):
    print("An API key was found, but it doesn't start sk-proj-; please check you're using the right key - see troubleshooting notebook")
else:
    print("API key found and looks good so far!")

API key found and looks good so far!


In [4]:
# Create the API client used by summarize(). No arguments are passed here;
# presumably the SDK picks up OPENAI_API_KEY from the environment loaded
# earlier in this notebook - TODO confirm against the OpenAI SDK docs.
openai = OpenAI()

In [5]:
from bs4 import BeautifulSoup
import requests


# Browser-like request headers sent with every page fetch.
# The User-Agent is split over several adjacent literals purely for line
# length; Python concatenates them into one string.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/117.0.0.0 Safari/537.36"
    ),
}


def fetch_website_contents(url, max_chars=2_000, timeout=30):
    """
    Return the title and contents of the website at the given url;
    truncate to 2,000 characters by default as a sensible limit.

    Parameters:
        url: address of the page to fetch.
        max_chars: maximum length of the returned string (default 2,000).
        timeout: seconds requests waits for the server before giving up
            (default 30), so a stalled server cannot hang the cell.

    Returns:
        A single string: the page title, a blank line, then the visible
        body text (one line per text fragment), cut to max_chars characters.
        The title is "No title found" when the page has no usable <title>;
        the text part is empty when the page has no <body>.

    Raises:
        Nothing is caught here: any exception raised by requests.get
        (including a timeout) propagates to the caller.
    """
    response = requests.get(url, headers=headers, timeout=timeout)
    soup = BeautifulSoup(response.content, "html.parser")

    # soup.title.string is None for an empty <title> or one that contains
    # nested tags; fall back to the tag's text and then to the placeholder
    # so the concatenation below never receives None.
    title = None
    if soup.title is not None:
        title = soup.title.string or soup.title.get_text()
    title = title or "No title found"

    if soup.body:
        # Drop elements that carry no readable text before extracting it.
        for irrelevant in soup.body(["script", "style", "img", "input"]):
            irrelevant.decompose()
        text = soup.body.get_text(separator="\n", strip=True)
    else:
        text = ""
    return (title + "\n\n" + text)[:max_chars]

In [7]:
# Try the scraper on a live page; the printed text is the title followed by
# the body text, already truncated by fetch_website_contents.
content = fetch_website_contents("https://cnn.com")
print(content)

Breaking News, Latest News and Videos | CNN

CNN values your feedback
1. How relevant is this ad to you?
2. Did you encounter any technical issues?
Video player was slow to load content
Video content never loaded
Ad froze or did not finish loading
Video content did not start after ad
Audio on ad was too loud
Other issues
Ad never loaded
Ad prevented/slowed the page from loading
Content moved around while ad loaded
Ad was repetitive to ads I've seen previously
Other issues
Cancel
Submit
Thank You!
Your effort and contribution in providing this feedback is much
                                        appreciated.
Close
Ad Feedback
Close icon
US
World
Politics
Business
Health
Entertainment
Style
Travel
Sports
Science
Climate
Weather
Winter Olympics 2026
Ukraine-Russia War
Israel-Hamas War
Games
More
US
World
Politics
Business
Health
Entertainment
Style
Travel
Sports
Science
Climate
Weather
Winter Olympics 2026
Ukraine-Russia War
Israel-Hamas War
Games
Watch
Listen
Live TV
Subscribe
Sign i

In [8]:
# System prompt: sets the assistant's persona (snarky, humorous), tells it to
# ignore navigation text, and asks for plain markdown with no code fence.
system_prompt = """
You are a snarky assistant that analyzes the contents of a website,
and provides a short, snarky, humorous summary, ignoring text that might be navigation related.
Respond in markdown. Do not wrap the markdown in a code block - respond just with the markdown.
"""

# User prompt prefix: the instruction text that messages_for() places in
# front of the scraped website contents.
user_prompt_prefix = """
Here are the contents of a website.
Provide a short summary of this website.
If it includes news or announcements, then summarize these too.

"""

In [9]:
def messages_for(website):
    """
    Build the chat message list for one website.

    Returns a two-item list: the system message carrying system_prompt,
    followed by a user message made of user_prompt_prefix plus the given
    website text.
    """
    system_message = dict(role="system", content=system_prompt)
    user_message = dict(role="user", content=user_prompt_prefix + website)
    return [system_message, user_message]

In [10]:
def summarize(url):
    """
    Fetch the website at url and return the model's summary of it.

    The page text comes from fetch_website_contents, is wrapped into chat
    messages by messages_for, and is sent to the "gpt-4.1-mini" model; the
    content of the first returned choice is handed back to the caller.
    """
    page_text = fetch_website_contents(url)
    completion = openai.chat.completions.create(
        model="gpt-4.1-mini",
        messages=messages_for(page_text),
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

In [11]:
# Bare expression: the notebook shows the returned string's repr, so the
# markdown appears unrendered with escaped newlines.
summarize("https://cnn.com")

'# CNN: Your Overcrowded News Buffet\n\nWelcome to CNN, the digital equivalent of that one friend who never stops talking — covering everything from *US politics* and *world affairs* to *celebrity gossip* and *Winter Olympics 2026* updates. \n\nGot a problem with ads buffering like a stuck record? They’re all ears, begging for your precious feedback on whether those pesky ads loaded, froze, or just made you want to smash your screen.\n\nThey serve you news in every flavor imaginable: politics, business, health, entertainment, sports, climate, and even tech trends. Oh, and if you want to catch the latest on the *Ukraine-Russia conflict* or the *Israel-Hamas War*, they’ve got you covered—as if you’re not already drowning in enough headlines.\n\nBonus: You can watch live TV, listen to podcasts, or stare at endlessly loading videos—all while managing your account, newsletters, and topics you *pretend* to care about.\n\nIn short: CNN is your nonstop, all-you-can-eat news buffet with a side 

In [12]:
# A function to display this nicely in the output, using markdown

def display_summary(url):
    """Summarize the website at url and render the result as markdown in the cell output."""
    display(Markdown(summarize(url)))

In [13]:
# Same summary request as above, but rendered as formatted markdown.
display_summary("https://cnn.com")

# CNN Website: The News Buffet You Didn't Ask For

Welcome to CNN, your one-stop shop for literally *everything*: from US politics to the Ukraine-Russia saga, and apparently, Winter Olympics 2026 hype because why not? They’ve got news, videos, live TV, and enough topics to drown your attention span – politics, sports, entertainment, crypto, mindfulness, and even an odd sprinkle of luxury beauty tips. 

Oh, and if their ads annoy you (because of course they will), there’s a whole feedback form begging for your rage. Spoiler: the site seems more interested in how bad the ads are than any actual breaking news summaries. So buckle up for an avalanche of headlines and a marathon of menus; actual news is somewhere in there, but good luck finding it without a detour through the ad labyrinth.