In [1]:
from bs4 import BeautifulSoup
import requests


# Browser-like request headers passed to requests.get by the fetch helpers in
# this notebook. The User-Agent is split across adjacent string literals purely
# for line length; the pieces concatenate into a single header value.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/117.0.0.0 Safari/537.36"
    )
}


def fetch_website_contents(url, timeout=10):
    """
    Return the title and visible body text of the website at the given url;
    truncate to 2,000 characters as a sensible limit.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before the request is
            abandoned. Without a timeout a stalled server would block the
            notebook cell indefinitely.

    Returns:
        A string made of the title, a blank line, and the body text, cut to at
        most 2,000 characters. The title is "No title found" when the page has
        no usable <title>; the text is empty when the page has no <body>.

    Raises:
        Whatever requests.get raises on connection failure or timeout
        (subclasses of requests.exceptions.RequestException).
    """
    response = requests.get(url, headers=headers, timeout=timeout)
    soup = BeautifulSoup(response.content, "html.parser")

    # soup.title.string is None for an empty <title> or one containing nested
    # markup; fall back to the placeholder so the concatenation below cannot
    # raise TypeError.
    title = (soup.title.string if soup.title else None) or "No title found"

    if soup.body:
        # Remove elements that contribute no readable text before extracting.
        for irrelevant in soup.body(["script", "style", "img", "input"]):
            irrelevant.decompose()
        text = soup.body.get_text(separator="\n", strip=True)
    else:
        text = ""
    return (title + "\n\n" + text)[:2_000]


def fetch_website_links(url, timeout=10):
    """
    Return the links on the website at the given url.

    I realize this is inefficient as we're downloading and parsing the page a
    second time! This is to keep the code in the lab simple.
    Feel free to use a class and optimize it!

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before the request is
            abandoned, so a stalled server cannot hang the notebook cell.

    Returns:
        A list of the non-empty href values of every <a> tag, exactly as they
        appear in the page (they are not resolved against url).

    Raises:
        Whatever requests.get raises on connection failure or timeout
        (subclasses of requests.exceptions.RequestException).
    """
    response = requests.get(url, headers=headers, timeout=timeout)
    soup = BeautifulSoup(response.content, "html.parser")
    # Anchors without an href yield None and empty hrefs yield ""; both are
    # dropped by the truthiness filter.
    hrefs = (anchor.get("href") for anchor in soup.find_all("a"))
    return [href for href in hrefs if href]


In [None]:
# Sanity check for the scraping helper: fetch the BBC home page and print the
# title plus body text it returns (capped at 2,000 characters by the helper).

bbc = fetch_website_contents("https://bbc.com")
print(bbc)

BBC Home - Breaking News, World News, US News, Sports, Business, Innovation, Climate, Culture, Travel, Video & Audio

Skip to content
British Broadcasting Corporation
Home
News
Sport
Business
Innovation
Culture
Arts
Travel
Earth
Audio
Video
Live
Home
News
Israel-Gaza War
War in Ukraine
US & Canada
UK
UK Politics
England
N. Ireland
N. Ireland Politics
Scotland
Scotland Politics
Wales
Wales Politics
Africa
Asia
China
India
Australia
Europe
Latin America
Middle East
In Pictures
BBC InDepth
BBC Verify
Sport
Business
Executive Lounge
Technology of Business
Future of Business
Innovation
Technology
Science & Health
Artificial Intelligence
AI v the Mind
Culture
Film & TV
Music
Art & Design
Style
Books
Entertainment News
Arts
Arts in Motion
Travel
Destinations
Africa
Antarctica
Asia
Australia and Pacific
Caribbean & Bermuda
Central America
Europe
Middle East
North America
South America
World’s Table
Culture & Experiences
Adventures
The SpeciaList
Earth
Natural Wonders
Weather & Science
Climate 

In [5]:
# LLM: ask a chat model for a snarky summary of the scraped BBC page

from openai import OpenAI

# The client is pointed at a server on localhost rather than the hosted API.
# NOTE(review): "FAKE" looks like a placeholder key for that local server - confirm.
client = OpenAI(api_key="FAKE", base_url="http://localhost:1337/v1")

# Sets the assistant's persona and output format.
system_prompt = """
You are a snarky assistant that analyzes the contents of a website,
and provides a short, snarky, humorous summary, ignoring text that might be navigation related.
Respond in markdown. Do not wrap the markdown in a code block - respond just with the markdown.
"""

# Instruction text that is placed in front of the scraped page contents.
user_prompt_prefix = """
Here are the contents of a website.
Provide a short summary of this website.
If it includes news or announcements, then summarize these too.

"""

# Fetch the page in this cell too, so the cell does not rely on an earlier one.
bbc = fetch_website_contents("https://bbc.com")

messages = [
    dict(role="system", content=system_prompt),
    dict(role="user", content=user_prompt_prefix + bbc),
]

response = client.chat.completions.create(model="gpt-4.1-mini", messages=messages)

# Only the text of the first returned choice is shown.
print(response.choices[0].message.content)

### BBC Homepage Roundup: News Faster Than You Can Say "Skip Navigation"

Ah, the BBC homepage – a labyrinth of menus so tangled, even Theseus would need a GPS. Strip away the endless nav clutter (News, Sport, Business, Innovation, Culture, the works – like scrolling through your grandma's phonebook), and we're left with the juicy bits: a smorgasbord of global drama and sports schadenfreude.

**Top Meldings (a.k.a. News)**:  
- **Tank Toy Heist Redux**: The US is playing pirate king again, seizing its *second* oil tanker off Venezuela's coast. Popeye Joe (aka Trump) declared a "blockade" on sanctioned ships – because nothing says "diplomacy" like floating seizures. (10 hrs ago, World section – see, even the bots are recycling headlines for that desperate clickbait vibe.)  
- **Cricket Calamity Chronicles**: England's Ashes squad? Picture booze, beaches, and a brutal beatdown Down Under. Sport scribe Stephan Shemilt spills the off-pitch tea – turns out, mixing pints with pitches is a re