In [None]:
import os
import requests
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from IPython.display import Markdown, display
from openai import OpenAI
import certifi
import warnings

# Disable warnings (including SSL warnings)
# warnings.filterwarnings('ignore')

## Setup Instructions for the API connection

1. Create an OpenAI account by visiting:

    https://platform.openai.com/

2. OpenAI requires a minimum credit balance to use the API.

    You can add credit to your OpenAI account under Settings > Billing:
    https://platform.openai.com/settings/organization/billing/overview

3. Create your API key

    You set up your OpenAI key at https://platform.openai.com/api-keys - click the green 'Create new secret key' button, then click 'Create secret key'. Keep a record of the API key somewhere private; you won't be able to retrieve it from the OpenAI screens in the future. It should start with `sk-proj-`.

4. Create .env 

    Create a new file called `.env` in your project root directory. Here's how to do it:

    1. Open Terminal (Applications > Utilities > Terminal)

    2. Navigate to the project root directory

    3. Create the .env file with

        `nano .env`
    
    4. Then type the following line into nano, replacing xxxx with your API key (it starts with `sk-proj-`).

        ```
        OPENAI_API_KEY=xxxx
        ```

    5. Save the file:

        Control + O  
        Enter (to confirm save the file)  
        Control + X to exit the editor

    6. Use this command to list all files (including hidden ones) in your project root directory:

        `ls -a`

        And confirm that the `.env` file is there.

Alternatively, to use a locally hosted open-source model instead of the OpenAI API, install [Ollama](https://ollama.com/) on your computer and run `ollama run llama3.2` in PowerShell or Terminal.

In [None]:
# Load OPENAI_API_KEY (and any other settings) from the project's .env file so
# the hosted client can read the key from the environment.
load_dotenv()

# Pick exactly one backend. Constructing OpenAI() without an API key raises an
# error, so the hosted client is only created when it is actually selected.
# The model name passed to the API must be one the selected backend serves.
USE_LOCAL_OLLAMA = True

if USE_LOCAL_OLLAMA:
    # Open-source, locally hosted model (e.g. Ollama llama3.2) reached through
    # its OpenAI-compatible endpoint.
    openai = OpenAI(base_url='http://localhost:11434/v1', api_key='ollama')
else:
    # Hosted OpenAI API; the key is taken from the OPENAI_API_KEY variable.
    openai = OpenAI()

In [None]:
# A class to represent a Webpage

# Request headers sent with every page fetch; some websites need a proper
# browser-style User-Agent before they will serve their content.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/117.0.0.0 Safari/537.36"
    ),
}

class Website:
    """
    A fetched web page reduced to its title and visible body text.

    Attributes:
        url: The address the page was requested from.
        title: Text of the page's <title> element, or "No title found" when
            the element is missing or has no single text value.
        text: Newline-separated text of the <body> element with script, style,
            img and input elements removed; "" when the page has no <body>.
    """

    def __init__(self, url, timeout=30):
        """
        Create this Website object from the given url using the BeautifulSoup library

        Args:
            url: Address of the page to download.
            timeout: Seconds to wait for the server before giving up, so a
                stalled connection cannot hang the kernel indefinitely.

        Raises:
            requests.RequestException: If the request fails, times out, or the
                server answers with an HTTP error status (4xx/5xx).
        """
        self.url = url
        # Fetch the page; fail loudly on HTTP errors rather than parsing (and
        # later summarizing) an error page as if it were the real content.
        response = requests.get(url, headers=headers, timeout=timeout)  # , verify=False (only for test purposes)
        response.raise_for_status()
        # Parse the webpage with BeautifulSoup
        soup = BeautifulSoup(response.content, 'html.parser')

        # <title> may be absent, and .string is None when the element is empty
        # or contains nested tags; fall back to the placeholder in both cases.
        title_text = soup.title.string if soup.title else None
        self.title = title_text or "No title found"

        # Documents without a <body> (fragments, some error responses) would
        # otherwise raise AttributeError on the calls below.
        body = soup.body
        if body is None:
            self.text = ""
            return

        # skip images and styling
        for irrelevant in body(["script", "style", "img", "input"]):
            irrelevant.decompose()
        self.text = body.get_text(separator="\n", strip=True)

In [13]:
# Scrape the CNN front page and show its title followed by the extracted text
cnn = Website("https://cnn.com")
print(cnn.title, cnn.text, sep="\n")

Breaking News, Latest News and Videos | CNN
CNN values your feedback
1. How relevant is this ad to you?
2. Did you encounter any technical issues?
Video player was slow to load content
Video content never loaded
Ad froze or did not finish loading
Video content did not start after ad
Audio on ad was too loud
Other issues
Ad never loaded
Ad prevented/slowed the page from loading
Content moved around while ad loaded
Ad was repetitive to ads I've seen previously
Other issues
Cancel
Submit
Thank You!
Your effort and contribution in providing this feedback is much
                                        appreciated.
Close
Ad Feedback
Close icon
US
World
Politics
Business
Health
Entertainment
Style
Travel
Sports
Science
Climate
Weather
Ukraine-Russia War
Israel-Hamas War
Games
More
US
World
Politics
Business
Health
Entertainment
Style
Travel
Sports
Science
Climate
Weather
Ukraine-Russia War
Israel-Hamas War
Games
Watch
Listen
Live TV
Subscribe
Sign in
My Account
Settings
Newsletters
Topics yo

In [14]:
# Define system prompt

# Instructions sent as the "system" message of every summarization request.
system_prompt = (
    "You are an assistant that analyzes the contents of a website "
    "and provides a short summary, ignoring text that might be navigation related. "
    "Respond in markdown."
)

In [16]:
# A function that writes a User Prompt that asks for summaries of websites:
def user_prompt_for(website):
    """
    Build the user message asking for a markdown summary of a website.

    Args:
        website: Object exposing `title` and `text` attributes.

    Returns:
        A string made of a line naming the page title, the summary
        instructions, a blank line, and then the page text.
    """
    heading = f"You are looking at a website titled {website.title}"
    instructions = (
        "\nThe contents of this website is as follows; "
        "please provide a short summary of this website in markdown. "
        "Focus on news or announcements.\n\n"
    )
    return heading + instructions + website.text

## Messages

The API from OpenAI expects to receive messages in a particular structure.
Many of the other APIs share this structure:

```
[
    {"role": "system", "content": "system message goes here"},
    {"role": "user", "content": "user message goes here"}
]
```

In [19]:
def messages_for(website):
    """
    Assemble the chat messages for summarizing a website.

    Args:
        website: Object accepted by `user_prompt_for`.

    Returns:
        A two-item list: the system message carrying `system_prompt`, followed
        by the user message produced by `user_prompt_for(website)`.
    """
    system_message = {"role": "system", "content": system_prompt}
    user_message = {"role": "user", "content": user_prompt_for(website)}
    return [system_message, user_message]

In [None]:
# Function to summarize a website using an LLM API
def summarize(url, model="llama3.2"):
    """
    Fetch a web page and return an LLM-written summary of it.

    Args:
        url: Address of the page to summarize.
        model: Name of the chat model to request, e.g. "llama3.2" or
            "gpt-4o-mini". It must be a model served by the backend that the
            `openai` client is configured for.

    Returns:
        The content of the first choice's message in the chat completion
        response.
    """
    website = Website(url)
    response = openai.chat.completions.create(
        model=model,
        messages=messages_for(website)
    )
    return response.choices[0].message.content

In [None]:
# Render a website summary as formatted markdown in the Jupyter cell output
def display_summary(url):
    """Summarize the page at `url` and display the summary as Markdown."""
    display(Markdown(summarize(url)))

In [23]:
# Summarize the CNN front page and render the summary as Markdown below
display_summary("https://cnn.com")

**Breaking News and Latest Developments**
=====================================

**Global News**

* Charlie Kirk's memorial service highlights the evolving populist conservatism after Trump's departure
* NATO's 'Eastern Sentry' operation faces scrutiny over its speed and effectiveness in countering Russia's growing drone threat
* The US economy grapples with a new challenge: Trump's $100K fee on H-1B visas

**Politics**

* The Republican Party's majority in the 2026 election hangs by a thread, with Trump's actions being scrutinized to determine his survival
* GOP lawmakers clash over Trump's handling of media controversies and its impact on the party's future

**Business**

* Warren Buffett's fund exits BYD after a 17-year investment that grew 20-fold in value
* China's DeepSeek shakes the tech world, with its developer revealing the cost of training the AI model

**Entertainment**

* 'Jimmy Kimmel Live!' crew to receive paid work despite show's uncertain future
* A photographer reveals the reality behind the 'hyper-romantic versions' of nature

**Climate and Science**

* The 'blob' returns, stretching across the North Pacific Ocean
* Scientists discover a new species swimming by an underwater camera

**Travel and Culture**

* European airports struggle to fix a check-in glitch after hacking disruption
* A private ranch in Uganda is home to almost 50 rhinos who went extinct 40 years ago