In [1]:
import os
import requests
from openai import OpenAI
from bs4 import BeautifulSoup
from dotenv import load_dotenv
from IPython.display import Markdown, display

In [6]:
# Load settings from a local .env file; override=True is passed so that the
# file's values win over variables already present in the process environment.
load_dotenv(override=True)
# Kept as a module-level name for inspection; it is not passed to the client below.
api_key = os.environ.get('OPENAI_API_KEY')
# The client is built with no arguments — presumably it picks the key up
# from the environment on its own (confirm against the OpenAI SDK docs).
openai = OpenAI()

In [7]:
# HTTP headers sent with every page request: a desktop-browser User-Agent.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/117.0.0.0 Safari/537.36"
    ),
}

class Website:
    """A fetched web page reduced to its title and visible text.

    Attributes:
        url: The address that was requested.
        title: Text of the page's <title> element, or 'No Title' when the
            page has no usable title.
        text: Newline-separated text of the page with script, style, img
            and input elements removed.
    """

    def __init__(self, url, timeout=30):
        """Download `url` and extract its title and text.

        Args:
            url: Address of the page to fetch.
            timeout: Seconds to wait for the server before giving up.

        Raises:
            requests.exceptions.RequestException: If the request fails or
                times out.
        """
        self.url = url
        # requests waits indefinitely unless told otherwise; a stalled server
        # would otherwise hang the whole kernel.
        response = requests.get(url, headers=headers, timeout=timeout)
        soup = BeautifulSoup(response.content, 'html.parser')
        # .string is None for an empty <title> or one that contains nested tags.
        page_title = soup.title.string if soup.title else None
        self.title = page_title or 'No Title'
        # Documents without a <body> (fragments, some error responses) are
        # processed from the document root instead of crashing on None.
        root = soup.body if soup.body is not None else soup
        for irrelevant in root(["script", "style", "img", "input"]):
            irrelevant.decompose()
        self.text = root.get_text(separator='\n', strip=True)

In [8]:
# Quick check of the scraper: fetch one page and show what was extracted.
ed = Website("https://edwarddonner.com")
print(ed.title, ed.text, sep="\n")

Home - Edward Donner
Home
Connect Four
Outsmart
An arena that pits LLMs against each other in a battle of diplomacy and deviousness
About
Posts
Well, hi there.
I’m Ed. I like writing code and experimenting with LLMs, and hopefully you’re here because you do too. I also enjoy DJing (but I’m badly out of practice), amateur electronic music production (
very
amateur) and losing myself in
Hacker News
, nodding my head sagely to things I only half understand.
I’m the co-founder and CTO of
Nebula.io
. We’re applying AI to a field where it can make a massive, positive impact: helping people discover their potential and pursue their reason for being. Recruiters use our product today to source, understand, engage and manage talent. I’m previously the founder and CEO of AI startup untapt,
acquired in 2021
.
We work with groundbreaking, proprietary LLMs verticalized for talent, we’ve
patented
our matching model, and our award-winning platform has happy customers and tons of press coverage.
Connec

In [58]:
# System message for the summarizer: one instruction per line, joined with
# newlines so the model receives a bulleted list.
system_prompt = "\n".join([
    "You are given the content of a website. Your task is to:",
    "- Extract and return the **title** of the page, if one exists.",
    "- Provide a **summary** of the main body content.",
    "- **Ignore** navigation menus, headers, footers, or sidebars.",
    "- At the end, if there is an email address, return it in standard email format (e.g., john@example.com), not obfuscated.",
])

In [59]:
# Print the prompt so the line breaks between the instructions can be checked by eye.
print(system_prompt)

You are given the content of a website. Your task is to:
- Extract and return the **title** of the page, if one exists.
- Provide a **summary** of the main body content.
- **Ignore** navigation menus, headers, footers, or sidebars.
- At the end, if there is an email address, return it in standard email format (e.g., john@example.com), not obfuscated.


In [42]:
def user_prompt_for(website):
    """Build the user message asking for a summary of `website`.

    Args:
        website: Object exposing `title` and `text` attributes.

    Returns:
        A string made of a line naming the page title, the summarization
        request, a blank line, and then the page text.
    """
    intro = f"You are looking at a website titled {website.title}"
    request = (
        "\nThe contents of this website is as follows; "
        "please provide a short summary of this website in markdown. "
        "If it includes news or announcements, then summarize these too.\n\n"
    )
    return intro + request + website.text

In [43]:
def messages_for(website):
    """Assemble the chat messages for summarizing `website`.

    Args:
        website: Page object passed through to `user_prompt_for`.

    Returns:
        A two-item list of role/content dicts: the system message carrying
        `system_prompt`, followed by the user message for this page.
    """
    system_message = {"role": "system", "content": system_prompt}
    user_message = {"role": "user", "content": user_prompt_for(website)}
    return [system_message, user_message]

In [60]:
def summarize(url=None, model='gpt-4o-mini'):
    """Fetch a web page and return the model's summary of it.

    Args:
        url: Address of the page to summarize. When omitted, the user is
            prompted for one, so a bare `summarize()` call still behaves
            interactively.
        model: Name of the chat model to query.

    Returns:
        The message content of the first choice in the chat completion.
    """
    if url is None:
        url = input('Provide URL: ')
    # Whitespace picked up while typing or pasting is not part of the address.
    website = Website(url.strip())
    response = openai.chat.completions.create(
        model=model,
        messages=messages_for(website),
    )
    return response.choices[0].message.content

In [61]:
def display_summarize(response):
    """Render `response` as Markdown in the notebook's output area.

    Args:
        response: Markdown source text to render.
    """
    rendered = Markdown(response)
    display(rendered)

In [63]:
# Run the pipeline end to end: called with no arguments, summarize() prompts
# for a URL; the returned text is kept in `response` and then rendered as Markdown.
response = summarize()
display_summarize(response)

Provide URL:  https://www.bbc.com/


# BBC Home - Breaking News, World News, US News, Sports, Business, Innovation, Climate, Culture, Travel, Video & Audio

## Summary

The BBC Home page provides comprehensive coverage of current events across various domains including news, sports, business, and culture. Major headlines include:

- **Trump's 'Liberation Day' Tariffs**: Anticipation builds around President Trump's upcoming announcement regarding new import taxes, with analysis from Erin Delmore.
- **Myanmar Earthquake**: A report details the impact of a devastating earthquake that killed many during Friday prayers, with a focus on individual stories of loss.
- **Political Updates**: Coverage includes a Republican victory in a special election in Florida and Cory Booker's record-breaking Senate speech against the Trump agenda.
- **International Developments**: Includes reporting from Sudan’s capital after significant destruction and rising geopolitical tensions regarding tariffs.

Additionally, various news segments cover cultural topics, environmental issues, and scientific advancements reflecting a broad spectrum of interests and concerns.

### Email

There is no email address provided on the page content.