In [1]:
import os
import requests
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from IPython.display import Markdown, display
from openai import OpenAI

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from bs4 import BeautifulSoup
import time

In [2]:
# Load variables from a local .env file into the process environment
# (override=True is passed so that .env values take precedence).
load_dotenv(override=True)
api_key = os.getenv('OPENAI_API_KEY')

## Check the key

# The whitespace check runs before the prefix check: a key with a leading
# space or tab would otherwise be misreported as having the wrong prefix.
if not api_key:
    print("No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!")
elif api_key.strip() != api_key:
    print("An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook")
elif not api_key.startswith("sk-proj-"):
    print("An API key was found, but it doesn't start sk-proj-; please check you're using the right key - see troubleshooting notebook")
else:
    print("API key found and looks good so far!")

API key found and looks good so far!


In [3]:
# Shared API client used by summarize(). No key is passed explicitly here;
# presumably the client reads OPENAI_API_KEY from the environment - TODO confirm.
openai = OpenAI()

### Web scraping 

In [4]:
def scrape_website(url):
    """Load ``url`` in headless Chrome and return its title and visible text.

    Args:
        url: Address of the page to load.

    Returns:
        A ``(title, text)`` tuple. ``title`` is the string inside the page's
        ``<title>`` element, or ``"No title found"`` when that element is
        missing or has no single string. ``text`` is the page text with
        ``script``, ``style``, ``img`` and ``input`` elements removed,
        stripped fragments joined by newlines.

    Raises:
        Whatever Selenium raises if the browser cannot start or the page
        cannot be loaded; the browser is shut down before the error
        propagates.
    """
    options = webdriver.ChromeOptions()
    options.add_argument("--headless")
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-dev-shm-usage")

    driver = webdriver.Chrome(service=Service(), options=options)
    try:
        driver.get(url)
        # Fixed pause before reading the DOM - presumably to give client-side
        # scripts time to populate the page.
        time.sleep(2)
        page_source = driver.page_source
    finally:
        # Always quit, even when loading fails, so no Chrome process is leaked.
        driver.quit()

    soup = BeautifulSoup(page_source, "html.parser")

    # .string is None when <title> is empty or contains nested tags; fall back
    # to the placeholder instead of returning None to callers.
    title_tag = soup.title
    if title_tag is not None and title_tag.string:
        title = title_tag.string
    else:
        title = "No title found"

    # Drop elements that carry no readable content.
    for irrelevant in soup(["script", "style", "img", "input"]):
        irrelevant.decompose()

    text = soup.get_text(separator="\n", strip=True)
    return title, text

### Types of Prompts

In [5]:
# System message sent with every request: fixes the assistant's role
# (website summarizer) and the output format (markdown).
system_prompt = (
    "You are an assistant that analyzes the contents of a website "
    "and provides a short summary, ignoring text that might be navigation related. "
    "Respond in markdown."
)

In [6]:
def user_prompt_for(url):
    """Build the user message asking for a summary of the page at ``url``.

    The page is fetched with ``scrape_website``. The returned string is a
    line naming the page title, followed by the summarization instructions,
    followed by the scraped page text.
    """
    page_title, page_text = scrape_website(url)

    intro = f"You are looking at a website titled {page_title}"
    instructions = (
        "\nThe contents of this website is as follows; "
        "please provide a short summary of this website in markdown. "
        "If it includes news or announcements, then summarize these too.\n\n"
    )
    return intro + instructions + page_text

In [7]:
def messages_for(url):
    """Return the two-message chat payload (system, then user) for ``url``."""
    system_message = {"role": "system", "content": system_prompt}
    user_message = {"role": "user", "content": user_prompt_for(url)}
    return [system_message, user_message]

In [8]:
def summarize(url, model="gpt-4o-mini"):
    """Request a summary of the page at ``url`` from a chat model.

    Args:
        url: Address of the page to summarize.
        model: Name of the chat model to call. Defaults to ``"gpt-4o-mini"``,
            so existing single-argument calls behave as before.

    Returns:
        The message content of the first choice in the completion response.
    """
    response = openai.chat.completions.create(
        model=model,
        messages=messages_for(url),
    )
    return response.choices[0].message.content

In [9]:
def display_summary(url):
    """Summarize the page at ``url`` and render the result as Markdown output."""
    display(Markdown(summarize(url)))

In [10]:
# Example run: scrape the CNN international front page and show its summary.
display_summary("https://edition.cnn.com/")

# CNN Website Summary

The CNN website serves as a comprehensive source of breaking news and video content, covering a wide range of topics including U.S. and world news, politics, business, health, entertainment, style, travel, sports, science, and climate. 

## Recent News Highlights

- **Ukraine-Russia War**: A prominent Ukrainian politician was shot dead in the western city of Ukraine. Meanwhile, Ukraine has reportedly struck significant bridges in Russia.
  
- **Israel-Hamas Conflict**: Israel is halting airdrops ahead of a planned offensive in Gaza City as the Red Cross calls for a mass evacuation, deemed "impossible" by many.

- **Indonesia Protests**: Protests in Indonesia have escalated, leading to violence and destruction in various locations, including the burning of a parliament building.

- **Global Events**: Leaders such as Putin and Modi are attending a significant China-led summit, amid discussions on reshaping global relations.

- **Health News**: Recent studies indicate that a common heart attack medication may be ineffective and could increase the risk of death for some women. 

- **US Politics**: The Trump administration faces legal challenges, including a federal appeals court ruling that found some of Trump's tariffs unlawful, and concerns regarding migrant deportations.

- **Entertainment**: The 2025 Venice Film Festival has generated buzz, alongside highlights from the U.S. Open featuring star players like Novak Djokovic.

Overall, CNN aims to keep readers informed with timely updates on major global and local issues, along with in-depth analysis and engaging multimedia content.