In [14]:
import os
import requests
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from IPython.display import Markdown, display
from openai import OpenAI

In [2]:
# Load variables from a local .env file; override=True lets them replace values
# that are already present in the process environment.
load_dotenv(override=True)
api_key = os.getenv('OPENAI_API_KEY')

# Check the key.
# The whitespace test runs before the prefix test so that a key with leading
# spaces/tabs is reported as a whitespace problem instead of a wrong prefix.

if not api_key:
    print("No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!")
elif api_key.strip() != api_key:
    print("An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook")
elif not api_key.startswith("sk-proj-"):
    print("An API key was found, but it doesn't start sk-proj-; please check you're using the right key - see troubleshooting notebook")
else:
    print("API key found and looks good so far!")

API key found and looks good so far!


In [16]:
# Shared API client used by generate_quiz. No key is passed explicitly here;
# presumably the client picks up OPENAI_API_KEY from the environment loaded above - confirm.
openai = OpenAI()

In [10]:
# Browser-like request headers sent with every page fetch.
headers = {
 "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36"
}

class Website:
    """
    A fetched web page reduced to its title and visible body text.

    Attributes:
        url:   the address that was requested.
        title: the text of the <title> tag, or "No title found" when the page
               has no title or the title has no single text node.
        text:  newline-separated text of <body> with script, style, img and
               input elements removed; "" when the page has no <body>.
    """

    def __init__(self, url, timeout=30):
        """
        Create this Website object from the given url using the BeautifulSoup library

        Args:
            url: address of the page to download.
            timeout: seconds to wait for the server before requests gives up,
                so a stalled connection cannot hang the notebook forever.

        Raises:
            requests.exceptions.RequestException: if the page cannot be fetched
                (including a timeout).
        """
        self.url = url
        response = requests.get(url, headers=headers, timeout=timeout)
        soup = BeautifulSoup(response.content, 'html.parser')

        # soup.title.string is None for an empty title or one with nested tags,
        # so fall back to the placeholder in that case too.
        title = soup.title.string if soup.title else None
        self.title = title if title else "No title found"

        body = soup.body
        if body is None:
            # Nothing to extract (e.g. a non-HTML response or a fragment without <body>).
            self.text = ""
            return

        # Drop elements that carry no readable content before extracting text.
        for irrelevant in body(["script", "style", "img", "input"]):
            irrelevant.decompose()
        self.text = body.get_text(separator="\n", strip=True)

In [21]:
# Smoke-test the scraper on a live page: fetch it, then show the extracted title and text.
website = Website("https://www.nytimes.com/")
print(website.title)
# Prints the whole extracted body text, which can be long.
print(website.text)

The New York Times - Breaking News, US News, World News and Videos
Skip to content
Skip to site index
SKIP ADVERTISEMENT
U.S.
International
Canada
Español
中文
Today’s Paper
U.S.
Sections
U.S.
Politics
New York
California
Education
Health
Obituaries
Science
Climate
Weather
Sports
Business
Tech
The Upshot
The Magazine
Top Stories
Trump Transition
Supreme Court
Congress
Immigration
Abortion
Newsletters
The Morning
Make sense of the day’s news and ideas.
The Upshot
Analysis that explains politics, policy and everyday life.
See all newsletters
Podcasts
The Daily
The biggest stories of our time, in 20 minutes a day.
The Run-Up
On the campaign trail with Astead Herndon.
See all podcasts
World
Sections
World
Africa
Americas
Asia
Australia
Canada
Europe
Middle East
Science
Climate
Weather
Health
Obituaries
Top Stories
Middle East Crisis
Russia-Ukraine War
China International Relations
The Global Profile
Leer en Español
Newsletters
Morning Briefing: Europe
Get what you need to know to start your 

In [35]:
# System message sent with every request: it fixes the model's task (an MCQ quiz
# built from the page content) and the output rules. Each rule ends with a period
# so the instructions do not run together once the continuation lines are joined.
system_prompt = "You are an assistant that analyzes the contents of a website \
and creates a MCQ quiz based on the content of the website. \
The quiz has maximum 5 questions. \
Write the correct answer after each MCQ question."

In [36]:
def user_prompt_for(website):
    """
    Build the user message for a page.

    The message is three pieces joined with no separator: a line naming the
    page title, the quiz request, and the page text itself.

    Args:
        website: object exposing `title` and `text` attributes.

    Returns:
        The assembled prompt string.
    """
    pieces = [
        f"You are looking at a website titled {website.title}",
        "\nPlease generate a MCQ Quiz based on the website content which you can find below:\n",
        website.text,
    ]
    return "".join(pieces)

In [38]:
# Inspect the exact user message that would be sent for the page fetched above.
print(user_prompt_for(website))

You are looking at a website titled The New York Times - Breaking News, US News, World News and Videos
Please generate a MCQ Quiz based on the website content which you can find below:
Skip to content
Skip to site index
SKIP ADVERTISEMENT
U.S.
International
Canada
Español
中文
Today’s Paper
U.S.
Sections
U.S.
Politics
New York
California
Education
Health
Obituaries
Science
Climate
Weather
Sports
Business
Tech
The Upshot
The Magazine
Top Stories
Trump Transition
Supreme Court
Congress
Immigration
Abortion
Newsletters
The Morning
Make sense of the day’s news and ideas.
The Upshot
Analysis that explains politics, policy and everyday life.
See all newsletters
Podcasts
The Daily
The biggest stories of our time, in 20 minutes a day.
The Run-Up
On the campaign trail with Astead Herndon.
See all podcasts
World
Sections
World
Africa
Americas
Asia
Australia
Canada
Europe
Middle East
Science
Climate
Weather
Health
Obituaries
Top Stories
Middle East Crisis
Russia-Ukraine War
China International Rela

In [39]:
def generate_prompt(website):
    """
    Assemble the chat messages for a page.

    Args:
        website: object accepted by user_prompt_for.

    Returns:
        A two-element list of role/content dicts: the system message
        (system_prompt) followed by the user message for this page.
    """
    system_message = {"role": "system", "content": system_prompt}
    user_message = {"role": "user", "content": user_prompt_for(website)}
    return [system_message, user_message]

In [40]:
# Show the message list built for the page fetched above (bare expression, so the cell displays it).
generate_prompt(website)

[{'role': 'system',
  'content': 'You are an assistant that analyzes the contents of a website and creates a MCQ quiz based on the content of the website. The quiz has maximum 5 questions Write the correct answer after each MCQ question'},
 {'role': 'user',
  'content': "You are looking at a website titled The New York Times - Breaking News, US News, World News and Videos\nPlease generate a MCQ Quiz based on the website content which you can find below:\nSkip to content\nSkip to site index\nSKIP ADVERTISEMENT\nU.S.\nInternational\nCanada\nEspañol\n中文\nToday’s Paper\nU.S.\nSections\nU.S.\nPolitics\nNew York\nCalifornia\nEducation\nHealth\nObituaries\nScience\nClimate\nWeather\nSports\nBusiness\nTech\nThe Upshot\nThe Magazine\nTop Stories\nTrump Transition\nSupreme Court\nCongress\nImmigration\nAbortion\nNewsletters\nThe Morning\nMake sense of the day’s news and ideas.\nThe Upshot\nAnalysis that explains politics, policy and everyday life.\nSee all newsletters\nPodcasts\nThe Daily\nThe b

In [41]:
def generate_quiz(url):
    """
    Scrape a page and ask the model for a quiz about it.

    Args:
        url: address of the page to build the quiz from.

    Returns:
        The content of the first choice in the chat completion response.
    """
    website = Website(url)
    response = openai.chat.completions.create(
        model="gpt-4o-mini",
        # generate_prompt is the message builder defined in this notebook; the
        # previously referenced messages_for does not exist here and would
        # raise NameError on a fresh kernel.
        messages=generate_prompt(website),
    )
    return response.choices[0].message.content

In [42]:
# Fetches the page and calls the chat completions API; the result is kept so it can be rendered separately.
generated_content = generate_quiz("https://www.nytimes.com/")

In [43]:
# Render the returned text as Markdown rather than printing it raw.
display(Markdown(generated_content))

### MCQ Quiz based on The New York Times Website Content

1. What type of analysis does "The Upshot" section of The New York Times focus on?
   A) Sports results  
   B) Personal finance  
   C) Politics, policy, and everyday life  
   D) Arts and Culture  
   **Correct Answer: C) Politics, policy, and everyday life**

2. Which podcast offers a 20-minute summary of the biggest stories of our time?
   A) The Run-Up  
   B) The Daily  
   C) Matter of Opinion  
   D) Hard Fork  
   **Correct Answer: B) The Daily**

3. Which section of The New York Times would you explore to get the latest updates on international relations involving China?
   A) World  
   B) Business  
   C) Opinion  
   D) Sports  
   **Correct Answer: A) World**

4. What event is associated with the term "Middle East Crisis" mentioned in the website's top stories?
   A) Climate Change Summit  
   B) Russia-Ukraine War  
   C) U.S. Economy  
   D) Recent conflicts involving Israel and Palestine  
   **Correct Answer: D) Recent conflicts involving Israel and Palestine**

5. Which section would you visit to find cooking recipes and meal ideas?
   A) Lifestyle  
   B) Opinion  
   C) Business  
   D) International  
   **Correct Answer: A) Lifestyle**