In [7]:
import os
import requests
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from IPython.display import Markdown, display
from openai import OpenAI

In [8]:
# Read the local .env file into the process environment (override=True is passed
# to load_dotenv), then look up OPENAI_API_KEY; api_key is None when it is not set.
load_dotenv(override=True)
api_key = os.environ.get('OPENAI_API_KEY')

In [9]:
# Shared API client used by the chat-completion calls later in this notebook.
# It is constructed with no explicit arguments. The variable name matches the
# package name, but only the OpenAI class was imported, so nothing is shadowed.
openai = OpenAI()

In [None]:
# Browser-like request headers passed to requests.get when a page is fetched.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/117.0.0.0 Safari/537.36"
    ),
}

class Website:
    """A fetched web page reduced to its title and visible body text.

    Attributes:
        url: The address that was requested.
        title: Text of the page's <title> element, or "No title found" when the
            element is missing or has no single text value.
        text: Newline-separated text of the <body> element after script, style,
            img and input elements have been removed; "" when there is no <body>.
    """

    def __init__(self, url, timeout=30):
        """Download `url` and extract its title and body text.

        Args:
            url: Address of the page to fetch.
            timeout: Forwarded to requests.get as its `timeout` argument so a
                stalled server cannot block the notebook forever. Defaults to 30.
        """
        self.url = url
        response = requests.get(url, headers=headers, timeout=timeout)
        soup = BeautifulSoup(response.content, 'html.parser')  # parse the raw HTML bytes

        # soup.title may be absent, and .string is None for an empty <title> or one
        # that wraps other tags; use the placeholder in every such case so that
        # None never reaches the prompt text built from this attribute.
        title_text = soup.title.string if soup.title else None
        self.title = title_text if title_text else "No title found"

        # Pages without a <body> (fragments, some error responses) yield
        # soup.body is None; treat them as having no text instead of crashing.
        body = soup.body
        if body is None:
            self.text = ""
            return

        # Remove elements that contribute no readable content before extracting text.
        for irrelevant in body(["script", "style", "img", "input"]):
            irrelevant.decompose()
        self.text = body.get_text(separator="\n", strip=True)

In [11]:
# Scrape a sample page, then show its title followed by the first 500 characters
# of the extracted text (one per line).
link = Website("https://en.wikipedia.org/wiki/Perplexity_AI")
print(link.title, link.text[:500], sep="\n")

Perplexity AI - Wikipedia
Jump to content
Main menu
Main menu
move to sidebar
hide
Navigation
Main page
Contents
Current events
Random article
About Wikipedia
Contact us
Contribute
Help
Learn to edit
Community portal
Recent changes
Upload file
Special pages
Search
Search
Appearance
Donate
Create account
Log in
Personal tools
Donate
Create account
Log in
Pages for logged out editors
learn more
Contributions
Talk
Contents
move to sidebar
hide
(Top)
1
History
2
Products and services
Toggle Products and services subsection
2


In [28]:
# System prompt sent with every request. To experiment, change the last sentence,
# e.g. to "Respond in markdown in Spanish."
system_prompt = (
    "You are an assistant that analyzes the contents of a website "
    "and provides a short summary, ignoring text that might be navigation related. "
    "Respond in markdown."
)

In [29]:
def user_prompt_for(website):
    """Compose the user prompt asking for a markdown summary of `website`.

    The prompt is the page title line, a fixed instruction paragraph, and then
    the page text, read from `website.title` and `website.text` respectively.
    """
    heading = f"You are looking at a website titled {website.title}"
    instructions = (
        "\nThe contents of this website is as follows; "
        "please provide a short summary of this website in markdown. "
        "If it includes news or announcements, then summarize these too.\n\n"
    )
    return heading + instructions + website.text



In [None]:
def messages_for(website):
    """Return the chat messages for `website`: the system message, then the user message."""
    system_message = {"role": "system", "content": system_prompt}
    user_message = {"role": "user", "content": user_prompt_for(website)}
    return [system_message, user_message]

In [30]:
def summarize(url, model="gpt-4o-mini"):
    """Scrape `url` and return a model-written summary of the page.

    Args:
        url: Address of the page to fetch and summarize.
        model: Name of the chat model to request. Defaults to "gpt-4o-mini",
            the value that was previously hard-coded.

    Returns:
        The message content of the first choice in the completion response.
    """
    website = Website(url)
    response = openai.chat.completions.create(
        model=model,
        messages=messages_for(website),
    )
    return response.choices[0].message.content


In [31]:
# Build the system + user messages for the sample page through messages_for(),
# so this preview sends exactly what summarize() sends and the two cannot drift.
messages = messages_for(link)

In [32]:
# Preview: send the system and user messages to the chat completions endpoint
# and print the content of the first returned choice.
response = openai.chat.completions.create(
    messages=messages,
    model="gpt-4o-mini",
)
print(response.choices[0].message.content)

# Summary of Perplexity AI - Wikipedia

**Perplexity AI, Inc. Overview**  
Perplexity AI is an American private software company founded in 2022, based in San Francisco, California. It operates a web search engine that utilizes large language models to process user queries and provide synthesized responses with inline citations. The company offers both a free public version and a paid Pro subscription with advanced features. As of 2024, it had about 52 employees and was valued at around $18 billion.

**History and Growth**  
Perplexity AI was established by Aravind Srinivas, Denis Yarats, Johnny Ho, and Andy Konwinski. It launched its main search engine on December 7, 2022. Since its inception, the company has experienced significant growth, reporting two million unique visitors and handling up to 780 million queries monthly by May 2025.

**Products and Services**  
The company’s offerings include:
- **Perplexity Pro**: A subscription service providing access to premium features.
- **I