In [62]:
import os
import requests
from bs4 import BeautifulSoup
import openai
from dotenv import load_dotenv

# Load variables from a local .env file into the process environment.
# override=True asks python-dotenv to replace values that are already set.
load_dotenv(override=True)

# Each lookup yields None when the variable is not defined.
# google_search_key and search_engine_id are read by google_search_wikipedia.
openai_api_key = os.environ.get('OPENAI_API_KEY')
google_search_key = os.environ.get('GOOGLE_API_KEY')
search_engine_id = os.environ.get('SEARCH_ENGINE_CX_ID')

In [50]:
def google_search_wikipedia(query, timeout=10):
    """Return the URL of the top Wikipedia article Google finds for ``query``.

    The query is sent to the Google Custom Search JSON API with
    ``site:wikipedia.org`` appended, authenticated with the module-level
    ``google_search_key`` and ``search_engine_id`` values.

    Args:
        query: Free-text search string.
        timeout: Seconds to wait for the API before ``requests`` raises a
            timeout error. Defaults to 10.

    Returns:
        The first result link that contains ``wikipedia.org/wiki/``, or
        ``None`` when the response carries no usable result (no ``items``
        key, an empty ``items`` list, or a non-article link).
    """
    url = "https://www.googleapis.com/customsearch/v1"
    params = {
        "q": query + " site:wikipedia.org",
        "key": google_search_key,
        "cx": search_engine_id,
        "num": 1,  # Get only the top result
    }

    # Without a timeout a stalled connection would block the cell forever.
    response = requests.get(url, params=params, timeout=timeout)
    data = response.json()

    # "items" can be missing or empty, so never index into it blindly.
    items = data.get("items") if isinstance(data, dict) else None
    for item in items or []:
        link = item.get("link", "")
        if "wikipedia.org/wiki/" in link:  # Ensure it's a Wikipedia article
            return link
    return None

    
# Smoke test: resolve a sample question to a Wikipedia article URL.
# test_page is reused by the later cells; it is None when no article was found.
test_page = google_search_wikipedia(" who is Bill Gates")
print(test_page)

https://en.wikipedia.org/wiki/Bill_Gates


In [43]:
import requests
from bs4 import BeautifulSoup

def scrape_wikipedia_page(url, timeout=10):
    """Return the article text of a Wikipedia page as plain paragraphs.

    Downloads ``url`` and collects every non-empty ``<p>`` element that is a
    direct child of ``div.mw-parser-output`` (all paragraphs, not only the
    first one).

    Args:
        url: Address of the page to fetch. A falsy value (for example the
            ``None`` that ``google_search_wikipedia`` returns when nothing
            matched) short-circuits without any network access.
        timeout: Seconds to wait for the server before ``requests`` raises a
            timeout error. Defaults to 10.

    Returns:
        The stripped paragraphs joined by blank lines, or the string
        ``"No content found."`` when there is no URL or no matching paragraph.
    """
    if not url:
        return "No content found."

    # Without a timeout a stalled connection would block the cell forever.
    response = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(response.text, "html.parser")

    stripped_paragraphs = (
        para.text.strip() for para in soup.select("div.mw-parser-output > p")
    )
    total_paragraphs = [text for text in stripped_paragraphs if text]

    if not total_paragraphs:
        return "No content found."
    return '\n\n'.join(total_paragraphs)

# Preview the scraped paragraphs for the sample article URL held in test_page.
print(scrape_wikipedia_page(test_page))

William Henry Gates III  (born October 28, 1955) is an American businessman and philanthropist best known for co-founding the software company Microsoft with his childhood friend Paul Allen. He later held the positions of chairman, chief executive officer (CEO), president, and chief software architect of the company. Gates was also its largest individual shareholder until May 2014.[a] He was a pioneer of the microcomputer revolution of the 1970s and 1980s.

In June 2008, Gates transitioned into a part-time role at Microsoft and full-time work at the Bill & Melinda Gates Foundation, the private charitable foundation he and his then-wife Melinda had established in 2000. He stepped down as chairman of the Microsoft board in February 2014 and assumed the role of technology adviser to support newly appointed CEO Satya Nadella. In March 2020, Gates left his board positions at Microsoft and Berkshire Hathaway to focus on his philanthropic efforts on climate change, global health and developme

In [75]:
# Request headers passed to requests.get by the Website and WikiWebsite classes;
# the User-Agent string mimics a desktop Chrome browser.
headers = {
    'User-Agent': (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/117.0.0.0 Safari/537.36"
    ),
}

class Website:
    """A fetched web page reduced to its title, body text and lead paragraph.

    Attributes:
        url: The address that was requested.
        title: Text of the ``<title>`` tag, or ``"No title found"``.
        text: Text of ``<body>`` after script/style/img/input elements are
            removed; empty string when the document has no ``<body>``.
        main_paragraph: First non-empty ``<p>`` directly under
            ``div.mw-parser-output``, or ``"No content found."``.
    """

    def __init__(self, url, timeout=10):
        """Download ``url`` and extract title, text and lead paragraph.

        Args:
            url: Address of the page to fetch.
            timeout: Seconds to wait for the server before ``requests``
                raises a timeout error. Defaults to 10.
        """
        self.url = url
        # Without a timeout a stalled connection would block the cell forever.
        response = requests.get(url, headers=headers, timeout=timeout)
        soup = BeautifulSoup(response.content, 'html.parser')

        # <title> may be absent, or present without a single plain string.
        title_tag = soup.title
        has_title = title_tag is not None and title_tag.string
        self.title = title_tag.string if has_title else "No title found"

        body = soup.body
        if body is None:
            # Documents without a <body> have nothing to clean or extract.
            self.text = ""
        else:
            for irrelevant in body(['script', 'style', 'img', 'input']):
                irrelevant.decompose()
            self.text = body.get_text()

        # Take the FIRST non-empty paragraph; the attribute is always set so
        # later reads cannot fail with AttributeError on pages without one.
        stripped_paragraphs = (
            para.text.strip() for para in soup.select("div.mw-parser-output > p")
        )
        self.main_paragraph = next(
            (text for text in stripped_paragraphs if text),
            "No content found.",
        )

class WikiWebsite:
    """A fetched Wikipedia page reduced to its title, body text and article text.

    Attributes:
        url: The address that was requested.
        title: Text of the ``<title>`` tag, or ``"No title found"``.
        text: Text of ``<body>`` after script/style/img/input elements are
            removed; empty string when the document has no ``<body>``.
        main_text: Every non-empty ``<p>`` directly under
            ``div.mw-parser-output``, stripped and joined by blank lines, or
            ``"No content found."`` when there is none.
    """

    def __init__(self, url, timeout=10):
        """Download ``url`` and extract the title, body text and all paragraphs.

        Args:
            url: Address of the page to fetch.
            timeout: Seconds to wait for the server before ``requests``
                raises a timeout error. Defaults to 10.
        """
        self.url = url
        # Without a timeout a stalled connection would block the cell forever.
        response = requests.get(url, headers=headers, timeout=timeout)
        soup = BeautifulSoup(response.content, 'html.parser')

        # <title> may be absent, or present without a single plain string.
        title_tag = soup.title
        has_title = title_tag is not None and title_tag.string
        self.title = title_tag.string if has_title else "No title found"

        body = soup.body
        if body is None:
            # Documents without a <body> have nothing to clean or extract.
            self.text = ""
        else:
            for irrelevant in body(['script', 'style', 'img', 'input']):
                irrelevant.decompose()
            self.text = body.get_text()

        stripped_paragraphs = (
            para.text.strip() for para in soup.select("div.mw-parser-output > p")
        )
        all_paras = [text for text in stripped_paragraphs if text]

        if all_paras:
            self.main_text = '\n\n'.join(all_paras)
        else:
            self.main_text = "No content found."


# Build the page wrapper for the sample article and show what was extracted.
# The class defined in this notebook is WikiWebsite; the previous name
# (WikipediaWebsite) is not defined anywhere and fails on a fresh kernel.
site = WikiWebsite(test_page)
print(site.title)
print(site.main_text)

Bill Gates - Wikipedia
William Henry Gates III  (born October 28, 1955) is an American businessman and philanthropist best known for co-founding the software company Microsoft with his childhood friend Paul Allen. He later held the positions of chairman, chief executive officer (CEO), president, and chief software architect of the company. Gates was also its largest individual shareholder until May 2014.[a] He was a pioneer of the microcomputer revolution of the 1970s and 1980s.

In June 2008, Gates transitioned into a part-time role at Microsoft and full-time work at the Bill & Melinda Gates Foundation, the private charitable foundation he and his then-wife Melinda had established in 2000. He stepped down as chairman of the Microsoft board in February 2014 and assumed the role of technology adviser to support newly appointed CEO Satya Nadella. In March 2020, Gates left his board positions at Microsoft and Berkshire Hathaway to focus on his philanthropic efforts on climate change, glob

In [79]:
# System message sent with every summarization request (see messages_for).
system_prompt = (
    "You are an assistant that analyzes the contents of a website "
    "and provides a short summary, ignoring text that might be navigation related. "
    "Respond in markdown."
)

def user_prompt_for(website):
    """Compose the user message asking for a markdown summary of a page.

    Args:
        website: Object exposing ``title`` and ``main_text`` attributes.

    Returns:
        A string made of a line naming the page title, the summarization
        instructions, a blank line, and then ``website.main_text``.
    """
    heading = f"You are looking at a website titled {website.title}"
    instructions = (
        "\nThe contents of this website is as follows; "
        "please provide a short summary of this website in markdown. "
        "If it includes news or announcements, then summarize these too.\n\n"
    )
    return heading + instructions + website.main_text

def messages_for(website):
    """Build the two-message chat payload used to summarize ``website``.

    Args:
        website: Page object forwarded to ``user_prompt_for``.

    Returns:
        A list with the system message (``system_prompt``) followed by the
        user message produced by ``user_prompt_for(website)``.
    """
    system_message = {'role': 'system', 'content': system_prompt}
    user_message = {'role': 'user', 'content': user_prompt_for(website)}
    return [system_message, user_message]

In [80]:
def summarize(url, model='gpt-4o-mini'):
    """Fetch a Wikipedia page and return a model-written markdown summary.

    The page is loaded with ``WikiWebsite``, turned into chat messages by
    ``messages_for`` and sent to the OpenAI chat completions endpoint.

    Args:
        url: Address of the Wikipedia article to summarize.
        model: Name of the chat model to call. Defaults to ``'gpt-4o-mini'``,
            the value that was previously hard-coded.

    Returns:
        The content of the first choice in the completion response.
    """
    website = WikiWebsite(url)
    # The client is created without arguments, so credentials come from the
    # library's own lookup (presumably the OPENAI_API_KEY environment variable
    # populated by load_dotenv — confirm against the openai client docs).
    oai = openai.OpenAI()
    response = oai.chat.completions.create(
        model=model,
        messages=messages_for(website),
    )
    return response.choices[0].message.content

In [81]:
# End-to-end run on the sample article; the returned string is shown as the cell output.
summarize(test_page)

"# Summary of Bill Gates - Wikipedia\n\nWilliam Henry Gates III (born October 28, 1955) is an American businessman and philanthropist, widely recognized for co-founding Microsoft alongside Paul Allen. Gates was instrumental in the microcomputer revolution of the 1970s and 1980s and held various leadership roles in Microsoft until he transitioned to a part-time role in 2008, later focusing full-time on the Bill & Melinda Gates Foundation.\n\n## Key Events:\n- **Transition to Philanthropy**: Gates moved to part-time at Microsoft in June 2008 and fully committed to philanthropy through his foundation, which he started with Melinda in 2000.\n- **Board Positions**: He left his board positions at Microsoft and Berkshire Hathaway in March 2020 to prioritize his philanthropic work in areas like climate change, global health, and education.\n- **Business and Philanthropy**: He has established several companies including BEN, Cascade Investment, and TerraPower. The Bill & Melinda Gates Foundatio