# Goal: Website Summarizer

In [1]:
import requests
from bs4 import BeautifulSoup

class WebScraper:
    """Downloads a web page and extracts the text of selected HTML elements."""

    def __init__(self, timeout=10):
        """
        Args:
            timeout (float or tuple, optional): Seconds to wait for the server,
                forwarded to ``requests.get``. Defaults to 10. Without a timeout
                a stalled server would block the notebook cell indefinitely.
        """
        self.timeout = timeout

    def scrape_text(self, url, element_type, element_attribute=None, attribute_value=None):
        """
        Scrapes text from a website.

        Args:
            url (str): The URL of the website to scrape.
            element_type (str): The HTML element type to target (e.g., "p", "h1", "div").
            element_attribute (str, optional): The attribute to filter by (e.g., "class", "id"). Defaults to None.
            attribute_value (str, optional): The value of the attribute to filter by. Defaults to None.

        The attribute filter is applied only when BOTH ``element_attribute`` and
        ``attribute_value`` are given (truthy); otherwise every ``element_type``
        element on the page is returned.

        Returns:
            list: A list of strings containing the extracted text, one entry per
            matched element. An empty list is returned (and a message printed)
            if the request or the parsing fails.
        """
        try:
            # Bound the request so an unresponsive host raises a Timeout
            # (a RequestException subclass, handled below) instead of hanging.
            response = requests.get(url, timeout=self.timeout)
            response.raise_for_status()  # Raise HTTPError for bad responses (4xx or 5xx)
            soup = BeautifulSoup(response.content, "html.parser")

            if element_attribute and attribute_value:
                elements = soup.find_all(element_type, {element_attribute: attribute_value})
            else:
                elements = soup.find_all(element_type)

            # strip=True trims surrounding whitespace from each text fragment.
            return [element.get_text(strip=True) for element in elements]

        except requests.exceptions.RequestException as e:
            # Network-level failures: connection errors, timeouts, HTTP 4xx/5xx.
            print(f"Error fetching URL: {e}")
            return []
        except Exception as e:
            # Best-effort: any other failure (e.g. while parsing) yields no text.
            print(f"An error occured: {e}")
            return []

In [2]:

# scraper = WebScraper()
# url = "https://medium.com/@spaw.co/best-websites-to-practice-web-scraping-9df5d4df4d1" #replace with your target website.
# paragraphs = scraper.scrape_text(url, "p")
# print(paragraphs)

# header_text = scraper.scrape_text(url, "h1")
# print(header_text)

# div_class_text = scraper.scrape_text(url, "div", "class", "example-class")
# print(div_class_text)

# Call the ChatGPT API to summarize the contents

In [3]:
import openai
import os
from IPython.display import display, Markdown
from dotenv import load_dotenv
# Load settings from a .env file (python-dotenv) so that OPENAI_API_KEY can be
# read from the environment later instead of being hardcoded in the notebook.
load_dotenv()

class TextSummarizer:
    """Summarizes text with the OpenAI chat-completions API."""

    def __init__(self, openai_api_key, model="gpt-3.5-turbo"):
        """
        Args:
            openai_api_key (str): API key; stored on the ``openai`` module so the
                module-level client used by ``summarize_text`` picks it up.
            model (str, optional): Chat model to request. Defaults to
                "gpt-3.5-turbo".
        """
        openai.api_key = openai_api_key
        self.model = model

    def summarize_text(self, text):
        """Summarizes text using the ChatGPT API.

        Args:
            text (str | list | tuple | None): The text to summarize. A list or
                tuple of fragments (e.g. scraped paragraphs) is joined with
                newlines, skipping empty fragments, so the prompt contains
                plain text rather than the repr of a Python list.

        Returns:
            str: The summary with surrounding whitespace removed, or "" when
            there is nothing to summarize or the API call fails (the error is
            printed, not raised).
        """
        if isinstance(text, (list, tuple)):
            text = "\n".join(str(part) for part in text if part)
        elif text is None:
            text = ""
        else:
            text = str(text)

        # Avoid spending an API call on an empty prompt.
        if not text.strip():
            print("No text to summarize.")
            return ""

        try:
            response = openai.chat.completions.create(
                model=self.model,
                messages=[
                    {"role": "system", "content": "You are a helpful assistant that summarizes text."},
                    {"role": "user", "content": f"Summarize the following text: {text}"},
                ],
            )
            # The message content may be None; treat that as an empty summary.
            content = response.choices[0].message.content
            return (content or "").strip()
        except Exception as e:
            # Best-effort: report the failure and let the notebook keep running.
            print(f"Error summarizing text: {e}")
            return ""

In [4]:
# Example usage: scrape the paragraphs of a page and summarize them.
# The key is read from the environment (populated by load_dotenv()); never hardcode it here.
openai_api_key = os.getenv('OPENAI_API_KEY')
url = "https://en.wikipedia.org/wiki/Robert"  # Replace with your target website.

scraper = WebScraper()
summarizer = TextSummarizer(openai_api_key)

scraped_text = scraper.scrape_text(url, "p")

# scrape_text returns a list of paragraph strings. Join them into one plain-text
# document so the prompt does not contain the Python repr of a list
# (brackets, quotes, escape sequences), and drop empty paragraphs.
page_text = "\n".join(paragraph for paragraph in scraped_text if paragraph)

# Skip the API call entirely when nothing was scraped (e.g. the fetch failed).
summary = summarizer.summarize_text(page_text) if page_text else ""

display(Markdown(summary))

The name Robert is an ancient Germanic given name, derived from Proto-Germanic roots meaning "fame" and "bright". It is one of the most frequently used names of ancient Germanic origin and is also used as a surname. The name became popular in Continental Europe before entering England in its Old French form. Robert is a common name in many Germanic languages and has various forms in different languages. It was a royal name in France, Germany, Scotland, and England during the medieval period and was popular among nobility. Despite its historical associations, Robert remains one of the most frequently given male names and was in the top 10 most popular boys' names in the United States for 47 years. It is the fourth most common name in the United States. The name also has significance in Italian history during World War II. The name's root is the origin of the modern English word "bright".