In [1]:
## Step1 - Imports

# imports

import os
import requests
import ollama
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from IPython.display import Markdown, display


In [2]:
## Step2 - Creating Header and defining Class

headers = {
 "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36"
}

class Website:

    def __init__(self, url):
        """
        Create this Website object from the given url using the BeautifulSoup library
        """
        self.url = url
        response = requests.get(url, headers=headers)
        soup = BeautifulSoup(response.content, 'html.parser')
        self.title = soup.title.string if soup.title else "No title found"
        for irrelevant in soup.body(["script", "style", "img", "input"]):
            irrelevant.decompose()
        self.text = soup.body.get_text(separator="\n", strip=True)

In [3]:
## Step3 - Defining Prompts

system_prompt = "You are an assistant that analyzes the contents of a website \
and provides a short summary, ignoring text that might be navigation related. \
Respond in markdown."

def user_prompt_for(website):
    user_prompt = f"You are looking at a website titled {website.title}"
    user_prompt += "\nThe contents of this website is as follows; \
please provide a short summary of this website in markdown. \
If it includes news or announcements, then summarize these too.\n\n"
    user_prompt += website.text
    return user_prompt

In [6]:
website = Website('https://www.cnn.com/')
print(user_prompt_for(website))

You are looking at a website titled Breaking News, Latest News and Videos | CNN
The contents of this website is as follows; please provide a short summary of this website in markdown. If it includes news or announcements, then summarize these too.

CNN values your feedback
1. How relevant is this ad to you?
2. Did you encounter any technical issues?
Video player was slow to load content
Video content never loaded
Ad froze or did not finish loading
Video content did not start after ad
Audio on ad was too loud
Other issues
Ad never loaded
Ad prevented/slowed the page from loading
Content moved around while ad loaded
Ad was repetitive to ads I've seen previously
Other issues
Cancel
Submit
Thank You!
Your effort and contribution in providing this feedback is much
                                        appreciated.
Close
Ad Feedback
Close icon
US
World
Politics
Business
Health
Entertainment
Style
Travel
Sports
Science
Climate
Weather
Ukraine-Russia War
Israel-Hamas War
Underscored
Games
Mo

In [7]:
# Step4 - Summarizing a prompt. A function that writes a User Prompt that asks for summaries of websites:

def user_prompt_for(website):
    user_prompt = f"You are looking at a website titled {website.title}"
    user_prompt += "\nThe contents of this website is as follows; \
please provide a short summary of this website in markdown. \
If it includes news or announcements, then summarize these too.\n\n"
    user_prompt += website.text
    return user_prompt

In [8]:
def messages_for(website):
    return [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt_for(website)}
    ]

In [19]:
def summarize(url):
    website = Website(url)
    response = ollama.chat(
        model = "llama3.2",
        messages = messages_for(website)
    )
    return response.message.content

In [20]:
summarize("https://edwarddonner.com")

'# Website Summary\n\nThe website belongs to Edward Donner, a co-founder and CTO of Nebula.io. The content is mostly personal, sharing his interests in writing code, DJing, and amateur electronic music production.\n\n### News and Announcements\n\n* **LLM Workshop – Hands-on with Agents** (January 23, 2025): A workshop where participants will work with agents to learn about LLMs.\n* **Welcome, SuperDataScientists!** (November 13, 2024): An announcement welcoming a community of data scientists.\n* **Mastering AI and LLM Engineering – Resources** (October 16, 2024) and **From Software Engineer to AI Data Scientist – resources**: Two sets of resources sharing knowledge on mastering AI and transitioning from software engineering to AI data science.\n* **December 21, 2024 and December 23, 2024 news**: No specific content available for these dates.'

In [21]:
summarize("https://medium.com/")

"### Medium Website Summary\n\n**Mission**\nMedium aims to provide a platform for people to read, write, and share stories on various topics.\n\n*   **Human Stories & Ideas**: A collection of articles sharing personal experiences, perspectives, and insights from individuals.\n*   **Reading and Writing**: Features a 'Start reading' section allowing users to explore different stories, as well as tools to help writers improve their craft.\n\n**Important Details**\n\n*   **Membership**: Medium has a membership program that offers exclusive features and benefits for subscribers.\n*   **Careers and Press**: Sections dedicated to job opportunities and media coverage of the platform.\n*   **Terms of Service and Privacy Policy**: Clearly stated guidelines users must agree to when creating an account or using the platform."

In [22]:
summarize('https://dandapanillc.activehosted.com/index.php?action=social&chash=ba3866600c3540f67c1e9575e213be0a.1526&s=4127ca34c21890a46efab4ea88e18dff')

"# Summary of The Power of Clarity Website\nThe website focuses on the importance of clarity in life, particularly in answering big questions such as: * What do I want in life? * Who and what is truly important to me? * Why am I here and what is my purpose?\n\n## News/Announcements\n* **LAUNCH SALE**: The Happier Life Program's 48-hour sale ends on April 1st. If you already have the Focus and/or Energy courses, you can complete your program by purchasing only the course(s) you don't yet own at 50% off.\n* Exclusive bonuses for launch period sign-ups: lifetime access to Introduction to Meditation and invitations to live online group Q&A sessions with Dandapani.\n\n## Main Content\nThe website's content emphasizes that mastery of life requires a foundational approach, focusing on understanding and mastering the basics. The Happier Life Program, consisting of three courses (Unwavering Focus Course, Energy Alchemist Course, and Magic of the Mind), is designed to help individuals gain clari