In [None]:
import requests
from bs4 import BeautifulSoup
from openai import OpenAI
from IPython.display import display, Markdown

In [None]:
# OpenAI client pointed at a server on localhost port 11434 instead of the
# hosted API. The api_key value is presumably just a placeholder for a local
# server -- confirm before pointing this at anything remote.
ollama_client = OpenAI(
    api_key="ollama",
    base_url="http://localhost:11434/v1",
)

# Model name passed to the chat-completions call.
MODEL = "llama3.2"

# HTTP headers sent when downloading a page; the User-Agent is a desktop
# Chrome-on-Windows string assembled from its three space-separated parts.
HEADERS = {
    "User-Agent": " ".join(
        [
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64)",
            "AppleWebKit/537.36 (KHTML, like Gecko)",
            "Chrome/117.0.0.0 Safari/537.36",
        ]
    )
}

In [None]:
class Website:
    """A web page downloaded once and reduced to its title and visible text.

    All attributes are filled in by ``__init__``; nothing is refreshed later.
    """

    url: str    # address the page was requested from, exactly as passed in
    title: str  # contents of the <title> element, or "No title found"
    text: str   # newline-separated text left after dropping non-content tags

    def __init__(self, url, timeout=30):
        """Create a Website object from the given url using BeautifulSoup.

        Args:
            url: Address of the page to download.
            timeout: Value forwarded to ``requests.get(..., timeout=...)``,
                in seconds. Defaults to 30.

        Raises:
            requests.exceptions.RequestException: Propagated unchanged from
                ``requests.get`` (connection failure, timeout, ...).
        """
        self.url = url
        # An explicit timeout keeps an unresponsive host from blocking the
        # notebook cell indefinitely.
        response = requests.get(url, headers=HEADERS, timeout=timeout)
        soup = BeautifulSoup(response.content, "html.parser")

        # `.string` is None when <title> is empty or holds more than one child
        # node, so checking only for the tag's presence is not enough to
        # guarantee a usable string.
        title_text = soup.title.string if soup.title else None
        self.title = title_text if title_text else "No title found"

        # Remove elements that contribute no readable content to a summary.
        for irrelevant in soup(["script", "style", "img", "input"]):
            irrelevant.decompose()

        self.text = soup.get_text(separator="\n", strip=True)

In [None]:
# System message for the model: summarize the page briefly, ignore
# navigation text, and answer in markdown.
system_prompt = " ".join(
    (
        "You are an assistant that analyzes the contents of a website",
        "and provides a short summary, ignoring text that might be navigation related.",
        "Respond in markdown.",
    )
)

In [None]:
def user_prompt_for(website: Website) -> str:
    """Build the user message asking for a summary of ``website``.

    The prompt has three parts separated by blank lines: a line naming the
    page title, the summarization instructions, and the page text.

    Args:
        website: Object providing ``title`` and ``text`` attributes.

    Returns:
        The assembled prompt string.
    """
    heading = f"You are looking at a website titled {website.title}"
    instructions = (
        "The contents of this website are as follows; please provide a short summary "
        "of this website in markdown. If it includes news or announcements, then "
        "summarize these too."
    )
    body = f"{website.text}"
    return "\n\n".join([heading, instructions, body])

In [None]:
def messages_for(website: Website):
    """Return the chat message list for summarizing ``website``.

    The list holds two role/content dicts, in order: the system message
    carrying ``system_prompt`` and the user message built by
    ``user_prompt_for``.
    """
    system_message = {"role": "system", "content": system_prompt}
    user_message = {"role": "user", "content": user_prompt_for(website)}
    return [system_message, user_message]

In [None]:
def summarize(url: str) -> str:
    """Download the page at ``url`` and return the model's summary of it.

    Args:
        url: Address of the page to summarize.

    Returns:
        The message content of the first choice in the completion response.
    """
    page = Website(url)
    completion = ollama_client.chat.completions.create(
        model=MODEL,
        messages=messages_for(page),
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

In [None]:
# Quick check of the raw summary: as the cell's last expression, the returned
# string is shown as the cell output (unrendered markdown).
summarize(url="https://simplykae.link")

In [None]:
def display_summary(url: str):
    """Summarize the page at ``url`` and render the result as Markdown.

    Args:
        url: Address of the page to summarize.
    """
    display(Markdown(summarize(url)))

In [None]:
# Same page as the raw check, this time rendered as formatted Markdown.
display_summary(url="https://simplykae.link")