This notebook walks through building a webpage summarizer that uses the llama3.2 LLM, which I am running locally through Ollama.

Import Statements

In [24]:
import os
import requests
from bs4 import BeautifulSoup
from IPython.display import Markdown, display

Connecting to Ollama-llama3.2

In [25]:
# Name of the model requested in every chat payload.
MODEL = "llama3.2"

# Chat endpoint of the Ollama server on this machine.
OLLAMA_API = "http://localhost:11434/api/chat"

# HTTP headers sent along with each request to OLLAMA_API.
HEADERS = {
    "Content-Type": "application/json",
}

Creating a Class to Represent a Webpage

In [26]:
# A class to represent a Webpage
# If you're not familiar with Classes, check out the "Intermediate Python" notebook

# Browser-like User-Agent sent when downloading pages; some websites
# need proper headers before they will serve their content.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/117.0.0.0 Safari/537.36"
    )
}

class Website:
    """
    A downloaded webpage reduced to its title and visible text.

    Attributes (all set by __init__):
        url:   the address that was requested.
        title: contents of the <title> tag, or "No title found".
        text:  page text with script/style/img/input elements removed,
               one fragment per line.
    """

    def __init__(self, url, timeout=30):
        """
        Create this Website object from the given url using the BeautifulSoup library

        Args:
            url: address of the page to download.
            timeout: seconds handed to requests.get so that an unresponsive
                host cannot block the notebook forever.

        Raises:
            requests.exceptions.RequestException: propagated unchanged from
                requests.get (connection failure, timeout, ...).
        """
        self.url = url
        response = requests.get(url, headers=headers, timeout=timeout)

        # Beautiful soup is a package for web scraping
        soup = BeautifulSoup(response.content, 'html.parser')

        # .string is None when <title> is empty or wraps other tags; fall back
        # so the literal text "None" never ends up in the prompt.
        title_text = soup.title.string if soup.title else None
        self.title = title_text if title_text else "No title found"

        # html.parser does not invent a <body> for documents that lack one
        # (fragments, plain-text or JSON responses); use the whole document then.
        root = soup.body if soup.body is not None else soup

        # Drop elements that carry no readable content before extracting text.
        for irrelevant in root(["script", "style", "img", "input"]):
            irrelevant.decompose()
        self.text = root.get_text(separator="\n", strip=True)

Define System Prompt

In [27]:
# The system prompt explains the context of the situation to the model.
# It tells the model what kind of task it is performing and what tone to use.

# Sent as the "system" message of every chat request.
system_prompt = (
    "You are an assistant that analyzes the contents of a website "
    "and provides a short summary, ignoring text that might be navigation related. "
    "Respond in markdown."
)

In [28]:
# A function that writes a User Prompt that asks for summaries of websites:
# The user prompt is the actual conversation itself
# The conversation starts with the user prompt; the LLM's role is to work out how to respond to it within the context set by the system prompt

def user_prompt_for(website):
    """
    Build the user message asking for a markdown summary of a page.

    Args:
        website: any object exposing `title` and `text` attributes.

    Returns:
        A single string: a line naming the page title, the summarization
        instructions, a blank line, and then the page text.
    """
    heading = f"You are looking at a website titled {website.title}"
    instructions = (
        "\nThe contents of this website is as follows; "
        "please provide a short summary of this website in markdown. "
        "If it includes news or announcements, then summarize these too.\n\n"
    )
    return heading + instructions + website.text

Define Message List

In [29]:
# Create a messages list using the same format that we used for OpenAI

def messages_for(website):
    """
    Assemble the chat messages for one website.

    Returns:
        A two-element list of {"role", "content"} dicts: the system prompt
        first, then the user prompt generated for `website`.
    """
    system_message = {"role": "system", "content": system_prompt}
    user_message = {"role": "user", "content": user_prompt_for(website)}
    return [system_message, user_message]

Summarize Function

In [31]:
def summarize(url):
    """
    Download the page at `url` and ask the model at OLLAMA_API to summarize it.

    Returns:
        The reply's message content when the API answers with status 200
        ("No content in response." if that field is missing); otherwise a
        string of the form "Error: <status> - <response body>".
    """
    page = Website(url)

    # Streaming is switched off so the whole reply arrives as one JSON document.
    request_body = {
        "model": MODEL,
        "messages": messages_for(page),
        "stream": False,
    }
    reply = requests.post(OLLAMA_API, json=request_body, headers=HEADERS)

    # Anything other than 200 is reported back to the caller as text.
    if reply.status_code != 200:
        return f"Error: {reply.status_code} - {reply.text}"

    message = reply.json().get('message', {})
    return message.get('content', "No content in response.")


Display Summary Function

In [33]:
def display_summary(url):
    """Summarize the page at `url` and render the result as Markdown in the notebook."""
    display(Markdown(summarize(url)))


In [34]:
# Example run: summarize one page and render the result below.
# summarize() posts to OLLAMA_API, so the local Ollama server must be reachable.
display_summary("https://edwarddonner.com")

# Website Summary

**Overview**
The website is maintained by Edward Donner, a co-founder and CTO of Nebula.io. The platform focuses on applying AI to help people discover their potential.

### News/Announcements

* **December 21, 2024**: Welcome message for SuperDataScientists.
* **November 13, 2024**: Announcement for resources related to mastering AI and LLM engineering.
* **October 16, 2024**: Announcement for resources related to transitioning from a software engineer to an AI data scientist.
* **August 6, 2024**: Introduction to Outsmart LLM Arena, where LLMs compete in a battle of diplomacy and deviousness.

### Featured Content

* The "Outsmart" section features a competition arena where Large Language Models (LLMs) are pitted against each other.

Ollama Package

In [None]:
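# # The HTTP call made inside summarize() can also be issued directly, as below
# # (build `payload` first, the same way summarize() does).
# # NOTE: this snippet still uses `requests`; the `ollama` Python package offers
# # an equivalent call, ollama.chat(model=MODEL, messages=messages).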

# response = requests.post(OLLAMA_API, json=payload, headers=HEADERS)
# print(response.json()['message']['content'])