# URL Summarization using Ollama

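This notebook demonstrates how to use Ollama (a tool for running LLMs locally) to summarize the content of a web page given its URL. It assumes Ollama is running on this machine and that the model used below (`gemma3`) is available to it.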


In [None]:
# Import required libraries
import requests
from bs4 import BeautifulSoup
from ollama import chat
from ollama import ChatResponse


## Step 1: Extract text content from a URL


In [None]:
def extract_text_from_url(url, timeout=10):
    """
    Fetches a webpage and extracts its text content.

    The contents of <script> and <style> elements are removed before the
    text is extracted, so JavaScript/CSS source is not part of the result.

    Args:
        url (str): The URL of the webpage to extract text from
        timeout (float | tuple): Seconds to wait for the server before
            giving up; passed straight to ``requests.get`` (default: 10)

    Returns:
        str: The extracted text content from the webpage

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an error status code. The error is printed
            before being re-raised.
    """
    try:
        # requests has no default timeout: without one, an unresponsive
        # server would block this call (and the notebook cell) indefinitely.
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()  # Raise an exception for bad status codes
    except requests.RequestException as e:
        print(f"Error fetching URL: {e}")
        raise

    soup = BeautifulSoup(response.text, 'html.parser')

    # Drop code-bearing elements so their source is never returned as text.
    for tag in soup(['script', 'style']):
        tag.decompose()

    return soup.get_text()


## Step 2: Summarize content using Ollama


In [None]:
def summarize_url(url, model='gemma3', max_words=100, max_chars=5000):
    """
    Extracts text from a URL and summarizes it using a local LLM via Ollama.

    Args:
        url (str): The URL of the webpage to summarize
        model (str): The Ollama model to use (default: 'gemma3')
        max_words (int): Approximate target length of the summary in words.
            It is only requested in the prompt, not enforced (default: 100)
        max_chars (int | None): Maximum number of characters of page text
            to place in the prompt; None disables truncation (default: 5000)

    Returns:
        str: The summary of the webpage content

    Raises:
        ValueError: If the page yields no text to summarize.
        Exceptions raised by ``extract_text_from_url`` or ``chat`` propagate
        unchanged.
    """
    # Extract text content from URL
    text_content = extract_text_from_url(url)

    # Strip every line and drop the empty ones before truncating, so the
    # character budget below is spent on actual text rather than on the
    # blank lines and indentation left behind by the page markup.
    stripped_lines = (line.strip() for line in text_content.splitlines())
    text_content = "\n".join(line for line in stripped_lines if line)

    # Asking the model to summarize nothing only produces a made-up answer.
    if not text_content:
        raise ValueError(f"No text content could be extracted from {url}")

    # Truncate content if it's too long (to avoid model context limits)
    if max_chars is not None and len(text_content) > max_chars:
        text_content = text_content[:max_chars] + "... [content truncated]"

    # Create prompt for summarization
    prompt = f"Please summarize the following text in approximately {max_words} words:\n\n{text_content}"

    # Call Ollama API with a single user message
    messages = [
        {
            'role': 'user',
            'content': prompt
        }
    ]

    response: ChatResponse = chat(model=model, messages=messages)
    return response.message.content


## Step 3: Test the solution

Let's test the URL summarization function with an example URL.


In [None]:
# Demo run: summarize a single article end to end.
url = "https://www.geeksforgeeks.org/dsa/introduction-to-arrays-data-structure-and-algorithm-tutorials/"

# Announce the target page, followed by an 80-character rule padded with
# a blank line above and below.
print("Extracting and summarizing content from:", url)
print(f"\n{'=' * 80}\n")

# Downloads the page and asks the local model for a ~100-word summary.
summary = summarize_url(url, model='gemma3', max_words=100)

# Heading and summary each go on their own line.
print("Summary:", summary, sep="\n")
