In [1]:

import os
import requests
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from IPython.display import Markdown,display
from openai import OpenAI
from typing import Dict, List, Optional

class WebsiteAnalyzer:
    """Summarise a web page by sending its visible text to an Ollama chat model.

    The analyzer downloads a page, strips non-textual markup, builds a
    two-message chat prompt (system + user) and posts it to the Ollama
    ``/api/chat`` endpoint.
    """

    def __init__(
        self,
        ollama_url: str = "http://localhost:11434",
        *,
        model: str = "llama3.2",
        fetch_timeout: Optional[float] = 30.0,
        model_timeout: Optional[float] = 300.0,
    ):
        """Initialize the analyzer.

        Args:
            ollama_url: Base URL of the Ollama server; a trailing slash is
                removed so endpoint paths can be appended safely.
            model: Name of the Ollama model used for the chat request.
            fetch_timeout: Seconds to wait for the website download
                (``None`` waits indefinitely).
            model_timeout: Seconds to wait for the Ollama response
                (``None`` waits indefinitely). Generous by default because
                local generation can be slow.
        """
        self.ollama_url = ollama_url.rstrip('/')
        self.model = model
        self.fetch_timeout = fetch_timeout
        self.model_timeout = model_timeout
        # Browser-like User-Agent sent with the page download.
        self.headers = {
            "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
                         "(KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36"
        }

    def _fetch_website_content(self, url: str) -> tuple[str, str]:
        """Fetch a page and return ``(title, text)``.

        Args:
            url: Address of the page to download.

        Returns:
            The page title (or ``"No title found"``) and the body text with
            one line per text fragment; the text is empty when the document
            has no ``<body>``.

        Raises:
            requests.RequestException: On connection problems, timeouts, or
                an HTTP error status.
        """
        response = requests.get(
            url, headers=self.headers, timeout=self.fetch_timeout
        )
        # Fail loudly instead of summarising an error page as if it were the site.
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')

        # ``.string`` is None for an empty <title> or one with nested markup,
        # so fall back in that case too rather than emitting "None".
        title_tag = soup.title
        if title_tag is not None and title_tag.string:
            title = title_tag.string
        else:
            title = "No title found"

        # Remove elements that carry no readable text.
        for tag in soup.find_all(['script', 'style', 'img']):
            tag.decompose()

        text = soup.body.get_text(separator="\n", strip=True) if soup.body else ""
        return title, text

    def _create_messages(self, title: str, text: str) -> List[Dict[str, str]]:
        """Build the chat messages for Ollama.

        The system message comes first so that it frames the conversation
        before the model reads the user request.

        Args:
            title: Page title inserted at the top of the user prompt.
            text: Page text appended at the end of the user prompt.

        Returns:
            A two-element list: the system message, then the user message.
        """
        return [
            {
                "role": "system",
                "content": "i need your help in categorising the website of the industrial sector "
                          "basing on its sectors and all its involved in"
            },
            {
                "role": "user",
                "content": f"The generated title is {title}\n"
                          "retrieve all the necessary information creating a summary of the important notes \n"
                          "with all the necessary contact information of each summarised sector \n"
                          "an easy means of how to make events in calendar \n"
                          f"{text}"
            },
        ]

    def analyze_website(self, url: str) -> str:
        """Analyze website content using Ollama.

        Downloads the page, sends the prompt to the configured model,
        displays the answer (as Markdown when possible, otherwise printed)
        and returns it.

        Args:
            url: Address of the page to analyze.

        Returns:
            The model's reply text, or ``"No response from model"`` when the
            response carries no message content.

        Raises:
            requests.RequestException: If either the page download or the
                Ollama request fails, times out, or returns an HTTP error.
        """
        title, text = self._fetch_website_content(url)

        payload = {
            "model": self.model,
            "messages": self._create_messages(title, text),
            "stream": False
        }

        response = requests.post(
            f"{self.ollama_url}/api/chat",
            json=payload,
            headers={"Content-Type": "application/json"},
            timeout=self.model_timeout,
        )
        # Surface server-side failures (e.g. unknown model) instead of
        # silently reporting "No response from model".
        response.raise_for_status()

        result = response.json()
        # ``or {}`` guards against an explicit null "message" value.
        message = result.get("message") or {}
        content = message.get("content", "No response from model")

        # Display in notebook if possible; fall back to plain printing.
        try:
            display(Markdown(content))
        except Exception:
            print(content)

        return content
def analyze_site(url: str) -> str:
    """Summarise the page at *url* with a default-configured analyzer.

    A new ``WebsiteAnalyzer`` is created on every call, and the value
    returned by its ``analyze_website`` method is passed straight back.
    """
    return WebsiteAnalyzer().analyze_website(url)
# Example usage: runs only when this module is executed as the main program.
# analyze_site() shows the summary itself; the returned text is also kept in
# the module-level name `result` for further inspection.
if __name__ == "__main__":
    result = analyze_site("https://edwarddonner.com")

