In [29]:
from bs4 import BeautifulSoup
import requests
from urllib.parse import urljoin
import time
from IPython.display import display, Markdown


In [10]:
from scraper import fetch_url_content, fetch_website_links
import json

In [1]:
from openai import OpenAI

OLLAMA_BASE_URL = "http://localhost:11434/v1"

ollama = OpenAI(base_url=OLLAMA_BASE_URL, api_key='ollama')

In [None]:
links = fetch_website_links("https://edwarddonner.com")
links

['https://edwarddonner.com/',
 'https://edwarddonner.com/curriculum/',
 'https://edwarddonner.com/proficient/',
 'https://edwarddonner.com/connect-four/',
 'https://edwarddonner.com/outsmart/',
 'https://edwarddonner.com/about-me-and-about-nebula/',
 'https://edwarddonner.com/posts/',
 'https://edwarddonner.com/',
 'https://news.ycombinator.com',
 'https://nebula.io/?utm_source=ed&utm_medium=referral',
 'https://www.prnewswire.com/news-releases/wynden-stark-group-acquires-nyc-venture-backed-tech-startup-untapt-301269512.html',
 'https://edwarddonner.com/curriculum/',
 'https://edwarddonner.com/2026/01/04/ai-builder-with-n8n-create-agents-and-voice-agents/',
 'https://edwarddonner.com/2026/01/04/ai-builder-with-n8n-create-agents-and-voice-agents/',
 'https://edwarddonner.com/2025/11/11/ai-live-event/',
 'https://edwarddonner.com/2025/11/11/ai-live-event/',
 'https://edwarddonner.com/2025/09/15/ai-in-production-gen-ai-and-agentic-ai-on-aws-at-scale/',
 'https://edwarddonner.com/2025/09/1

In [5]:
link_system_prompt = """
You are provided with a list of links found on a webpage.
You are able to decide which of the links would be most relevant to include in a brochure about the company,
such as links to an About page, or a Company page, or Careers/Jobs pages.
You should respond in JSON as in this example:

{
    "links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page", "url": "https://another.full.url/careers"}
    ]
}
"""

In [6]:
def get_links_user_prompt(url):
    user_prompt = f"""
Here is the list of links on the website {url} -
Please decide which of these are relevant web links for a brochure about the company, 
respond with the full https URL in JSON format.
Do not include Terms of Service, Privacy, email links.

Links (some might be relative links):

"""
    links = fetch_website_links(url)
    user_prompt += "\n".join(links)
    return user_prompt

In [7]:
print(get_links_user_prompt("https://edwarddonner.com"))


Here is the list of links on the website https://edwarddonner.com -
Please decide which of these are relevant web links for a brochure about the company, 
respond with the full https URL in JSON format.
Do not include Terms of Service, Privacy, email links.

Links (some might be relative links):

https://edwarddonner.com/
https://edwarddonner.com/curriculum/
https://edwarddonner.com/proficient/
https://edwarddonner.com/connect-four/
https://edwarddonner.com/outsmart/
https://edwarddonner.com/about-me-and-about-nebula/
https://edwarddonner.com/posts/
https://edwarddonner.com/
https://news.ycombinator.com
https://nebula.io/?utm_source=ed&utm_medium=referral
https://www.prnewswire.com/news-releases/wynden-stark-group-acquires-nyc-venture-backed-tech-startup-untapt-301269512.html
https://edwarddonner.com/curriculum/
https://edwarddonner.com/2026/01/04/ai-builder-with-n8n-create-agents-and-voice-agents/
https://edwarddonner.com/2026/01/04/ai-builder-with-n8n-create-agents-and-voice-agents/

In [8]:
def select_relevant_links(url):
    response = ollama.chat.completions.create(
        model="llama3.2",
        messages=[
            {"role": "system", "content": link_system_prompt},
            {"role": "user", "content": get_links_user_prompt(url)}
        ],
        response_format={"type": "json_object"}
    )
    result = response.choices[0].message.content
    links = json.loads(result)
    return links

In [11]:
select_relevant_links("https://edwarddonner.com")

{'links': [{'type': 'about page',
   'url': 'https://edwarddonner.com/about-me-and-about-nebula/'},
  {'type': 'company page', 'url': 'https://edwarddonner.com/'},
  {'type': 'curriculum page', 'url': 'https://edwarddonner.com/curriculum/'},
  {'type': 'proficient page', 'url': 'https://edwarddonner.com/proficient/'},
  {'type': 'careers/jobs page',
   'url': 'https://www.linkedin.com/in/eddonner/'}]}

In [26]:
import time

def fetch_page_and_all_relevant_links(url, delay=1):
    contents = fetch_url_content(url)
    relevant_links = select_relevant_links(url)
    result = f"## Landing Page:\n\n{contents}\n## Relevant Links:\n"

    for link in relevant_links.get('links', []):
        url_link = link.get("url") or link.get("href")
        if not url_link:
            continue

        result += f"\n\n### Link: {link.get('type', 'Link')}\n"

        # Wrap fetch_url_content in try/except to catch HTTP errors
        try:
            result += fetch_url_content(url_link)
        except Exception as e:
            result += f"[Error fetching {url_link}: {e}]"

        time.sleep(delay)  # polite delay to reduce 429 errors

    return result


In [17]:
print(fetch_page_and_all_relevant_links("https://huggingface.co"))

## Landing Page:

Hugging Face â€“ The AI community building the future.

Hugging Face
Models
Datasets
Spaces
Community
Docs
Enterprise
Pricing
Log In
Sign Up
The AI community building the future.
The platform where the machine learning community collaborates on models, datasets, and applications.
Explore AI Apps
or
Browse 2M+ models
Trending on
this week
Models
zai-org/GLM-4.7-Flash
Updated
5 days ago
â€¢
363k
â€¢
1.14k
nvidia/personaplex-7b-v1
Updated
2 days ago
â€¢
29.4k
â€¢
872
microsoft/VibeVoice-ASR
Updated
4 days ago
â€¢
21.7k
â€¢
471
Qwen/Qwen3-TTS-12Hz-1.7B-CustomVoice
Updated
2 days ago
â€¢
42.7k
â€¢
388
unsloth/GLM-4.7-Flash-GGUF
Updated
1 day ago
â€¢
196k
â€¢
314
Browse 2M+ models
Spaces
Running
on
Zero
581
Qwen3-TTS Demo
ðŸŽ™
581
Convert text to speech with custom voices and cloning
Running
on
Zero
Featured
1.13k
Qwen Image Multiple Angles 3D Camera
ðŸŽ¥
1.13k
Adjust camera angles in images using 3D controls or sliders
Running
on
Zero
MCP
1.77k
Z Image Turbo
ðŸ–¼
1.77k
Gen

In [18]:
brochure_system_prompt = """
You are an assistant that analyzes the contents of several relevant pages from a company website
and creates a short brochure about the company for prospective customers, investors and recruits.
Respond in markdown without code blocks.
Include details of company culture, customers and careers/jobs if you have the information.
"""

In [20]:
def get_brochure_user_prompt(company_name, url):
    user_prompt = f"""
You are looking at a company called: {company_name}
Here are the contents of its landing page and other relevant pages;
use this information to build a short brochure of the company in markdown without code blocks.\n\n
"""
    user_prompt += fetch_page_and_all_relevant_links(url)
    user_prompt = user_prompt[:5_000] # Truncate if more than 5,000 characters
    return user_prompt

In [27]:
def create_brochure(company_name, url):
    response = ollama.chat.completions.create(
        model="llama3.2",
        messages=[
            {"role": "system", "content": brochure_system_prompt},
            {"role": "user", "content": get_brochure_user_prompt(company_name, url)}
        ],
    )
    result = response.choices[0].message.content
    display(Markdown(result))

In [30]:
create_brochure("edwarddonner", "https://edwarddonner.com")

**The Edward Donner Story**

At Edward Donner, we're on a mission to harness the power of Artificial Intelligence (AI) to help people discover their potential and pursue their purpose. Founded by Ed, a seasoned AI expert with a passion for innovation and making a positive impact.

**Our Story**

Ed's journey began with creating online courses on Advanced Machine Learning, which became top-rated best-sellers across 190 countries. This success led him to co-found Nebula.io, where he applied AI to the field of recruitment, helping recruiters source and engage talent more effectively. Our patented model uses Generative AI to match people with roles that align with their passions and strengths.

**Our Goals**

At Edward Donner, we're dedicated to making a positive impact on human prosperity. We aim to raise the level of fulfillment and success for individuals through our innovative recruitment solutions and community support. Inspired by the concept of Ikigai (finding work that brings joy, purpose, and fulfillment), we strive to empower people to pursue their true potential.

**Community & Culture**

We believe in the power of collaboration and community-driven growth. Our space is filled with like-minded individuals passionate about AI, innovation, and making a difference. Join us on our mission to revolutionize recruitment and talent management using Generative AI.

**Stay Connected**

Want to stay up-to-date on our latest developments, success stories, and industry insights? Subscribe to our newsletter and connect with us on social media:

 LinkedIn
 Twitter
 Facebook

Follow Ed's journey on [www.edwarddonner.com](http://www.edwarddonner.com)

Join our community and let's transform the way people discover their potential!