In [1]:
import os
import json
from dotenv import load_dotenv
from IPython.display import Markdown, display, update_display
from scraper import fetch_website_links, fetch_website_contents
from openai import OpenAI

In [2]:
load_dotenv(override=True)
api_key = os.getenv('OPENAI_API_KEY')

if api_key and api_key.startswith('sk-proj-') and len(api_key)>10:
    print("API key looks good so far")
else:
    print("There might be a problem with your API key? Please visit the troubleshooting notebook!")
    
MODEL = 'gpt-5-nano'
openai = OpenAI()

API key looks good so far


In [3]:
links = fetch_website_links("https://edwarddonner.com")
links

['https://edwarddonner.com/',
 'https://edwarddonner.com/curriculum/',
 'https://edwarddonner.com/proficient/',
 'https://edwarddonner.com/connect-four/',
 'https://edwarddonner.com/outsmart/',
 'https://edwarddonner.com/about-me-and-about-nebula/',
 'https://edwarddonner.com/posts/',
 'https://edwarddonner.com/',
 'https://news.ycombinator.com',
 'https://nebula.io/?utm_source=ed&utm_medium=referral',
 'https://www.prnewswire.com/news-releases/wynden-stark-group-acquires-nyc-venture-backed-tech-startup-untapt-301269512.html',
 'https://edwarddonner.com/curriculum/',
 'https://edwarddonner.com/2026/01/04/ai-builder-with-n8n-create-agents-and-voice-agents/',
 'https://edwarddonner.com/2026/01/04/ai-builder-with-n8n-create-agents-and-voice-agents/',
 'https://edwarddonner.com/2025/11/11/ai-live-event/',
 'https://edwarddonner.com/2025/11/11/ai-live-event/',
 'https://edwarddonner.com/2025/09/15/ai-in-production-gen-ai-and-agentic-ai-on-aws-at-scale/',
 'https://edwarddonner.com/2025/09/1

In [4]:
link_system_prompt = """
You are provided with a list of links found on a webpage.
You are able to decide which of the links would be most relevant to include in a brochure about the company,
such as links to an About page, or a Company page, or Careers/Jobs pages.
You should respond in JSON as in this example:

{
    "links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page", "url": "https://another.full.url/careers"}
    ]
}
"""

def get_links_user_prompt(url):
    user_prompt = f"""
Here is the list of links on the website {url} -
Please decide which of these are relevant web links for a brochure about the company, 
respond with the full https URL in JSON format.
Do not include Terms of Service, Privacy, email links.

Links (some might be relative links):

"""
    links = fetch_website_links(url)
    user_prompt += "\n".join(links)
    return user_prompt

In [5]:
print(get_links_user_prompt("https://edwarddonner.com"))


Here is the list of links on the website https://edwarddonner.com -
Please decide which of these are relevant web links for a brochure about the company, 
respond with the full https URL in JSON format.
Do not include Terms of Service, Privacy, email links.

Links (some might be relative links):

https://edwarddonner.com/
https://edwarddonner.com/curriculum/
https://edwarddonner.com/proficient/
https://edwarddonner.com/connect-four/
https://edwarddonner.com/outsmart/
https://edwarddonner.com/about-me-and-about-nebula/
https://edwarddonner.com/posts/
https://edwarddonner.com/
https://news.ycombinator.com
https://nebula.io/?utm_source=ed&utm_medium=referral
https://www.prnewswire.com/news-releases/wynden-stark-group-acquires-nyc-venture-backed-tech-startup-untapt-301269512.html
https://edwarddonner.com/curriculum/
https://edwarddonner.com/2026/01/04/ai-builder-with-n8n-create-agents-and-voice-agents/
https://edwarddonner.com/2026/01/04/ai-builder-with-n8n-create-agents-and-voice-agents/

In [6]:
def select_relevant_links(url):
    response = openai.chat.completions.create(
        model = MODEL,
        messages = [
        {"role":"system", "content": link_system_prompt},
        {"role": "user", "content" : get_links_user_prompt(url)}],
        response_format = {"type": "json_object"}
    )
    result = response.choices[0].message.content
    links = json.loads(result)
    print(f"Found {len(links['links'])} relevant links")
    return links
select_relevant_links("https://edwarddonner.com")
# select_relevant_links("https://huggingface.co")

Found 9 relevant links


{'links': [{'type': 'company homepage', 'url': 'https://edwarddonner.com/'},
  {'type': 'curriculum page', 'url': 'https://edwarddonner.com/curriculum/'},
  {'type': 'capabilities page', 'url': 'https://edwarddonner.com/proficient/'},
  {'type': 'portfolio/project',
   'url': 'https://edwarddonner.com/connect-four/'},
  {'type': 'portfolio/project', 'url': 'https://edwarddonner.com/outsmart/'},
  {'type': 'about page',
   'url': 'https://edwarddonner.com/about-me-and-about-nebula/'},
  {'type': 'LinkedIn page', 'url': 'https://www.linkedin.com/in/eddonner/'},
  {'type': 'Twitter page', 'url': 'https://twitter.com/edwarddonner'},
  {'type': 'Facebook page',
   'url': 'https://www.facebook.com/edward.donner.52'}]}

## Now lets make this better and a proper Brochure Generator!!!

In [7]:
def fetch_page_and_all_relevant_links(url):
    contents = fetch_website_contents(url)
    relevant_links = select_relevant_links(url)
    result = f"## Landing Page:\n\n{contents}\n## Relevant Links:\n"
    for link in relevant_links['links']:
        result += f"\n\n### Link: {link['type']}\n"
        result += fetch_website_contents(link["url"])
    return result

print(fetch_page_and_all_relevant_links("https://huggingface.co"))

Found 12 relevant links
## Landing Page:

Hugging Face ‚Äì The AI community building the future.

Hugging Face
Models
Datasets
Spaces
Community
Docs
Enterprise
Pricing
Log In
Sign Up
The AI community building the future.
The platform where the machine learning community collaborates on models, datasets, and applications.
Explore AI Apps
or
Browse 2M+ models
Trending on
this week
Models
zai-org/GLM-OCR
Updated
4 days ago
‚Ä¢
204k
‚Ä¢
753
Qwen/Qwen3-Coder-Next
Updated
4 days ago
‚Ä¢
53.5k
‚Ä¢
560
moonshotai/Kimi-K2.5
Updated
2 days ago
‚Ä¢
335k
‚Ä¢
1.81k
stepfun-ai/Step-3.5-Flash
Updated
about 8 hours ago
‚Ä¢
12k
‚Ä¢
502
circlestone-labs/Anima
Updated
7 days ago
‚Ä¢
60.6k
‚Ä¢
487
Browse 2M+ models
Spaces
Running
on
Zero
Featured
1.28k
Qwen3-TTS Demo
üéô
1.28k
Transform text into natural-sounding speech with custom voices
Running
on
A100
167
ACE-Step v1.5
üéµ
167
Music Generation Foundation Model v1.5
Running
464
Demo Playground
‚ö°
464
Free platform to access multiple AI models
Running

In [None]:
brochure_system_prompt = """
You are an assistant that analyzes the contents of several relevant pages from a company website
and creates a short brochure about the company for prospective customers, investors and recruits.
Respond in markdown without code blocks.
Include details of company culture, customers and careers/jobs if you have the information.
"""

def get_brochure_user_prompt(company_name, url):
    user_prompt = f"""
You are looking at a company called: {company_name}
Here are the contents of its landing page and other relevant pages;
use this information to build a short brochure of the company in markdown without code blocks.\n\n
"""
    user_prompt += fetch_page_and_all_relevant_links(url)
    user_prompt = user_prompt[:5_000]
    return user_prompt



In [15]:
from turtle import up


def create_brochure(company_name, url):
    stream = openai.chat.completions.create(
        model="gpt-4.1-mini",
        messages=[
            {"role": "system", "content": brochure_system_prompt},
            {"role": "user", "content": get_brochure_user_prompt(company_name, url)}
        ],
        stream = True
    )
    
    response = ""
    display_handle = display(Markdown(""), display_id=True)
    for chunk in stream:
        response += chunk.choices[0].delta.content or ''
        update_display(Markdown(response), display_id=display_handle.display_id)

In [16]:
create_brochure("HuggingFace", "https://huggingface.co")

Found 13 relevant links


# Hugging Face Brochure

---

## About Hugging Face

**Hugging Face** is the vibrant AI community and platform building the future of machine learning (ML). It serves as a central hub where ML engineers, scientists, researchers, and enthusiasts collaborate openly to create, share, and explore ML models, datasets, and applications. Hugging Face empowers the next generation of machine learning practitioners to innovate faster, share better, and build an open, ethical AI future.

---

## What We Offer

- **Hugging Face Hub**  
  A collaborative platform hosting **over 2 million models**, **500,000+ datasets**, and **1 million+ AI applications** spanning modalities from text, image, video, audio to 3D.  
  Users can host unlimited public projects and engage with the ML community worldwide.

- **Open Source Stack**  
  Hugging Face provides an extensive open-source ecosystem to accelerate ML workflows, making it easier and faster to build AI applications.

- **Spaces**  
  Interactive AI Apps and demos powered on various hardware (Zero, A100 GPUs), allowing users to experiment with models directly on the platform.

- **Compute & Enterprise Solutions**  
  Paid compute resources and enterprise-grade platforms are available with enhanced security, access controls, and dedicated support to empower teams and organizations to build AI at scale.

---

## Community & Collaboration

- A fast-growing, inclusive AI community dedicated to openness and ethical AI development.  
- Provides tools for building a personal ML portfolio by sharing your projects and research.  
- A space for collaboration across developers, researchers, and end-users, facilitating global innovation and knowledge exchange.

---

## Company Culture

Hugging Face nurtures a culture centered around:

- **Open Collaboration:** Encouraging transparent sharing of models, datasets, and ideas.  
- **Innovation:** Empowering developers to accelerate AI research and application development.  
- **Inclusion and Ethics:** Committed to building AI responsibly and ensuring diversity in the AI community.  
- **Learning & Sharing:** Supporting education and growth within the ML ecosystem.

---

## Who Uses Hugging Face?

- Machine learning engineers and researchers seeking robust open-source libraries and resources.  
- Enterprises requiring scalable AI infrastructure with security and team management.  
- Developers and creatives experimenting with AI models across various media types.  
- Educators and learners building portfolios and gaining hands-on experience in AI.  

---

## Careers at Hugging Face

Join a forward-thinking company shaping the future of AI. Hugging Face looks for passionate individuals who:

- Believe in open-source and the collaborative development of AI.  
- Want to work with cutting-edge ML technologies and tools.  
- Value diversity, inclusion, and ethical AI advancement.  
- Are excited about contributing to a global ML community and building innovative solutions.

Opportunities span engineering, research, product development, community engagement, and enterprise solutions.

---

## Get Started with Hugging Face

- **Explore AI Models & Applications:** Dive into over 2 million models and thousands of demos.  
- **Host Your Work:** Share your datasets, models, and apps with the global AI community.  
- **Build Your Portfolio:** Establish your presence in the ML space with your work showcased on the Hub.  
- **Scale with Enterprise:** Access powerful compute and professional-grade tools to boost your team‚Äôs productivity.

**Stay connected and collaborate with the AI community that‚Äôs building tomorrow‚Äôs technology‚Äîtoday.**

---

**Visit:** [huggingface.co](https://huggingface.co)  
**Join the community, start building, and be part of the AI revolution.**

---

### Brand Colors & Assets

- Vibrant Yellow: #FFD21E  
- Bold Orange: #FF9D00  
- Neutral Gray: #6B7280

(High-quality logos and brand assets available for partners and developers.)

---

Hugging Face ‚Äî The AI community building the future.