In [1]:
# imports
# If these fail, please check you're running from an 'activated' environment with (llms) in the command prompt

import os
import json
from dotenv import load_dotenv
from IPython.display import Markdown, display, update_display
from utils.scraper import fetch_website_links, fetch_website_contents
from openai import OpenAI

In [2]:
# Initialize and constants

load_dotenv(override=True)
api_key = os.getenv('OPENAI_API_KEY')

if api_key and api_key.startswith('sk-proj-') and len(api_key)>10:
    print("API key looks good so far")
else:
    print("There might be a problem with your API key? Please visit the troubleshooting notebook!")
    
MODEL = 'gpt-5-nano'
openai = OpenAI()

API key looks good so far


In [None]:
links = fetch_website_links("https://edwarddonner.com")
links

In [4]:
link_system_prompt = """
You are provided with a list of links found on a webpage.
You are able to decide which of the links would be most relevant to include in a brochure about the company,
such as links to an About page, or a Company page, or Careers/Jobs pages.
You should respond in JSON as in this example:

{
    "links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page", "url": "https://another.full.url/careers"}
    ]
}
"""

In [5]:
def get_links_user_prompt(url):
    user_prompt = f"""
Here is the list of links on the website {url} -
Please decide which of these are relevant web links for a brochure about the company, 
respond with the full https URL in JSON format.
Do not include Terms of Service, Privacy, email links.

Links (some might be relative links):

"""
    links = fetch_website_links(url)
    user_prompt += "\n".join(links)
    return user_prompt

In [6]:
print(get_links_user_prompt("https://edwarddonner.com"))


Here is the list of links on the website https://edwarddonner.com -
Please decide which of these are relevant web links for a brochure about the company, 
respond with the full https URL in JSON format.
Do not include Terms of Service, Privacy, email links.

Links (some might be relative links):

https://edwarddonner.com/
https://edwarddonner.com/curriculum/
https://edwarddonner.com/proficient/
https://edwarddonner.com/connect-four/
https://edwarddonner.com/outsmart/
https://edwarddonner.com/about-me-and-about-nebula/
https://edwarddonner.com/posts/
https://edwarddonner.com/
https://news.ycombinator.com
https://nebula.io/?utm_source=ed&utm_medium=referral
https://www.prnewswire.com/news-releases/wynden-stark-group-acquires-nyc-venture-backed-tech-startup-untapt-301269512.html
https://edwarddonner.com/curriculum/
https://edwarddonner.com/2026/01/04/ai-builder-with-n8n-create-agents-and-voice-agents/
https://edwarddonner.com/2026/01/04/ai-builder-with-n8n-create-agents-and-voice-agents/

In [9]:
def select_relevant_links(url):
    response = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": link_system_prompt},
            {"role": "user", "content": get_links_user_prompt(url)}
        ],
        response_format={"type": "json_object"}
    )
    result = response.choices[0].message.content
    links = json.loads(result)
    return links

In [10]:
select_relevant_links("https://edwarddonner.com")

{'links': [{'type': 'home page', 'url': 'https://edwarddonner.com/'},
  {'type': 'about page',
   'url': 'https://edwarddonner.com/about-me-and-about-nebula/'},
  {'type': 'resume page', 'url': 'https://edwarddonner.com/curriculum/'},
  {'type': 'capabilities page', 'url': 'https://edwarddonner.com/proficient/'},
  {'type': 'project page', 'url': 'https://edwarddonner.com/connect-four/'},
  {'type': 'project page', 'url': 'https://edwarddonner.com/outsmart/'},
  {'type': 'blog index', 'url': 'https://edwarddonner.com/posts/'},
  {'type': 'blog post',
   'url': 'https://edwarddonner.com/2026/01/04/ai-builder-with-n8n-create-agents-and-voice-agents/'},
  {'type': 'blog post',
   'url': 'https://edwarddonner.com/2025/11/11/ai-live-event/'},
  {'type': 'blog post',
   'url': 'https://edwarddonner.com/2025/09/15/ai-in-production-gen-ai-and-agentic-ai-on-aws-at-scale/'},
  {'type': 'blog post',
   'url': 'https://edwarddonner.com/2025/05/28/connecting-my-courses-become-an-llm-expert-and-lead

In [None]:
def select_relevant_links(url):
    print(f"Selecting relevant links for {url} by calling {MODEL}")
    response = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": link_system_prompt},
            {"role": "user", "content": get_links_user_prompt(url)}
        ],
        response_format={"type": "json_object"}
    )
    result = response.choices[0].message.content
    links = json.loads(result)
    print(f"Found {len(links['links'])} relevant links")
    return links

In [None]:
select_relevant_links("https://edwarddonner.com")
select_relevant_links("https://huggingface.co")

In [23]:
def fetch_page_and_all_relevant_links(url):
    contents = fetch_website_contents(url)
    relevant_links = select_relevant_links(url)
    result = f"## Landing Page:\n\n{contents}\n## Relevant Links:\n"
    for link in relevant_links['links']:
        result += f"\n\n### Link: {link['type']}\n"
        result += fetch_website_contents(link["url"])
    return result

In [None]:
print(fetch_page_and_all_relevant_links("https://huggingface.co"))

In [15]:
brochure_system_prompt = """
You are an assistant that analyzes the contents of several relevant pages from a company website
and creates a short brochure about the company for prospective customers, investors and recruits.
Respond in markdown without code blocks.
Include details of company culture, customers and careers/jobs if you have the information.
"""

In [25]:
def get_brochure_user_prompt(company_name, url):
    user_prompt = f"""
    You are looking at a company called: {company_name}
    Here are the contents of its landing page and other relevant pages;
    use this information to build a short brochure of the company in markdown without code blocks.\n\n
    """
    user_prompt += fetch_page_and_all_relevant_links(url)
    user_prompt = user_prompt[:5_000] # Truncate if more than 5,000 characters
    return user_prompt

In [17]:
def create_brochure(company_name, url):
    response = openai.chat.completions.create(
        model="gpt-4.1-mini",
        messages=[
            {"role": "system", "content": brochure_system_prompt},
            {"role": "user", "content": get_brochure_user_prompt(company_name, url)}
        ],
    )
    result = response.choices[0].message.content
    display(Markdown(result))

In [None]:
create_brochure("HuggingFace", "https://huggingface.co")

In [13]:
def stream_brochure(company_name, url):
    stream = openai.chat.completions.create(
        model="gpt-4.1-mini",
        messages=[
            {"role": "system", "content": brochure_system_prompt},
            {"role": "user", "content": get_brochure_user_prompt(company_name, url)}
          ],
        stream=True
    )    
    response = ""
    display_handle = display(Markdown(""), display_id=True)
    for chunk in stream:
        response += chunk.choices[0].delta.content or ''
        update_display(Markdown(response), display_id=display_handle.display_id)

In [24]:
stream_brochure("HuggingFace", "https://huggingface.co")

# Hugging Face Brochure

---

## About Hugging Face  

**Hugging Face** is a pioneering AI community and collaboration platform at the forefront of the machine learning revolution. It serves as the home for machine learning engineers, scientists, and enthusiasts to **create, discover, and collaborate** on open-source models, datasets, and AI applications. With a vibrant and fast-growing global community, Hugging Face empowers the next generation of AI innovators to build an **open, ethical, and inclusive AI future** together.

---

## What They Offer  

- **Extensive Model Repository:** Access and contribute to over **2 million machine learning models**, covering modalities like text, image, video, audio, and even 3D data, enabling rapid experimentation and deployment.  
- **Datasets & Applications:** Explore over **500,000 datasets** and **1 million+ AI applications** powering cutting-edge research and real-world solutions.  
- **Spaces:** Interactive AI demos and applications hosted on the platform, showcasing everything from custom speech generation to image synthesis and transcription tools.  
- **The Hugging Face Hub:** A central platform to host unlimited public models, datasets, and applications with seamless collaboration capabilities.

---

## Enterprise Solutions  

Hugging Face offers robust scaling options for teams and enterprises:  

- **Team Plan:** Starting at $20/user/month, includes advanced workspace collaboration tools.  
- **Enterprise Plan:** Flexible contracts designed for large-scale AI deployments with features like:  
  - Enterprise-grade security & Single Sign-On integration  
  - Granular access controls and audit logging  
  - Centralized token management and usage analytics  
  - Private dataset viewers and additional secure storage  
  - Support for advanced compute options including ZeroGPU quota boosts  
  - Organization-wide billing controls and spending monitoring  

These services help organizations build AI solutions securely, efficiently, and at scale.

---

## Company Culture & Community  

Hugging Face fosters a **collaborative and open culture** where sharing knowledge and empowering others is central. The company thrives on:  

- **Community-driven innovation:** The heart of Hugging Face is its diverse global community contributing models, datasets, and feedback that drive continuous improvement.  
- **Open source ethos:** Many of the most widely-used ML libraries and tools are hosted and developed on their platform.  
- **Transparency and ethics:** Committed to an ethical AI future, Hugging Face encourages responsible AI development and use.

---

## Careers & Opportunities  

Hugging Face is a hub for talent passionate about advancing AI. Opportunities exist across engineering, research, product, and community roles. Work with:  

- A **talented and diverse team** pushing the boundaries of AI research and technology  
- An environment that values **open collaboration and impact-driven work**  
- Cutting-edge projects shaping the future of machine learning  

Visit their careers page to join the AI revolution.

---

## Join the Future of AI  

Whether you are a machine learning practitioner, an enterprise scaling AI, or an enthusiast eager to learn and contribute, Hugging Face offers the tools and community to **accelerate your AI journey**.

- Explore models and datasets  
- Build and share your AI projects  
- Collaborate with a global network of experts  

**Website:** [huggingface.co](https://huggingface.co)  
**Community:** Active on GitHub, Twitter, LinkedIn, and Discord

---

### Brand Colors:  
- Yellow: #FFD21E  
- Orange: #FF9D00  
- Gray: #6B7280  

---

Hugging Face â€” *The AI community building the future.*