In [1]:
# imports
# If these fail, please check you're running from an 'activated' environment with (llms) in the command prompt

import os
import json
from dotenv import load_dotenv
from IPython.display import Markdown, display, update_display
from scraper import fetch_website_links, fetch_website_contents
from openai import OpenAI

In [2]:
# Initialize and constants

load_dotenv(override=True)
api_key = os.getenv('OPENAI_API_KEY')

if api_key and api_key.startswith('sk-proj-') and len(api_key)>10:
    print("API key looks good so far")
else:
    print("There might be a problem with your API key? Please visit the troubleshooting notebook!")
    
MODEL = 'gpt-5-nano'
openai = OpenAI()

API key looks good so far


In [3]:
links = fetch_website_links("https://edwarddonner.com")
links

['https://edwarddonner.com/',
 'https://edwarddonner.com/connect-four/',
 'https://edwarddonner.com/outsmart/',
 'https://edwarddonner.com/about-me-and-about-nebula/',
 'https://edwarddonner.com/posts/',
 'https://edwarddonner.com/',
 'https://news.ycombinator.com',
 'https://nebula.io/?utm_source=ed&utm_medium=referral',
 'https://www.prnewswire.com/news-releases/wynden-stark-group-acquires-nyc-venture-backed-tech-startup-untapt-301269512.html',
 'https://patents.google.com/patent/US20210049536A1/',
 'https://www.linkedin.com/in/eddonner/',
 'https://edwarddonner.com/2025/11/11/ai-live-event/',
 'https://edwarddonner.com/2025/11/11/ai-live-event/',
 'https://edwarddonner.com/2025/09/15/ai-in-production-gen-ai-and-agentic-ai-on-aws-at-scale/',
 'https://edwarddonner.com/2025/09/15/ai-in-production-gen-ai-and-agentic-ai-on-aws-at-scale/',
 'https://edwarddonner.com/2025/05/28/connecting-my-courses-become-an-llm-expert-and-leader/',
 'https://edwarddonner.com/2025/05/28/connecting-my-cou

In [4]:
link_system_prompt = """
You are provided with a list of links found on a webpage.
You are able to decide which of the links would be most relevant to include in a brochure about the company,
such as links to an About page, or a Company page, or Careers/Jobs pages.
You should respond in JSON as in this example:

{
    "links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page", "url": "https://another.full.url/careers"}
    ]
}
"""

In [5]:
def get_links_user_prompt(url):
    user_prompt = f"""
Here is the list of links on the website {url} -
Please decide which of these are relevant web links for a brochure about the company, 
respond with the full https URL in JSON format.
Do not include Terms of Service, Privacy, email links.

Links (some might be relative links):

"""
    links = fetch_website_links(url)
    user_prompt += "\n".join(links)
    return user_prompt

In [6]:
print(get_links_user_prompt("https://edwarddonner.com"))


Here is the list of links on the website https://edwarddonner.com -
Please decide which of these are relevant web links for a brochure about the company, 
respond with the full https URL in JSON format.
Do not include Terms of Service, Privacy, email links.

Links (some might be relative links):

https://edwarddonner.com/
https://edwarddonner.com/connect-four/
https://edwarddonner.com/outsmart/
https://edwarddonner.com/about-me-and-about-nebula/
https://edwarddonner.com/posts/
https://edwarddonner.com/
https://news.ycombinator.com
https://nebula.io/?utm_source=ed&utm_medium=referral
https://www.prnewswire.com/news-releases/wynden-stark-group-acquires-nyc-venture-backed-tech-startup-untapt-301269512.html
https://patents.google.com/patent/US20210049536A1/
https://www.linkedin.com/in/eddonner/
https://edwarddonner.com/2025/11/11/ai-live-event/
https://edwarddonner.com/2025/11/11/ai-live-event/
https://edwarddonner.com/2025/09/15/ai-in-production-gen-ai-and-agentic-ai-on-aws-at-scale/
htt

In [10]:
def select_relevant_links(url):
    response = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": link_system_prompt},
            {"role": "user", "content": get_links_user_prompt(url)}
        ],
        response_format={"type": "json_object"}
    )
    result = response.choices[0].message.content
    links = json.loads(result)
    return links
    

In [11]:
select_relevant_links("https://edwarddonner.com")

{'links': [{'type': 'home page', 'url': 'https://edwarddonner.com/'},
  {'type': 'about page',
   'url': 'https://edwarddonner.com/about-me-and-about-nebula/'},
  {'type': 'project page', 'url': 'https://edwarddonner.com/connect-four/'},
  {'type': 'project page', 'url': 'https://edwarddonner.com/outsmart/'},
  {'type': 'blog', 'url': 'https://edwarddonner.com/posts/'},
  {'type': 'partner site',
   'url': 'https://nebula.io/?utm_source=ed&utm_medium=referral'},
  {'type': 'linkedin profile', 'url': 'https://www.linkedin.com/in/eddonner/'},
  {'type': 'twitter profile', 'url': 'https://twitter.com/edwarddonner'},
  {'type': 'facebook profile',
   'url': 'https://www.facebook.com/edward.donner.52'}]}

In [12]:
def select_relevant_links(url):
    print(f"Selecting relevant links for {url} by calling {MODEL}")
    response = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": link_system_prompt},
            {"role": "user", "content": get_links_user_prompt(url)}
        ],
        response_format={"type": "json_object"}
    )
    result = response.choices[0].message.content
    links = json.loads(result)
    print(f"Found {len(links['links'])} relevant links")
    return links

In [16]:
select_relevant_links("https://huggingface.co")

Selecting relevant links for https://huggingface.co by calling gpt-5-nano
Found 11 relevant links


{'links': [{'type': 'brand page', 'url': 'https://huggingface.co/brand'},
  {'type': 'company page', 'url': 'https://huggingface.co/huggingface'},
  {'type': 'enterprise page', 'url': 'https://huggingface.co/enterprise'},
  {'type': 'careers page', 'url': 'https://apply.workable.com/huggingface/'},
  {'type': 'blog', 'url': 'https://huggingface.co/blog'},
  {'type': 'learn page', 'url': 'https://huggingface.co/learn'},
  {'type': 'GitHub page', 'url': 'https://github.com/huggingface'},
  {'type': 'LinkedIn page',
   'url': 'https://www.linkedin.com/company/huggingface/'},
  {'type': 'Twitter page', 'url': 'https://twitter.com/huggingface'},
  {'type': 'Facebook page', 'url': 'https://huggingface.co/facebook'},
  {'type': 'Discussion forum', 'url': 'https://discuss.huggingface.co'}]}

In [17]:
def fetch_page_and_all_relevant_links(url):
    contents = fetch_website_contents(url)
    relevant_links = select_relevant_links(url)
    result = f"## Landing Page:\n\n{contents}\n## Relevant Links:\n"
    for link in relevant_links['links']:
        result += f"\n\n### Link: {link['type']}\n"
        result += fetch_website_contents(link["url"])
    return result

In [18]:
print(fetch_page_and_all_relevant_links("https://huggingface.co"))

Selecting relevant links for https://huggingface.co by calling gpt-5-nano
Found 7 relevant links
## Landing Page:

Hugging Face ‚Äì The AI community building the future.

Hugging Face
Models
Datasets
Spaces
Community
Docs
Enterprise
Pricing
Log In
Sign Up
The AI community building the future.
The platform where the machine learning community collaborates on models, datasets, and applications.
Explore AI Apps
or
Browse 2M+ models
Trending on
this week
Models
Qwen/Qwen-Image-Layered
Updated
4 days ago
‚Ä¢
9.85k
‚Ä¢
591
Tongyi-MAI/Z-Image-Turbo
Updated
15 days ago
‚Ä¢
373k
‚Ä¢
3.33k
google/functiongemma-270m-it
Updated
5 days ago
‚Ä¢
21.1k
‚Ä¢
499
XiaomiMiMo/MiMo-V2-Flash
Updated
5 days ago
‚Ä¢
10.8k
‚Ä¢
418
tencent/HY-WorldPlay
Updated
5 days ago
‚Ä¢
3.31k
‚Ä¢
403
Browse 2M+ models
Spaces
Running
on
Zero
Featured
468
TRELLIS.2
üè¢
468
High-fidelity 3D Generation from images
Running
on
Zero
MCP
Featured
346
Chatterbox Turbo Demo
‚ö°
346
Chatterbox Turbo Demo
Running
on
Zero
Featured
213


In [26]:


brochure_system_prompt = """
You are an assistant that analyzes the contents of several relevant pages from a company website
and creates a short, humorous, entertaining, witty brochure about the company for prospective customers, investors and recruits.
Respond in markdown without code blocks.
Include details of company culture, customers and careers/jobs if you have the information.
"""

In [20]:
def get_brochure_user_prompt(company_name, url):
    user_prompt = f"""
You are looking at a company called: {company_name}
Here are the contents of its landing page and other relevant pages;
use this information to build a short brochure of the company in markdown without code blocks.\n\n
"""
    user_prompt += fetch_page_and_all_relevant_links(url)
    user_prompt = user_prompt[:5_000] 
    return user_prompt

In [21]:
get_brochure_user_prompt("HuggingFace", "https://huggingface.co")

Selecting relevant links for https://huggingface.co by calling gpt-5-nano
Found 10 relevant links


'\nYou are looking at a company called: HuggingFace\nHere are the contents of its landing page and other relevant pages;\nuse this information to build a short brochure of the company in markdown without code blocks.\n\n\n## Landing Page:\n\nHugging Face ‚Äì The AI community building the future.\n\nHugging Face\nModels\nDatasets\nSpaces\nCommunity\nDocs\nEnterprise\nPricing\nLog In\nSign Up\nThe AI community building the future.\nThe platform where the machine learning community collaborates on models, datasets, and applications.\nExplore AI Apps\nor\nBrowse 2M+ models\nTrending on\nthis week\nModels\nQwen/Qwen-Image-Layered\nUpdated\n4 days ago\n‚Ä¢\n9.85k\n‚Ä¢\n591\nTongyi-MAI/Z-Image-Turbo\nUpdated\n15 days ago\n‚Ä¢\n373k\n‚Ä¢\n3.33k\ngoogle/functiongemma-270m-it\nUpdated\n5 days ago\n‚Ä¢\n21.1k\n‚Ä¢\n499\nXiaomiMiMo/MiMo-V2-Flash\nUpdated\n5 days ago\n‚Ä¢\n10.8k\n‚Ä¢\n418\ntencent/HY-WorldPlay\nUpdated\n5 days ago\n‚Ä¢\n3.31k\n‚Ä¢\n403\nBrowse 2M+ models\nSpaces\nRunning\non\nZero\

In [22]:
def create_brochure(company_name, url):
    response = openai.chat.completions.create(
        model="gpt-4.1-mini",
        messages=[
            {"role": "system", "content": brochure_system_prompt},
            {"role": "user", "content": get_brochure_user_prompt(company_name, url)}
        ],
    )
    result = response.choices[0].message.content
    display(Markdown(result))

In [23]:
create_brochure("HuggingFace", "https://huggingface.co")

Selecting relevant links for https://huggingface.co by calling gpt-5-nano
Found 9 relevant links


# Hugging Face Brochure

## About Hugging Face
Hugging Face is a pioneering AI community and collaboration platform dedicated to building the future of machine learning (ML). At its core, Hugging Face empowers ML engineers, scientists, and enthusiasts worldwide to create, share, discover, and experiment with open-source machine learning models, datasets, and applications. The platform serves as a vibrant hub where people from diverse backgrounds contribute to the advancement of ethical and open AI technologies.

## What We Offer
- **Models:** Access and collaborate on a vast repository of over 2 million pre-trained machine learning models spanning multiple modalities, including text, images, audio, video, and even 3D.
- **Datasets:** Browse and contribute to over 500,000 datasets from various domains, updated regularly to keep the community well-resourced.
- **Spaces:** Engage with customizable ML applications running on Hugging Face infrastructure, showcasing everything from high-fidelity 3D image generation to advanced text-to-image editors.
- **Community:** Join an active and fast-growing community where shared knowledge and contributions drive innovation.
- **Enterprise Solutions:** Accelerate AI workflows with paid compute resources and enterprise-grade security solutions tailored for teams and organizations.

## Company Culture
Hugging Face fosters a culture of openness, collaboration, and ethical AI development. The company thrives on a community-driven model, valuing transparency, inclusivity, and shared learning. Contributors are encouraged to build their ML portfolios, share their cutting-edge projects, and engage with like-minded peers across the globe. The platform supports unlimited public hosting, enabling unrestricted creative and scientific exploration.

## Our Customers
Hugging Face serves a wide range of users, including:
- Independent ML engineers and researchers building and sharing open-source AI models.
- Enterprises seeking advanced tools and secure platforms to develop AI applications at scale.
- Academic institutions leveraging the platform for research and education.
- AI enthusiasts and developers exploring the latest AI applications and datasets.

Our customers benefit from a rich ecosystem that accelerates innovation, reduces development time, and fosters collaboration within one of the largest AI communities worldwide.

## Careers and Opportunities
Hugging Face is continuously expanding and invites passionate individuals who want to impact the future of machine learning. Working at Hugging Face means joining a forward-thinking team dedicated to open-source values, research excellence, and building community-driven AI tools that benefit everyone.

Checklist for prospective recruits:
- Passion for AI, open-source, and ethical technology
- Desire to collaborate in a vibrant, global community
- Opportunity to work on cutting-edge ML models and applications
- Benefits of contributing to high-impact projects used worldwide

---

## Join Hugging Face Today
Explore over 2 million ML models, thousands of datasets, and hundreds of community-built applications. Build your AI skills, accelerate your projects, and be part of the future of machine learning.

**Website:** https://huggingface.co  
**Sign Up:** Join the community and start building your portfolio  
**Enterprise:** Discover solutions tailored for teams and businesses  
**Community:** Connect, collaborate, and grow with AI innovators worldwide

---

Hugging Face ‚Äì The AI community building the future.  
Create, discover, collaborate ‚Äî together shaping an open and ethical AI future.

In [24]:
def stream_brochure(company_name, url):
    stream = openai.chat.completions.create(
        model="gpt-4.1-mini",
        messages=[
            {"role": "system", "content": brochure_system_prompt},
            {"role": "user", "content": get_brochure_user_prompt(company_name, url)}
          ],
        stream=True
    )    
    response = ""
    display_handle = display(Markdown(""), display_id=True)
    for chunk in stream:
        response += chunk.choices[0].delta.content or ''
        update_display(Markdown(response), display_id=display_handle.display_id)

In [25]:
stream_brochure("HuggingFace", "https://huggingface.co")

Selecting relevant links for https://huggingface.co by calling gpt-5-nano
Found 8 relevant links


# Hugging Face Brochure

## About Hugging Face

**Hugging Face** is the leading AI community and collaboration platform dedicated to building the future of machine learning (ML). Serving as a central hub for ML engineers, scientists, and enthusiasts worldwide, Hugging Face empowers the machine learning community to create, share, and explore open-source models, datasets, and AI applications across multiple modalities including text, image, video, audio, and 3D.

## What We Offer

- **Access to 2 Million+ Models and 500k+ Datasets**  
  Explore, download, and contribute to an ever-growing collection of state-of-the-art open-source ML models and datasets spanning a variety of use cases and domains.

- **Spaces for AI Applications**  
  Discover or deploy ML-powered applications (‚ÄòSpaces‚Äô) such as image generation, editing, and interactive demos that run on scalable infrastructure.

- **Collaborative Platform**  
  Host and collaborate on models, datasets, and AI apps publicly, enabling faster iteration and innovation through community engagement.

- **Open Source Stack**  
  Leverage Hugging Face‚Äôs comprehensive open-source tools and libraries to accelerate ML development and deployment.

- **Enterprise Solutions**  
  Tailored paid compute resources and enterprise-grade platforms provide security, scalability, and team collaboration capabilities for businesses building AI products.

## Community & Culture

Hugging Face‚Äôs core mission is to foster an **open and ethical AI future** through vibrant community collaboration. It is a welcoming ecosystem for anyone interested in machine learning ‚Äî from beginners building their portfolios to experts advancing research ‚Äî promoting knowledge sharing, transparency, and inclusivity.

The company culture emphasizes:

- **Collaboration:** Building together with millions of users contributing models, datasets, and projects.
- **Innovation:** Staying on the cutting edge of ML advancements across diverse modalities.
- **Openness:** Commitment to open-source principles and ethical AI development.
- **Empowerment:** Supporting the growth of ML professionals around the world with accessible tools and resources.

## Customers & Users

Hugging Face serves a diverse audience including:

- ML engineers and data scientists developing state-of-the-art models.
- Researchers advancing AI capabilities and applications.
- Enterprises integrating AI into their workflows with secure, scalable platforms.
- Educational institutions and students leveraging resources for learning and experimentation.
- Independent developers and hobbyists building AI projects and portfolios.

## Careers

Joining Hugging Face means becoming part of a pioneering team shaping the future of AI by working alongside a passionate and supportive community. The company values diversity, creativity, and a relentless drive to make machine learning accessible and ethical.

Career opportunities typically span roles in:

- Software engineering and ML research
- Product management and design
- Community engagement and developer advocacy
- Sales and enterprise solutions

Prospective recruits can expect an innovative, collaborative environment with impactful projects that connect millions of AI practitioners worldwide.

---

**Join the AI revolution at Hugging Face ‚Äì where the machine learning community builds the future together.**

Explore more and get involved at: [huggingface.co](https://huggingface.co)  
Sign up, browse models, contribute, or deploy your AI applications today!

In [27]:
stream_brochure("HuggingFace", "https://huggingface.co")

Selecting relevant links for https://huggingface.co by calling gpt-5-nano
Found 10 relevant links


# Welcome to Hugging Face ‚Äì The AI Community Building the Future (and a Lot More!)

---

## Who We Are:  
Imagine a world where AI isn‚Äôt just a sci-fi fantasy but a team sport. That‚Äôs Hugging Face for you ‚Äì the bustling community, the playground, and the launchpad all rolled into one. We‚Äôre the place where machine learning engineers, researchers, and hobbyists come to play, collaborate, and change the game.

**Our Motto?**  
*Collaborate. Innovate. Hug it out!* (Okay, maybe not literally ‚Äì but we‚Äôre warm and fuzzy at heart.)

---

## What‚Äôs Hugging Face All About?  

- **2 Million+ Machine Learning Models** ‚Äì From text to images, 3D to audio, if AI can do it, someone here has built a model for it.
- **500k+ Datasets** ‚Äì Fuel your AI projects with mountains of data, constantly updated and ready to roll.
- **1 Million+ Applications in Spaces** ‚Äì Try everything from turbo-charged chatbots to high-fidelity 3D image generators.
- **Open Source Stack** ‚Äì No secret sauce here, just collaborative recipes for success.
- **Enterprise Solutions** ‚Äì Because even your grandma‚Äôs AI project might need some secure, scalable muscle.

---

## Why Join the Hug?

- **Community & Collaboration:** You‚Äôre never alone when tackling AI challenges. Share your work, build your portfolio, and learn from the brightest minds on the planet.  
- **Explore All Modalities:** Text, image, video, audio, 3D ‚Äì we‚Äôre like the Swiss Army Knife of machine learning platforms.  
- **Fast Forward Your AI Career:** Whether you‚Äôre an aspiring ML engineer or a seasoned guru, building your profile here is like having a LinkedIn powered by neural networks and good vibes.  
- **Enterprise-Grade Power:** Leading teams? We got your back with secure, scalable solutions to bring AI magic to your organization.  

---

## The Hugging Face Culture:  
We‚Äôre a bit like your favorite neighborhood caf√© that turned into a cutting-edge AI hub. Friendly, open, and buzzing with curiosity, we celebrate openness and ethical AI development. Here, jokes fly as fast as code, and every PR (pull request, not public relations) is an opportunity to learn and grow.

---

## Who‚Äôs Using Hugging Face?  

- Researchers pushing the boundaries of AI  
- Developers cooking up the next viral AI app  
- Enterprises building AI-powered game changers  
- Curious minds eager to experiment and learn  

Everyone from independent hackers to tech giants finds their second home here.

---

## Ready to Join the AI Revolution?  

- **Dive in:** Browse over 2 million models and discover the future right at your fingertips.  
- **Create:** Host your own models, datasets, or apps and join thousands of collaborators worldwide.  
- **Accelerate:** Leverage our paid compute power or enterprise services when you‚Äôre ready to make a serious splash.  

---

### Join Hugging Face Today ‚Äî Because the Future of AI Deserves a Hug! ü§ó

Sign up now and become part of the friendliest AI community the internet has ever seen.

---

*P.S. We promise our community is more fun than your average robot takeover.*