In [1]:
import os
import json
from dotenv import load_dotenv
from IPython.display import Markdown, display, update_display
from scraper import fetch_website_links, fetch_website_contents
from openai import OpenAI

In [2]:

load_dotenv(override=True)
api_key = os.getenv('OPENAI_API_KEY')

if api_key and api_key.startswith('sk-proj-') and len(api_key)>10:
    print("API key looks good so far")
else:
    print("There might be a problem with your API key? Please visit the troubleshooting notebook!")
    
MODEL = 'gpt-5-nano'
openai = OpenAI()

API key looks good so far


In [3]:
links = fetch_website_links("https://edwarddonner.com")
links

['https://edwarddonner.com/',
 'https://edwarddonner.com/connect-four/',
 'https://edwarddonner.com/outsmart/',
 'https://edwarddonner.com/about-me-and-about-nebula/',
 'https://edwarddonner.com/posts/',
 'https://edwarddonner.com/',
 'https://news.ycombinator.com',
 'https://nebula.io/?utm_source=ed&utm_medium=referral',
 'https://www.prnewswire.com/news-releases/wynden-stark-group-acquires-nyc-venture-backed-tech-startup-untapt-301269512.html',
 'https://patents.google.com/patent/US20210049536A1/',
 'https://www.linkedin.com/in/eddonner/',
 'https://edwarddonner.com/2025/09/15/ai-in-production-gen-ai-and-agentic-ai-on-aws-at-scale/',
 'https://edwarddonner.com/2025/09/15/ai-in-production-gen-ai-and-agentic-ai-on-aws-at-scale/',
 'https://edwarddonner.com/2025/05/28/connecting-my-courses-become-an-llm-expert-and-leader/',
 'https://edwarddonner.com/2025/05/28/connecting-my-courses-become-an-llm-expert-and-leader/',
 'https://edwarddonner.com/2025/05/18/2025-ai-executive-briefing/',
 '

In [4]:
link_system_prompt = """
You are provided with a list of links found on a webpage.
You are able to decide which of the links would be most relevant to include in a brochure about the company,
such as links to an About page, or a Company page, or Careers/Jobs pages.
You should respond in JSON as in this example:

{
    "links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page", "url": "https://another.full.url/careers"}
    ]
}
"""

In [5]:
def get_links_user_prompt(url):
    user_prompt = f"""
Here is the list of links on the website {url} -
Please decide which of these are relevant web links for a brochure about the company, 
respond with the full https URL in JSON format.
Do not include Terms of Service, Privacy, email links.

Links (some might be relative links):

"""
    links = fetch_website_links(url)
    user_prompt += "\n".join(links)
    return user_prompt

In [6]:
print(get_links_user_prompt("https://edwarddonner.com"))


Here is the list of links on the website https://edwarddonner.com -
Please decide which of these are relevant web links for a brochure about the company, 
respond with the full https URL in JSON format.
Do not include Terms of Service, Privacy, email links.

Links (some might be relative links):

https://edwarddonner.com/
https://edwarddonner.com/connect-four/
https://edwarddonner.com/outsmart/
https://edwarddonner.com/about-me-and-about-nebula/
https://edwarddonner.com/posts/
https://edwarddonner.com/
https://news.ycombinator.com
https://nebula.io/?utm_source=ed&utm_medium=referral
https://www.prnewswire.com/news-releases/wynden-stark-group-acquires-nyc-venture-backed-tech-startup-untapt-301269512.html
https://patents.google.com/patent/US20210049536A1/
https://www.linkedin.com/in/eddonner/
https://edwarddonner.com/2025/09/15/ai-in-production-gen-ai-and-agentic-ai-on-aws-at-scale/
https://edwarddonner.com/2025/09/15/ai-in-production-gen-ai-and-agentic-ai-on-aws-at-scale/
https://edwar

In [7]:
def select_relevant_links(url):
    response = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": link_system_prompt},
            {"role": "user", "content": get_links_user_prompt(url)}
        ],
        response_format={"type": "json_object"}
    )
    result = response.choices[0].message.content
    links = json.loads(result)
    return links

In [8]:
select_relevant_links("https://edwarddonner.com")

{'links': [{'type': 'about page',
   'url': 'https://edwarddonner.com/about-me-and-about-nebula/'},
  {'type': 'company page',
   'url': 'https://nebula.io/?utm_source=ed&utm_medium=referral'},
  {'type': 'homepage', 'url': 'https://edwarddonner.com/'},
  {'type': 'linkedin page', 'url': 'https://www.linkedin.com/in/eddonner/'},
  {'type': 'twitter profile', 'url': 'https://twitter.com/edwarddonner'},
  {'type': 'facebook page',
   'url': 'https://www.facebook.com/edward.donner.52'},
  {'type': 'patent',
   'url': 'https://patents.google.com/patent/US20210049536A1/'},
  {'type': 'blog', 'url': 'https://edwarddonner.com/posts/'}]}

In [9]:
def fetch_page_and_all_relevant_links(url):
    contents = fetch_website_contents(url)
    relevant_links = select_relevant_links(url)
    result = f"## Landing Page:\n\n{contents}\n## Relevant Links:\n"
    for link in relevant_links['links']:
        result += f"\n\n### Link: {link['type']}\n"
        result += fetch_website_contents(link["url"])
    return result

In [10]:
print(fetch_page_and_all_relevant_links("https://huggingface.co"))

## Landing Page:

Hugging Face – The AI community building the future.

Hugging Face
Models
Datasets
Spaces
Community
Docs
Enterprise
Pricing
Log In
Sign Up
The AI community building the future.
The platform where the machine learning community collaborates on models, datasets, and applications.
Explore AI Apps
or
Browse 1M+ models
Trending on
this week
Models
deepseek-ai/DeepSeek-OCR
Updated
2 days ago
•
841k
•
1.97k
PaddlePaddle/PaddleOCR-VL
Updated
3 days ago
•
17.3k
•
1.1k
tencent/HunyuanWorld-Mirror
Updated
2 days ago
•
12.5k
•
351
MiniMaxAI/MiniMax-M2
Updated
about 3 hours ago
•
570
•
349
krea/krea-realtime-video
Updated
7 days ago
•
1.6k
•
210
Browse 1M+ models
Spaces
Running
on
Zero
284
284
DeepSeek OCR Demo
🆘
An interactive demo for the DeepSeek-OCR model.
Running
15.4k
15.4k
DeepSite v3
🐳
Generate any application by Vibe Coding
Running
530
530
veo3.1-fast
🐨
Generate videos from text or images
Running
2.09k
2.09k
Wan2.2 Animate
👁
Wan2.2 Animate
Running
on
Zero
MCP
1.94k
1.94k


In [11]:
brochure_system_prompt = """
You are an assistant that analyzes the contents of several relevant pages from a company website
and creates a short brochure about the company for prospective customers, investors and recruits.
Respond in markdown without code blocks.
Include details of company culture, customers and careers/jobs if you have the information.
"""

In [12]:
def get_brochure_user_prompt(company_name, url):
    user_prompt = f"""
You are looking at a company called: {company_name}
Here are the contents of its landing page and other relevant pages;
use this information to build a short brochure of the company in markdown without code blocks.\n\n
"""
    user_prompt += fetch_page_and_all_relevant_links(url)
    user_prompt = user_prompt[:5_000] # Truncate if more than 5,000 characters
    return user_prompt

In [13]:
get_brochure_user_prompt("HuggingFace", "https://huggingface.co")

'\nYou are looking at a company called: HuggingFace\nHere are the contents of its landing page and other relevant pages;\nuse this information to build a short brochure of the company in markdown without code blocks.\n\n\n## Landing Page:\n\nHugging Face – The AI community building the future.\n\nHugging Face\nModels\nDatasets\nSpaces\nCommunity\nDocs\nEnterprise\nPricing\nLog In\nSign Up\nThe AI community building the future.\nThe platform where the machine learning community collaborates on models, datasets, and applications.\nExplore AI Apps\nor\nBrowse 1M+ models\nTrending on\nthis week\nModels\ndeepseek-ai/DeepSeek-OCR\nUpdated\n2 days ago\n•\n841k\n•\n1.97k\nPaddlePaddle/PaddleOCR-VL\nUpdated\n3 days ago\n•\n17.3k\n•\n1.1k\ntencent/HunyuanWorld-Mirror\nUpdated\n2 days ago\n•\n12.5k\n•\n351\nMiniMaxAI/MiniMax-M2\nUpdated\nabout 3 hours ago\n•\n570\n•\n349\nkrea/krea-realtime-video\nUpdated\n7 days ago\n•\n1.6k\n•\n210\nBrowse 1M+ models\nSpaces\nRunning\non\nZero\n284\n284\nDeepSe

In [14]:
def create_brochure(company_name, url):
    response = openai.chat.completions.create(
        model="gpt-4.1-mini",
        messages=[
            {"role": "system", "content": brochure_system_prompt},
            {"role": "user", "content": get_brochure_user_prompt(company_name, url)}
        ],
    )
    result = response.choices[0].message.content
    display(Markdown(result))

In [15]:
create_brochure("HuggingFace", "https://huggingface.co")

# Hugging Face – The AI Community Building the Future

---

## About Hugging Face

Hugging Face is the premier collaboration platform for the machine learning (ML) community. Serving as a central hub, Hugging Face empowers ML engineers, researchers, scientists, and end users worldwide to share, explore, and experiment with open-source machine learning models, datasets, and applications. Their mission is to democratize access to good machine learning, fostering an open and ethical AI future built together by the community.

With a rapidly growing community and some of the most widely used open-source ML libraries, Hugging Face stands at the forefront of the AI revolution, enabling innovation and collaboration that spans all AI modalities including text, image, video, audio, and even 3D.

---

## What Does Hugging Face Offer?

- **Extensive Model Hub**: Access over 1 million pre-trained models including cutting-edge NLP, computer vision, and multimodal models.
- **Vast Dataset Repository**: Explore more than 250,000 datasets curated for diverse AI tasks.
- **Spaces**: Host and run AI-powered applications interactively, facilitating easy sharing and demos of ML apps.
- **Open Source Stack**: Accelerate development and deployment with Hugging Face’s open-source tools and libraries.
- **Enterprise Solutions**: Dedicated paid compute resources, enterprise-grade security, and prioritized support to empower teams and organizations.
- **Community-First Environment**: Build your ML portfolio and learn collaboratively within a thriving global ecosystem.

---

## Company Culture

Hugging Face thrives on openness, cooperation, and ethical AI development. The team is composed of around 200+ passionate individuals who are dedicated to making machine learning accessible and inclusive. They foster a collaborative environment where sharing knowledge, contributing to open source, and pushing technological boundaries are fundamental values.

The company encourages:
- Community engagement and collaboration
- Continuous learning and innovation
- Transparency and ethical AI practices
- Empowerment of the next generation of AI practitioners

If “democratizing machine learning one commit at a time” resonates with you, Hugging Face warmly invites you to join their mission.

---

## Customers & Community

Hugging Face’s users range from individual AI researchers to large enterprises looking to integrate state-of-the-art machine learning solutions safely and efficiently. The platform’s open source nature and extensive library serve companies, academic institutions, startups, and hobbyists alike.

Community highlights:
- 65,000+ followers on the platform
- Thousands of contributors continuously updating and creating models and datasets
- Vibrant forums, Discord channels, and events to engage with peers and experts
- Dedicated learning resources like tutorials and blog articles

---

## Careers at Hugging Face

Joining Hugging Face means working alongside some of the brightest minds in AI and machine learning. The company is actively growing and regularly hires talent passionate about AI, software engineering, research, and community building.

Why work at Hugging Face?
- Opportunity to shape the future of AI and open-source ML
- Collaborative and inclusive work culture
- Support for remote and flexible working arrangements
- Access to cutting-edge AI projects and research
- Competitive compensation and benefits

Explore current openings and become part of a mission-driven team that’s building the future of AI, one model at a time.

---

## Connect with Hugging Face

- Website: [https://huggingface.co](https://huggingface.co)  
- GitHub: github.com/huggingface  
- Twitter: @huggingface  
- LinkedIn: Hugging Face  
- Discord: Active community for discussions and support

---

Embrace the future of AI with Hugging Face — where collaboration fosters innovation and democratizes machine learning for all.