# A full business solution

### BUSINESS CHALLENGE:

Create a product that builds a brochure about a company, for use with prospective clients, investors and potential recruits.



In [20]:
import os
import json
from dotenv import load_dotenv
from IPython.display import Markdown, display, update_display
# from scraper import fetch_website_links, fetch_website_contents
from openai import OpenAI

In [26]:
from bs4 import BeautifulSoup
import requests


# Standard headers to fetch a website.
# Sent with every requests.get call in this notebook; the User-Agent is a
# desktop Chrome string (presumably so sites serve the scraper the same HTML
# they would serve a browser - confirm if a site still blocks the request).
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36"
}


def fetch_website_contents(url, timeout=30):
    """
    Return the title and contents of the website at the given url;
    truncate to 2,000 characters as a sensible limit

    url: address of the page to download
    timeout: seconds to wait on the server before requests gives up and raises;
        without it a stalled server would block this call indefinitely

    Returns the page title, a blank line, then the text of <body> with
    script/style/img/input elements removed, capped at 2,000 characters.
    """
    response = requests.get(url, headers=headers, timeout=timeout)
    soup = BeautifulSoup(response.content, "html.parser")
    # soup.title.string is None when <title> is empty or contains nested tags;
    # fall back to the placeholder so the concatenation below cannot raise TypeError
    title = (soup.title.string if soup.title else None) or "No title found"
    if soup.body:
        # Drop elements that carry no readable text before extracting it
        for irrelevant in soup.body(["script", "style", "img", "input"]):
            irrelevant.decompose()
        text = soup.body.get_text(separator="\n", strip=True)
    else:
        text = ""
    return (title + "\n\n" + text)[:2_000]
    
    
    
def fetch_website_links(url, timeout=30):
    """
    Return the links on the website at the given url
    I realize this is inefficient as we're parsing twice! This is to keep the code in the lab simple.
    Feel free to use a class and optimize it!

    url: address of the page to download
    timeout: seconds to wait on the server before requests gives up and raises;
        without it a stalled server would block this call indefinitely

    Returns the href of every <a> element, in document order, skipping anchors
    whose href is missing or empty. Relative hrefs are returned as-is.
    """
    response = requests.get(url, headers=headers, timeout=timeout)
    soup = BeautifulSoup(response.content, "html.parser")
    links = [link.get("href") for link in soup.find_all("a")]
    return [link for link in links if link]

In [27]:
# Load environment settings, sanity-check the API key, create the client

load_dotenv(override=True)
api_key = os.getenv('OPENAI_API_KEY')

# Cheap sanity check only: key is present, has the expected prefix and is longer than 10 characters
if not (api_key and api_key.startswith('sk-proj-') and len(api_key) > 10):
    print("There might be a problem with your API key? Please visit the troubleshooting notebook!")
else:
    print("API key looks good so far")

MODEL = 'gpt-5-nano'
openai = OpenAI()

API key looks good so far


In [28]:
# Try the link scraper on the Hugging Face home page and show the raw hrefs
# (note that relative paths and repeated entries come back unfiltered)
links = fetch_website_links("https://huggingface.co")
links

['/',
 '/models',
 '/datasets',
 '/spaces',
 '/docs',
 '/enterprise',
 '/pricing',
 '/login',
 '/join',
 '/spaces',
 '/models',
 '/moonshotai/Kimi-K2-Thinking',
 '/baidu/ERNIE-4.5-VL-28B-A3B-Thinking',
 '/WeiboAI/VibeThinker-1.5B',
 '/maya-research/maya1',
 '/dx8152/Qwen-Edit-2509-Multiple-angles',
 '/models',
 '/spaces/linoyts/Qwen-Image-Edit-Angles',
 '/spaces/HuggingFaceTB/smol-training-playbook',
 '/spaces/dream2589632147/Dream-wan2-2-faster-Pro',
 '/spaces/depth-anything/depth-anything-3',
 '/spaces/tori29umai/Qwen-Image-2509-MultipleAngles',
 '/spaces',
 '/datasets/builddotai/Egocentric-10K',
 '/datasets/PleIAs/SYNTH',
 '/datasets/facebook/omnilingual-asr-corpus',
 '/datasets/nvidia/PhysicalAI-Autonomous-Vehicles',
 '/datasets/tensonaut/EPSTEIN_FILES_20K',
 '/datasets',
 '/join',
 '/enterprise',
 '/enterprise',
 '/enterprise',
 '/enterprise',
 '/enterprise',
 '/enterprise',
 '/enterprise',
 '/inference/models',
 '/pricing#endpoints',
 '/pricing#spaces',
 '/pricing',
 '/allenai',


## First step: Have GPT-5-nano figure out which links are relevant

In [29]:
# System prompt for the link-selection call: tells the model to pick the
# brochure-worthy links and shows the JSON shape to reply with - an object
# with a "links" list of {"type", "url"} entries, which
# fetch_page_and_all_relevant_links reads back.
link_system_prompt = """
You are provided with a list of links found on a webpage.
You are able to decide which of the links would be most relevant to include in a brochure about the company,
such as links to an About page, or a Company page, or Careers/Jobs pages.
You should respond in JSON as in this example:

{
    "links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page", "url": "https://another.full.url/careers"}
    ]
}
"""

In [30]:
def get_links_user_prompt(url):
    """
    Build the user prompt for the link-selection call.

    Scrapes the links from the page at url and appends them, one per line,
    to the instructions asking the model to pick the brochure-relevant ones.
    Repeated hrefs are listed once: navigation bars and footers repeat the
    same link many times, which only adds tokens to the prompt.
    """
    user_prompt = f"""
Here is the list of links on the website {url} -
Please decide which of these are relevant web links for a brochure about the company, 
respond with the full https URL in JSON format.
Do not include Terms of Service, Privacy, email links.

Links (some might be relative links):

"""
    links = fetch_website_links(url)
    # dict.fromkeys drops duplicates while keeping first-seen order
    unique_links = list(dict.fromkeys(links))
    user_prompt += "\n".join(unique_links)
    return user_prompt

In [31]:
def select_relevant_links(url):
    """
    Ask the model which links on the page at url belong in a company brochure.

    Sends the link-selection system prompt plus the scraped link list,
    requests a JSON-object reply, and returns that reply parsed with json.loads.
    """
    conversation = [
        {"role": "system", "content": link_system_prompt},
        {"role": "user", "content": get_links_user_prompt(url)},
    ]
    completion = openai.chat.completions.create(
        model=MODEL,
        messages=conversation,
        response_format={"type": "json_object"},
    )
    return json.loads(completion.choices[0].message.content)
    

In [32]:
# Run the link selection against the Hugging Face home page and show the parsed JSON reply
select_relevant_links("https://huggingface.co")

{'links': [{'type': 'home page', 'url': 'https://huggingface.co/'},
  {'type': 'company page', 'url': 'https://huggingface.co/enterprise'},
  {'type': 'pricing page', 'url': 'https://huggingface.co/pricing'},
  {'type': 'careers page', 'url': 'https://apply.workable.com/huggingface/'},
  {'type': 'GitHub page', 'url': 'https://github.com/huggingface'},
  {'type': 'LinkedIn page',
   'url': 'https://www.linkedin.com/company/huggingface/'},
  {'type': 'Twitter page', 'url': 'https://twitter.com/huggingface'},
  {'type': 'Discord / community page',
   'url': 'https://huggingface.co/join/discord'},
  {'type': 'Discuss forum', 'url': 'https://discuss.huggingface.co'},
  {'type': 'Brand page', 'url': 'https://huggingface.co/brand'},
  {'type': 'API endpoints page', 'url': 'https://endpoints.huggingface.co'}]}

## Second step: make the brochure!

In [33]:
def fetch_page_and_all_relevant_links(url):
    """
    Return one text block with the landing page at url followed by the
    contents of every link the model judged relevant for a brochure.

    The result starts with a "## Landing Page:" section, then a
    "## Relevant Links:" heading with one "### Link: <type>" section per
    selected link. Each page is fetched with fetch_website_contents.
    """
    from urllib.parse import urljoin

    contents = fetch_website_contents(url)
    relevant_links = select_relevant_links(url)
    sections = [f"## Landing Page:\n\n{contents}\n## Relevant Links:\n"]
    # A reply without a "links" key yields just the landing page instead of a KeyError
    for link in relevant_links.get("links", []):
        # The model is asked for full URLs but may echo a relative href from the
        # scraped list; urljoin resolves those against the site and leaves
        # absolute URLs unchanged
        page_url = urljoin(url, link["url"])
        sections.append(f"\n\n### Link: {link['type']}\n")
        sections.append(fetch_website_contents(page_url))
    return "".join(sections)

In [34]:
# Print the combined landing-page and relevant-link text gathered for Hugging Face
print(fetch_page_and_all_relevant_links("https://huggingface.co"))

## Landing Page:

Hugging Face – The AI community building the future.

Hugging Face
Models
Datasets
Spaces
Community
Docs
Enterprise
Pricing
Log In
Sign Up
The AI community building the future.
The platform where the machine learning community collaborates on models, datasets, and applications.
Explore AI Apps
or
Browse 1M+ models
Trending on
this week
Models
moonshotai/Kimi-K2-Thinking
Updated
10 days ago
•
153k
•
1.28k
baidu/ERNIE-4.5-VL-28B-A3B-Thinking
Updated
4 days ago
•
12.8k
•
471
WeiboAI/VibeThinker-1.5B
Updated
4 days ago
•
7.75k
•
310
maya-research/maya1
Updated
7 days ago
•
30.8k
•
661
dx8152/Qwen-Edit-2509-Multiple-angles
Updated
6 days ago
•
70.7k
•
673
Browse 1M+ models
Spaces
Running
on
Zero
MCP
1.06k
Qwen Image Edit Camera Control
🎬
1.06k
Fast 4 step inference with Qwen Image Edit 2509
Running
on
CPU Upgrade
2.27k
The Smol Training Playbook
📚
2.27k
The secrets to building world-class LLMs
Running
on
Zero
MCP
462
Dream-wan2-2-faster-Pro
🎬

In [35]:
# System prompt for the brochure-writing calls (used by create_brochure and
# stream_brochure): sets the audience and asks for markdown without code
# blocks, which those functions pass to IPython's Markdown display.
brochure_system_prompt = """
You are an assistant that analyzes the contents of several relevant pages from a company website
and creates a short brochure about the company for prospective customers, investors and recruits.
Respond in markdown without code blocks.
Include details of company culture, customers and careers/jobs if you have the information.
"""

In [36]:
def get_brochure_user_prompt(company_name, url):
    """
    Build the user prompt for the brochure-writing call.

    Combines a short instruction naming the company with the scraped landing
    page and relevant-link contents for url, and returns at most the first
    5,000 characters of the result.
    """
    instructions = f"""
You are looking at a company called: {company_name}
Here are the contents of its landing page and other relevant pages;
use this information to build a short brochure of the company in markdown without code blocks.\n\n
"""
    scraped_pages = fetch_page_and_all_relevant_links(url)
    # Keep only the first 5,000 characters of the combined prompt
    return (instructions + scraped_pages)[:5_000]

In [37]:
# Inspect the final (truncated) user prompt that will be sent for Hugging Face
get_brochure_user_prompt("HuggingFace", "https://huggingface.co")

'\nYou are looking at a company called: HuggingFace\nHere are the contents of its landing page and other relevant pages;\nuse this information to build a short brochure of the company in markdown without code blocks.\n\n\n## Landing Page:\n\nHugging Face ‚Äì The AI community building the future.\n\nHugging Face\nModels\nDatasets\nSpaces\nCommunity\nDocs\nEnterprise\nPricing\nLog In\nSign Up\nThe AI community building the future.\nThe platform where the machine learning community collaborates on models, datasets, and applications.\nExplore AI Apps\nor\nBrowse 1M+ models\nTrending on\nthis week\nModels\nmoonshotai/Kimi-K2-Thinking\nUpdated\n11 days ago\n‚Ä¢\n153k\n‚Ä¢\n1.28k\nbaidu/ERNIE-4.5-VL-28B-A3B-Thinking\nUpdated\n4 days ago\n‚Ä¢\n12.8k\n‚Ä¢\n471\nWeiboAI/VibeThinker-1.5B\nUpdated\n4 days ago\n‚Ä¢\n7.75k\n‚Ä¢\n310\nmaya-research/maya1\nUpdated\n7 days ago\n‚Ä¢\n30.8k\n‚Ä¢\n661\ndx8152/Qwen-Edit-2509-Multiple-angles\nUpdated\n6 days ago\n‚Ä¢\n70.7k\n‚Ä¢\n673\nBrowse 1M+ models\nSpa

In [39]:
def create_brochure(company_name, url, model="gpt-4.1-mini"):
    """
    Generate a brochure for the company and render it as Markdown in the notebook.

    company_name: name to present the company under in the prompt
    url: landing page to scrape for brochure material
    model: chat model used to write the brochure; defaults to the model this
        function previously hard-coded, so existing calls behave the same

    Displays the result; nothing is returned.
    """
    response = openai.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": brochure_system_prompt},
            {"role": "user", "content": get_brochure_user_prompt(company_name, url)}
        ],
    )
    result = response.choices[0].message.content
    display(Markdown(result))

In [40]:
# Generate and render the brochure for Hugging Face
create_brochure("HuggingFace", "https://huggingface.co")

# Hugging Face: The AI Community Building the Future

---

## About Hugging Face

Hugging Face is the world-renowned AI platform and community dedicated to advancing the field of machine learning. It is where a global community of ML practitioners, researchers, and developers come together to **collaborate, share, and innovate** on machine learning models, datasets, and applications across all modalities—text, image, video, audio, and even 3D.

Explore over **1 million models**, **250,000+ datasets**, and **400,000+ applications**, making it the largest and most active hub for AI development and deployment.

---

## What We Offer

### Community & Collaboration
- Host and collaborate on **unlimited public models, datasets, and applications**
- Share your work and build a professional portfolio showcasing your machine learning achievements
- Engage with a vibrant community dedicated to open source and innovation

### Hugging Face Platform
- Access cutting-edge AI models including trending state-of-the-art architectures updated frequently by the community
- Explore "Spaces" to run AI applications seamlessly on scalable infrastructure (CPU or GPU powered)
- Utilize the Hugging Face open-source stack designed to accelerate your ML workflows

### Enterprise Solutions
- Tailored **Team and Enterprise plans** starting at $20/user/month with flexible contracts
- Enterprise-grade security features including:
  - Single Sign-On (SSO) integration for secure identity management
  - Data residency options by region with audit logs for governance and compliance
  - Dedicated support to help scale AI deployments within your organization

---

## Platform Highlights

- **1M+ models** spanning diverse applications and languages
- **250K+ datasets** updated frequently from both community and industry leaders including NVIDIA and Facebook
- **Interactive Spaces** running AI demos and tools for use cases such as video generation, depth mapping, and advanced image editing
- Multi-modal support encompassing text, image, video, audio, and 3D data

---

## Company Culture

Hugging Face thrives on an **open, collaborative culture** where knowledge sharing is at the core. The company empowers developers and organizations alike to push the boundaries of AI with open-source tools, community-driven innovation, and accessible resources.

By fostering a global community, Hugging Face nurtures talent and learning, encouraging machine learning professionals to build their portfolios, gain visibility, and contribute meaningfully to the future of AI.

---

## Customers & Community

- Developers, researchers, and organizations globally use Hugging Face to create and deploy ML models faster
- Leading tech companies and research institutions rely on Hugging Face’s open ecosystem and enterprise offerings to scale AI safely and efficiently
- Continuous contributions from prominent AI research groups and major industry players enable early access to state-of-the-art models and datasets

---

## Careers at Hugging Face

Hugging Face offers exciting career opportunities for professionals passionate about AI and open source. Working at Hugging Face means:

- Collaborating with a world-class team driven by innovation and community impact
- Engaging with cutting-edge AI technologies and contributing to scalable platforms used globally
- Growing your expertise in a culture that values openness, continuous learning, and positive disruption in AI

If you want to be part of a company **building the future of AI**, consider joining Hugging Face as they continue to expand their team.

---

## Join Us

Whether you are a developer, researcher, enterprise customer, or AI enthusiast, Hugging Face is your gateway to faster, smarter, and more collaborative machine learning innovation.

- Explore models, datasets, and applications at [huggingface.co](https://huggingface.co)
- Sign up to build your ML portfolio and collaborate with the best in the field
- Contact sales for enterprise inquiries or start a team subscription to accelerate your AI projects

---

Hugging Face — Building the future of AI, together.

## Finally - a minor improvement

With a small adjustment, we can change this so that the results stream back from OpenAI,
with the familiar typewriter animation

In [None]:
def stream_brochure(company_name, url, model="gpt-4.1-mini"):
    """
    Generate a brochure for the company and render it incrementally as the
    model streams its reply, re-rendering the Markdown on every chunk.

    company_name: name to present the company under in the prompt
    url: landing page to scrape for brochure material
    model: chat model used to write the brochure; defaults to the model this
        function previously hard-coded, so existing calls behave the same

    Displays the result; nothing is returned.
    """
    stream = openai.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": brochure_system_prompt},
            {"role": "user", "content": get_brochure_user_prompt(company_name, url)}
        ],
        stream=True
    )
    response = ""
    display_handle = display(Markdown(""), display_id=True)
    for chunk in stream:
        # Some stream chunks carry no choices (e.g. usage-only chunks);
        # indexing choices[0] on those would raise IndexError
        if not chunk.choices:
            continue
        # delta.content is None on chunks without text, hence the fallback
        response += chunk.choices[0].delta.content or ''
        update_display(Markdown(response), display_id=display_handle.display_id)

In [None]:
# Stream the brochure for Hugging Face into the notebook as it is generated
stream_brochure("HuggingFace", "https://huggingface.co")