In [1]:
#imports
import os
import requests 
import json
from typing import List
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from IPython.display import Markdown, display, update_display
from openai import OpenAI

In [6]:
# Load credentials from .env, sanity-check the key, and create the client used by later cells

load_dotenv(override=True)
api_key = os.getenv('OPENAI_API_KEY')

# This only looks at the 'sk-proj-' prefix and the length; it does not validate the key itself.
print(
    "API key looks good so far"
    if api_key and api_key.startswith('sk-proj-') and len(api_key) > 10
    else "There might be a problem with your API key"
)

MODEL = 'gpt-4o-mini'
openai = OpenAI()

API key looks good so far


In [7]:
# A class to represent a Webpage

# Some websites need you to use proper headers when fetching them.
# This dict is passed as the `headers` argument of every requests.get call made by Website,
# so each fetch identifies itself with a desktop-browser User-Agent string.
headers = {
 "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36"
}

class Website:
    """
    A utility class to represent a Website that we have scraped, now with links

    Attributes set by the constructor:
        url:   the URL that was requested
        body:  raw response bytes
        title: text of the <title> tag, or "No title found"
        text:  visible body text with script/style/img/input elements removed ("" if no <body>)
        links: every non-empty href found on an <a> tag, in document order (may be relative)
    """

    def __init__(self, url, timeout=10):
        """
        Fetch `url` and parse it.

        Args:
            url: address of the page to scrape.
            timeout: seconds to wait for the server before requests raises a
                timeout error; without it an unresponsive host blocks forever.

        Raises:
            requests.RequestException: if the page cannot be fetched.
        """
        self.url = url
        response = requests.get(url, headers=headers, timeout=timeout)
        self.body = response.content
        soup = BeautifulSoup(self.body, 'html.parser')

        # .string is None when <title> is empty or contains nested tags, so fall
        # back in that case too instead of storing None as the title.
        title_text = soup.title.string if soup.title else None
        self.title = title_text if title_text else "No title found"

        if soup.body:
            # Remove elements that carry no readable text before extracting it.
            for irrelevant in soup.body(["script", "style", "img", "input"]):
                irrelevant.decompose()
            self.text = soup.body.get_text(separator="\n", strip=True)
        else:
            self.text = ""

        # Keep only anchors that actually have a non-empty href.
        hrefs = [anchor.get('href') for anchor in soup.find_all('a')]
        self.links = [href for href in hrefs if href]

    def get_contents(self):
        """Return the page title and text formatted as one block for use in a prompt."""
        return f"Webpage Title:\n{self.title}\nWebpage Contents:\n{self.text}\n\n"

In [8]:
# Scrape the Bloomberg home page and show the hrefs collected from its <a> tags
ed = Website("https://www.bloomberg.com/")
ed.links

['/feedback',
 '/notices/tos',
 '/notices/tos',
 '/feedback',
 'https://bloomberg.com',
 '/subscription']

In [9]:
# NOW WE LET THE GPT-4o-mini FIGURE OUT WHAT LINKS ARE RELEVANT
# System prompt for the link-selection call: states the task, then shows the JSON shape
# the reply should follow. Adjacent string literals are concatenated by Python.
link_system_prompt = (
    "You are provided with a list of links found on a webpage. "
    "You are able to decide which of the links would be most relevant to include in a brochure about the company, "
    "such as links to an About page, or a Company page, or Careers/Jobs pages.\n"
    "You should respond in JSON as in this example:"
    """
{
    "links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page", "url": "https://another.full.url/careers"}
    ]
}
"""
)


In [10]:
# Show the assembled system prompt, including the example JSON reply
print(link_system_prompt)

You are provided with a list of links found on a webpage. You are able to decide which of the links would be most relevant to include in a brochure about the company, such as links to an About page, or a Company page, or Careers/Jobs pages.
You should respond in JSON as in this example:
{
    "links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page", "url": "https://another.full.url/careers"}
    ]
}



In [11]:
def get_links_user_prompt(website):
    """
    Build the user message for the link-selection call.

    Args:
        website: object exposing `url` (str) and `links` (list of str).

    Returns:
        The instructions followed by one link per line, exactly as stored in
        `website.links` (duplicates and relative links included).
    """
    instructions = (
        f"Here is the list of links on the website of {website.url} - "
        "please decide which of these are relevant web links for a brochure about the company, "
        "respond with the full https URL in JSON format. "
        "Do not include Terms of Service, Privacy, email links.\n"
    )
    heading = "Links (some might be relative links):\n"
    return instructions + heading + "\n".join(website.links)

In [12]:
# Preview the user prompt built from the Bloomberg page held in `ed`
print(get_links_user_prompt(ed))

Here is the list of links on the website of https://www.bloomberg.com/ - please decide which of these are relevant web links for a brochure about the company, respond with the full https URL in JSON format. Do not include Terms of Service, Privacy, email links.
Links (some might be relative links):
/feedback
/notices/tos
/notices/tos
/feedback
https://bloomberg.com
/subscription


In [13]:
def get_links(url):
    """
    Scrape `url` and ask the model which of its links belong in a company brochure.

    The request uses JSON response mode, and the reply text is parsed with json.loads.

    Args:
        url: address of the page whose links should be classified.

    Returns:
        The parsed JSON reply (the system prompt asks for a top-level "links" list).
    """
    page = Website(url)
    conversation = [
        {"role": "system", "content": link_system_prompt},
        {"role": "user", "content": get_links_user_prompt(page)},
    ]
    completion = openai.chat.completions.create(
        model=MODEL,
        messages=conversation,
        response_format={"type": "json_object"},
    )
    return json.loads(completion.choices[0].message.content)

In [14]:
# Scrape the Hugging Face home page and list every link found on it
huggingface = Website("https://huggingface.co")
huggingface.links

['/',
 '/models',
 '/datasets',
 '/spaces',
 '/docs',
 '/enterprise',
 '/pricing',
 '/login',
 '/join',
 '/spaces',
 '/models',
 '/microsoft/VibeVoice-1.5B',
 '/xai-org/grok-2',
 '/openbmb/MiniCPM-V-4_5',
 '/Qwen/Qwen-Image-Edit',
 '/deepseek-ai/DeepSeek-V3.1',
 '/models',
 '/spaces/enzostvs/deepsite',
 '/spaces/zerogpu-aoti/wan2-2-fp8da-aoti-faster',
 '/spaces/Qwen/Qwen-Image-Edit',
 '/spaces/multimodalart/Qwen-Image-Edit-Fast',
 '/spaces/lvwerra/jupyter-agent-2',
 '/spaces',
 '/datasets/fka/awesome-chatgpt-prompts',
 '/datasets/openai/healthbench',
 '/datasets/nvidia/Nemotron-Post-Training-Dataset-v2',
 '/datasets/liumindmind/NekoQA-10K',
 '/datasets/nvidia/Granary',
 '/datasets',
 '/join',
 '/pricing#endpoints',
 '/pricing#spaces',
 '/pricing',
 '/enterprise',
 '/enterprise',
 '/enterprise',
 '/enterprise',
 '/enterprise',
 '/enterprise',
 '/enterprise',
 '/allenai',
 '/facebook',
 '/amazon',
 '/google',
 '/Intel',
 '/microsoft',
 '/grammarly',
 '/Writer',
 '/docs/transformers',
 '/

In [15]:
# Let the model pick the brochure-relevant links from the Hugging Face home page
get_links("https://huggingface.co")

{'links': [{'type': 'about page', 'url': 'https://huggingface.co/huggingface'},
  {'type': 'careers page', 'url': 'https://apply.workable.com/huggingface/'},
  {'type': 'company page',
   'url': 'https://www.linkedin.com/company/huggingface/'}]}

In [16]:
def get_all_details(url):
    """
    Collect the text of the landing page plus every page the model selected.

    Model-selected links are handled on a best-effort basis: relative URLs are
    resolved against `url`, entries without a usable "url" are ignored, and a
    page that cannot be fetched is skipped with a printed notice instead of
    aborting the whole run. A failure to fetch the landing page still raises.

    Args:
        url: address of the company's landing page.

    Returns:
        One string: the landing page contents followed by a labelled section
        for each linked page that was fetched successfully.
    """
    # Local import keeps this cell self-contained; urljoin leaves absolute URLs unchanged.
    from urllib.parse import urljoin

    result = "Landing page:\n"
    result += Website(url).get_contents()
    # NOTE: get_links builds its own Website(url), so the landing page is fetched twice.
    links = get_links(url)
    print("Found links:", links)
    for link in links.get("links", []):
        link_url = link.get("url") if isinstance(link, dict) else None
        if not link_url:
            continue  # malformed entry from the model; nothing to fetch
        try:
            page = Website(urljoin(url, link_url))
        except requests.RequestException as error:
            print(f"Skipping {link_url}: {error}")
            continue
        result += f"\n\n{link.get('type', 'page')}\n"
        result += page.get_contents()
    return result

In [17]:
# Print the landing page plus each model-selected page as one block of text
print(get_all_details("https://huggingface.co"))

Found links: {'links': [{'type': 'about page', 'url': 'https://huggingface.co/huggingface'}, {'type': 'careers page', 'url': 'https://apply.workable.com/huggingface/'}, {'type': 'enterprise page', 'url': 'https://huggingface.co/enterprise'}, {'type': 'pricing page', 'url': 'https://huggingface.co/pricing'}, {'type': 'blog page', 'url': 'https://huggingface.co/blog'}, {'type': 'documentation page', 'url': 'https://huggingface.co/docs'}]}
Landing page:
Webpage Title:
Hugging Face – The AI community building the future.
Webpage Contents:
Hugging Face
Models
Datasets
Spaces
Community
Docs
Enterprise
Pricing
Log In
Sign Up
The AI community building the future.
The platform where the machine learning community collaborates on models, datasets, and applications.
Explore AI Apps
or
Browse 1M+ models
Trending on
this week
Models
microsoft/VibeVoice-1.5B
Updated
about 10 hours ago
•
29.7k
•
892
xai-org/grok-2
Updated
5 days ago
•
3.52k
•
831
openbmb/MiniCPM-V-4_5
Updated
about 1 hour ago
•
3.6k


In [18]:
# System prompt for the brochure-writing call.
# Each fragment ends with a space so the sentences do not run together when concatenated
# (previously the text read "Respond in markdown.Include details ...").
system_prompt = (
    "You are an assistant that analyzes the contents of several relevant pages from a company website "
    "and creates a short brochure about the company for prospective customers, investors and recruits. Respond in markdown. "
    "Include details of company culture, customers and careers/jobs if you have the information."
)

# Or uncomment the lines below for a more humorous brochure - this demonstrates how easy it is to incorporate 'tone':

# system_prompt = (
#     "You are an assistant that analyzes the contents of several relevant pages from a company website "
#     "and creates a short humorous, entertaining, jokey brochure about the company for prospective customers, investors and recruits. Respond in markdown. "
#     "Include details of company culture, customers and careers/jobs if you have the information."
# )


In [19]:
def get_brochure_user_prompt(company_name, url, max_chars=5_000):
    """
    Build the user message for the brochure-writing call.

    Args:
        company_name: name to present the company under.
        url: landing page; its contents and those of the selected linked
            pages are gathered with get_all_details.
        max_chars: maximum length of the returned prompt, counted from the
            start of the whole prompt (intro lines included). Pass None to
            disable truncation.

    Returns:
        The prompt text, cut to at most `max_chars` characters.
    """
    user_prompt = f"You are looking at a company called: {company_name}\n"
    user_prompt += "Here are the contents of its landing page and other relevant pages; use this information to build a short brochure of the company in markdown.\n"
    user_prompt += get_all_details(url)
    # Slicing with None keeps the full string, so max_chars=None means "no limit".
    return user_prompt[:max_chars]

In [20]:
# Inspect the truncated user prompt that will be sent for the brochure
get_brochure_user_prompt("HuggingFace", "https://huggingface.co")

Found links: {'links': [{'type': 'about page', 'url': 'https://huggingface.co/huggingface'}, {'type': 'careers page', 'url': 'https://apply.workable.com/huggingface/'}, {'type': 'blog page', 'url': 'https://huggingface.co/blog'}, {'type': 'company page', 'url': 'https://www.linkedin.com/company/huggingface/'}, {'type': 'community page', 'url': 'https://discuss.huggingface.co'}]}


'You are looking at a company called: HuggingFace\nHere are the contents of its landing page and other relevant pages; use this information to build a short brochure of the company in markdown.\nLanding page:\nWebpage Title:\nHugging Face – The AI community building the future.\nWebpage Contents:\nHugging Face\nModels\nDatasets\nSpaces\nCommunity\nDocs\nEnterprise\nPricing\nLog In\nSign Up\nThe AI community building the future.\nThe platform where the machine learning community collaborates on models, datasets, and applications.\nExplore AI Apps\nor\nBrowse 1M+ models\nTrending on\nthis week\nModels\nmicrosoft/VibeVoice-1.5B\nUpdated\nabout 10 hours ago\n•\n29.7k\n•\n892\nxai-org/grok-2\nUpdated\n5 days ago\n•\n3.52k\n•\n831\nopenbmb/MiniCPM-V-4_5\nUpdated\nabout 1 hour ago\n•\n3.6k\n•\n638\nQwen/Qwen-Image-Edit\nUpdated\n3 days ago\n•\n56.1k\n•\n1.47k\ndeepseek-ai/DeepSeek-V3.1\nUpdated\n2 days ago\n•\n48.4k\n•\n637\nBrowse 1M+ models\nSpaces\nRunning\n12.4k\n12.4k\nDeepSite v2\n🐳\nGe

In [21]:
def create_brochure(company_name, url):
    """
    Generate a brochure for the company in a single (non-streaming) request
    and render the reply as Markdown in the notebook.

    Args:
        company_name: name passed through to the user prompt.
        url: the company's landing page.

    Returns:
        None; the brochure is displayed rather than returned.
    """
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": get_brochure_user_prompt(company_name, url)},
    ]
    completion = openai.chat.completions.create(model=MODEL, messages=conversation)
    brochure_markdown = completion.choices[0].message.content
    display(Markdown(brochure_markdown))

In [22]:
# Generate the brochure in one request and render it as Markdown
create_brochure("HuggingFace", "https://huggingface.co")

Found links: {'links': [{'type': 'about page', 'url': 'https://huggingface.co/huggingface'}, {'type': 'careers page', 'url': 'https://apply.workable.com/huggingface/'}, {'type': 'enterprise page', 'url': 'https://huggingface.co/enterprise'}, {'type': 'pricing page', 'url': 'https://huggingface.co/pricing'}, {'type': 'blog page', 'url': 'https://huggingface.co/blog'}, {'type': 'documentation page', 'url': 'https://huggingface.co/docs'}]}


# Hugging Face Brochure

---

## Welcome to Hugging Face

**The AI community building the future.** Hugging Face is an innovative platform that empowers the machine learning community through collaboration on models, datasets, and applications. Whether you are a seasoned developer or a newcomer, Hugging Face enables you to explore, create, and share your work seamlessly.

---

### Our Offerings

- **Models:** Access to over 1 million state-of-the-art machine learning models, including text, image, audio, and more.
- **Datasets:** Browse through over 250,000 datasets that power various ML tasks.
- **Spaces:** Create and deploy applications with our user-friendly spaces, running thousands of ML applications.
- **Enterprise Solutions:** Tailored solutions with enterprise-grade security and dedicated support for over 50,000 organizations, including tech giants like Google, Microsoft, and Amazon.

---

### Community First

At Hugging Face, **community is at the heart of everything we do**. We strive to build the foundation of machine learning tooling collaboratively. By hosting public models and datasets, users can share their contributions and build their profiles in the ML space.

**Join our vibrant community of over 56,000 members** who are passionate about AI and constantly engaging through updates, discussions, and collaborative projects.

---

### Company Culture

Emphasizing openness and collaboration, Hugging Face fosters a culture that values diversity and innovation. Our teams work in a flexible environment, focused on the mission to advance AI for everyone, while also providing the necessary resources to support personal and professional growth.

---

### Careers at Hugging Face

We are always on the lookout for passionate individuals who share our vision for the future of AI. At Hugging Face, you can develop your skills and insights while contributing to groundbreaking projects. 

Explore job opportunities on our [Careers Page](https://huggingface.co/jobs) and join us to make a significant impact in the world of AI and machine learning.

---

### Interested in Collaborating?

Whether you’re looking for advanced ML models, datasets for your projects, or a collaborative space to innovate, **Hugging Face is here for you.**

- **Sign Up** for free to start exploring our tools and resources.
- Connect with us on social media and stay updated on our latest developments.

---

Hugging Face – Building the future of AI together.  
Explore more at [huggingface.co](https://huggingface.co)

--- 

*This brochure provides an overview of Hugging Face, ideal for prospective customers, investors, and recruits looking to understand our mission and core offerings.*

In [23]:
def _strip_markdown_fence(text):
    """Remove a ``` / ```markdown fence wrapped around the whole reply, leaving the inner text untouched."""
    body = text.lstrip()
    for opener in ("```markdown", "```"):
        if body.startswith(opener):
            body = body[len(opener):]
            break
    else:
        return text  # reply is not fenced: show it exactly as received
    body = body.rstrip()
    if body.endswith("```"):
        body = body[:-3]
    return body.strip("\n")


def stream_brochure(company_name, url):
    """
    Generate a brochure for the company and render it incrementally while the
    reply streams in.

    The raw reply is accumulated unchanged. Only the text handed to the
    display has a wrapping ```markdown fence removed, so the word "markdown"
    and any code fences inside the brochure itself are preserved.

    Args:
        company_name: name passed through to the user prompt.
        url: the company's landing page.

    Returns:
        None; the brochure is displayed rather than returned.
    """
    stream = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": get_brochure_user_prompt(company_name, url)}
          ],
        stream=True
    )

    response = ""
    display_handle = display(Markdown(""), display_id=True)
    for chunk in stream:
        if not chunk.choices:
            continue  # a chunk without choices carries no text to append
        response += chunk.choices[0].delta.content or ''
        update_display(Markdown(_strip_markdown_fence(response)), display_id=display_handle.display_id)

In [24]:
# Same brochure request, rendered incrementally as the reply streams in
stream_brochure("HuggingFace", "https://huggingface.co")

Found links: {'links': [{'type': 'about page', 'url': 'https://huggingface.co/huggingface'}, {'type': 'careers page', 'url': 'https://apply.workable.com/huggingface/'}, {'type': 'company page', 'url': 'https://www.linkedin.com/company/huggingface/'}, {'type': 'blog page', 'url': 'https://huggingface.co/blog'}]}


# Hugging Face Brochure

## About Us

Hugging Face is a collaborative AI community focused on advancing the field of machine learning. We provide a robust platform where users can explore and contribute to cutting-edge models, datasets, and applications. Our mission is to empower the machine learning community by creating highly accessible tools and resources that fuel innovation and collaboration.

## Our Offerings

- **Models**: Browse over 1 million state-of-the-art AI models that are continuously updated and maintained by the community.
- **Datasets**: Access a vast library of over 250,000 datasets curated for a variety of machine learning tasks.
- **Spaces**: Host and collaborate on applications and workflows with our versatile platform, which supports text, images, video, audio, and 3D data.
- **Enterprise Solutions**: We offer customized solutions for businesses, complete with enterprise-grade security and dedicated support.

## Community Engagement

At Hugging Face, we believe in the power of community. We facilitate a vibrant ecosystem where over 50,000 organizations, including major players like Google, Amazon, and Microsoft, utilize our platform to innovate and share their work. Our open-source approach allows everyone to contribute to and benefit from the collective knowledge of the community.

## Company Culture

We pride ourselves on fostering a collaborative and inclusive environment that encourages creativity and continuous learning. Our team consists of passionate individuals from diverse backgrounds, all sharing a common goal: to shape the future of artificial intelligence. We value transparency, mutual respect, and support each other's professional development.

## Career Opportunities

Join our team in shaping the future of AI! We are constantly on the lookout for talented individuals who are excited to contribute to the AI community. We offer a variety of roles across multiple disciplines, including engineering, research, and product management. Explore our careers page and embark on an impactful journey with us.

## Connect with Us

Stay updated with our latest advancements and community initiatives through our social media channels and resources:

- [GitHub](https://github.com/huggingface)
- [Twitter](https://twitter.com/huggingface)
- [LinkedIn](https://linkedin.com/company/huggingface)
- [Discord](https://discord.gg/huggingface)

Whether you are a researcher, developer, or enthusiast, Hugging Face is your gateway to the world of machine learning and artificial intelligence. Join us in building the future!

---

**For inquiries, pricing options, or to get started, visit [Hugging Face](https://huggingface.co)**.

In [25]:
# Try changing the system prompt to the humorous version when you make the Brochure for Hugging Face:
# uncomment the alternative `system_prompt` assignment in its cell and re-run that cell first,
# then re-run this one - stream_brochure reads `system_prompt` at call time.

stream_brochure("HuggingFace", "https://huggingface.co")

Found links: {'links': [{'type': 'about page', 'url': 'https://huggingface.co/huggingface'}, {'type': 'careers page', 'url': 'https://apply.workable.com/huggingface/'}, {'type': 'blog page', 'url': 'https://huggingface.co/blog'}, {'type': 'community page', 'url': 'https://discuss.huggingface.co'}, {'type': 'GitHub page', 'url': 'https://github.com/huggingface'}, {'type': 'LinkedIn page', 'url': 'https://www.linkedin.com/company/huggingface/'}]}


# Hugging Face Brochure

## Company Overview
**Hugging Face** is an innovative community-driven platform focused on building the future of artificial intelligence (AI). Our mission is to foster collaboration within the machine learning (ML) community by creating and sharing state-of-the-art models, datasets, and applications. We empower individuals and organizations to explore ML more effectively through our comprehensive resources.

## Our Offerings
- **Models**: Access a vast library of over 1 million machine learning models, including popular ones like VibeVoice, Grok, and MiniCPM. 
- **Datasets**: Discover thousands of datasets curated for various ML tasks.
- **Spaces**: Engage with applications and projects developed by the community, including unique tools for text, image, audio, and video processing.
- **Enterprise Solutions**: We provide advanced features for businesses that include secure infrastructure, dedicated support, and optimized deployment options.

### Key Features
- Collaboration platform for unlimited public models and datasets.
- Easy transition to GPU services for demanding computational tasks, starting at $0.60/hour.
- Enterprise support with tailored plans for teams needing scale and security.

## Customer Base
Join over **50,000 organizations** worldwide that rely on Hugging Face, including industry giants such as:
- **Google**
- **Microsoft**
- **Amazon**
- **Meta**

Our platform serves various sectors from non-profits to large corporations, making it suitable for diverse ML applications.

## Company Culture
At Hugging Face, we embody a collaborative spirit where community engagement is at the heart of our operations. We value transparency, open-source contributions, and innovation. Our approach encourages users—researchers, data scientists, and developers—to share insights and resources, facilitating a rich exchange of knowledge while building a vibrant ecosystem around AI.

## Careers at Hugging Face
We are continually looking for passionate individuals to join our team. We offer exciting career opportunities for:
- Software Engineers
- Data Scientists
- Community Managers
- Marketing Professionals

### Benefits of Joining Us:
- Work in a dynamic and forward-thinking environment.
- Participate in groundbreaking projects and initiatives.
- Engage with a passionate community committed to shaping the future of AI.

## Connect with Us
Become a part of a community that is dedicated to building the future of machine learning. Explore our offerings, engage with our resources, and consider applying for a position with us. 

- **Website**: [huggingface.co](https://huggingface.co)
- **Follow Us**: [GitHub](https://github.com/huggingface), [Twitter](https://twitter.com/huggingface), [LinkedIn](https://linkedin.com/company/huggingface), [Discord](https://discord.com/invite/huggingface)
  
Join us at Hugging Face and help shape the AI landscape!