In [1]:
# imports
# If these fail, please check you're running from an 'activated' environment with (llms) in the command prompt

import os
import json
from dotenv import load_dotenv
from IPython.display import Markdown, display, update_display
from scraper import fetch_website_links, fetch_website_contents
from openai import OpenAI

In [2]:
# Initialize and constants

load_dotenv(override=True)
api_key = os.getenv('OPENAI_API_KEY')

if api_key and api_key.startswith('sk-proj-') and len(api_key)>10:
    print("API key looks good so far")
else:
    print("There might be a problem with your API key? Please visit the troubleshooting notebook!")
    
MODEL = 'gpt-5-nano'
openai = OpenAI()

API key looks good so far


In [3]:
links = fetch_website_links("https://darshith.netlify.app/")
links

['https://darshith.netlify.app/#about',
 'https://darshith.netlify.app/#projects',
 'https://darshith.netlify.app/#skills',
 'https://darshith.netlify.app/#contact',
 'https://darshith.netlify.app/',
 'https://darshith.netlify.app/',
 'https://darshith.netlify.app/',
 'https://darshith.netlify.app/']

In [4]:
link_system_prompt = """
You are provided with a list of links found on a webpage.
You are able to decide which of the links would be most relevant to include in a brochure about the company,
such as links to an About page, or a Company page, or Careers/Jobs pages.
You should respond in JSON as in this example:

{
    "links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page", "url": "https://another.full.url/careers"}
    ]
}
"""

In [5]:
def get_links_user_prompt(url):
    user_prompt = f"""
Here is the list of links on the website {url} -
Please decide which of these are relevant web links for a brochure about the company, 
respond with the full https URL in JSON format.
Do not include Terms of Service, Privacy, email links.

Links (some might be relative links):

"""
    links = fetch_website_links(url)
    user_prompt += "\n".join(links)
    return user_prompt

In [6]:
print(get_links_user_prompt("https://darshith.netlify.app/"))


Here is the list of links on the website https://darshith.netlify.app/ -
Please decide which of these are relevant web links for a brochure about the company, 
respond with the full https URL in JSON format.
Do not include Terms of Service, Privacy, email links.

Links (some might be relative links):

https://darshith.netlify.app/#about
https://darshith.netlify.app/#projects
https://darshith.netlify.app/#skills
https://darshith.netlify.app/#contact
https://darshith.netlify.app/
https://darshith.netlify.app/
https://darshith.netlify.app/
https://darshith.netlify.app/


In [7]:
def select_relevant_links(url):
    response = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": link_system_prompt},
            {"role": "user", "content": get_links_user_prompt(url)}
        ],
        response_format={"type": "json_object"}
    )
    result = response.choices[0].message.content
    links = json.loads(result)
    return links
    

In [8]:
select_relevant_links("https://darshith.netlify.app/")

{'links': [{'type': 'about page',
   'url': 'https://darshith.netlify.app/#about'},
  {'type': 'projects page', 'url': 'https://darshith.netlify.app/#projects'},
  {'type': 'skills page', 'url': 'https://darshith.netlify.app/#skills'},
  {'type': 'contact page', 'url': 'https://darshith.netlify.app/#contact'},
  {'type': 'home page', 'url': 'https://darshith.netlify.app/'}]}

In [9]:
def select_relevant_links(url):
    print(f"Selecting relevant links for {url} by calling {MODEL}")
    response = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": link_system_prompt},
            {"role": "user", "content": get_links_user_prompt(url)}
        ],
        response_format={"type": "json_object"}
    )
    result = response.choices[0].message.content
    links = json.loads(result)
    print(f"Found {len(links['links'])} relevant links")
    return links

In [10]:
def fetch_page_and_all_relevant_links(url):
    contents = fetch_website_contents(url)
    relevant_links = select_relevant_links(url)
    result = f"## Landing Page:\n\n{contents}\n## Relevant Links:\n"
    for link in relevant_links['links']:
        result += f"\n\n### Link: {link['type']}\n"
        result += fetch_website_contents(link["url"])
    return result

In [11]:
print(fetch_page_and_all_relevant_links("https://huggingface.co"))

Selecting relevant links for https://huggingface.co by calling gpt-5-nano
Found 9 relevant links
## Landing Page:

Hugging Face
Models
Datasets
Spaces
Community
Docs
Enterprise
Pricing
Log In
Sign Up
The AI community building the future.
The platform where the machine learning community collaborates on models, datasets, and applications.
Explore AI Apps
or
Browse 1M+ models
Trending on
this week
Models
Tongyi-MAI/Z-Image-Turbo
Updated
5 days ago
‚Ä¢
170k
‚Ä¢
2.2k
deepseek-ai/DeepSeek-V3.2
Updated
6 days ago
‚Ä¢
18.1k
‚Ä¢
753
deepseek-ai/DeepSeek-V3.2-Speciale
Updated
6 days ago
‚Ä¢
4.72k
‚Ä¢
522
microsoft/VibeVoice-Realtime-0.5B
Updated
2 days ago
‚Ä¢
20.1k
‚Ä¢
368
alibaba-pai/Z-Image-Turbo-Fun-Controlnet-Union
Updated
5 days ago
‚Ä¢
255
Browse 1M+ models
Spaces
Running
on
Zero
MCP
Featured
1.16k
Z Image Turbo
üèÉ
1.16k
Generate images from text prompts
Running
on
Zero
313
Z Image Turbo
üñº
313
Generate stunning images from text prompts
Running
on
Zero
Featured
431
FLUX.2 [dev]
üíª


In [12]:

brochure_system_prompt = """
You are an assistant that analyzes the contents of several relevant pages from a company website
and creates a short, humorous, entertaining, witty brochure about the company for prospective customers, investors and recruits.
Respond in markdown without code blocks.
Include details of company culture, customers and careers/jobs if you have the information.
"""


In [13]:
def get_brochure_user_prompt(company_name, url):
    user_prompt = f"""
You are looking at a company called: {company_name}
Here are the contents of its landing page and other relevant pages;
use this information to build a short brochure of the company in markdown without code blocks.\n\n
"""
    user_prompt += fetch_page_and_all_relevant_links(url)
    user_prompt = user_prompt[:5_000] # Truncate if more than 5,000 characters
    return user_prompt

In [15]:
print(get_brochure_user_prompt("HuggingFace", "https://huggingface.co"))

Selecting relevant links for https://huggingface.co by calling gpt-5-nano
Found 5 relevant links

You are looking at a company called: HuggingFace
Here are the contents of its landing page and other relevant pages;
use this information to build a short brochure of the company in markdown without code blocks.


## Landing Page:

Hugging Face
Models
Datasets
Spaces
Community
Docs
Enterprise
Pricing
Log In
Sign Up
The AI community building the future.
The platform where the machine learning community collaborates on models, datasets, and applications.
Explore AI Apps
or
Browse 1M+ models
Trending on
this week
Models
Tongyi-MAI/Z-Image-Turbo
Updated
5 days ago
‚Ä¢
170k
‚Ä¢
2.2k
deepseek-ai/DeepSeek-V3.2
Updated
6 days ago
‚Ä¢
18.1k
‚Ä¢
753
deepseek-ai/DeepSeek-V3.2-Speciale
Updated
6 days ago
‚Ä¢
4.72k
‚Ä¢
522
microsoft/VibeVoice-Realtime-0.5B
Updated
2 days ago
‚Ä¢
20.1k
‚Ä¢
368
alibaba-pai/Z-Image-Turbo-Fun-Controlnet-Union
Updated
5 days ago
‚Ä¢
255
Browse 1M+ models
Spaces
Running
on
Z

In [16]:
def create_brochure(company_name, url):
    response = openai.chat.completions.create(
        model="gpt-4.1-mini",
        messages=[
            {"role": "system", "content": brochure_system_prompt},
            {"role": "user", "content": get_brochure_user_prompt(company_name, url)}
        ],
    )
    result = response.choices[0].message.content
    display(Markdown(result))

In [18]:
(create_brochure("HuggingFace", "https://huggingface.co"))

Selecting relevant links for https://huggingface.co by calling gpt-5-nano
Found 5 relevant links


# Hugging Face: The AI Playground for Humans, Coders & Robots Alike ü§ñ‚ú®

---

## Who We Are

Welcome to **Hugging Face** ‚Äî where the coolest cats of the machine learning universe hang out, share, compete, collaborate, and build the future of AI together. Think of us as the ultimate social network for algorithms, datasets, and AI applications ‚Äî just with less cat memes and more code (well, occasionally cat memes too).

We host **over 1 million models** and **hundreds of thousands of datasets** ‚Äî from text to images, videos to audio, and even 3D objects. Want to *generate stunning images from text prompts*? Done. Fancy *real-time voice models*? Check. Need datasets for autonomous vehicles, or game-changing open-source tools? We‚Äôve got you covered.

Our motto? **Create. Collaborate. Accelerate. Repeat.**

---

## What We Offer You

### To Our Valued Users & Partners

- **A thriving community:** 50,000+ companies rely on us, including AI legends like Google, Microsoft, Amazon, Meta, Intel, Grammarly, and more.
- **Open-source powerhouses:** Transformers, Diffusers, Tokenizers, you name it. We build and nurture the ML tools that the world runs on.
- **Spaces:** Your personal AI app playground where you can deploy, demo, and share your creations seamlessly.
- **Enterprise solutions:** Got big dreams for big teams? Enterprise-grade security, priority support, SSO, private datasets, and dedicated access controls at your fingertips.
- **Compute resources:** Need GPUs for heavy lifting? Rent them by the hour, no hassle, no fuss.

---

## Our Culture: Smart, Friendly, and Just a Little Bit Quirky

- **Community-first mindset:** We believe the future of AI is open, collaborative, and inclusive.
- **Innovate fearlessly:** We‚Äôre not afraid to break stuff (responsibly), experiment, and push open-source boundaries.
- **Diversity in data and people:** From worldwide contributors to varied ML fields, diversity powers creativity here.
- **Work hard, hug often:** No seriously ‚Äî "Hugging Face" isn‚Äôt just a name, it‚Äôs a vibe. We take teamwork, wellness, and good vibes seriously.
- **Remote-friendly, global team:** You can code in pajamas from anywhere on the planet. Collaboration doesn‚Äôt require pants.

---

## Careers: Join the AI Revolution

Looking for your next challenge? At Hugging Face, you‚Äôll...

- Build tools used by millions globally.
- Work alongside some of the brightest minds in AI.
- Impact industries from healthcare to entertainment to autonomous vehicles.
- Enjoy benefits *and* brain-boosting snacks.
- Contribute to a community that values open science and innovation.

Our career page is always open, whether you‚Äôre a developer, researcher, community manager, or business guru ‚Äî come hug the future with us!

---

## Why Choose Hugging Face?

Because we turn AI fandom into AI mastery.

- **#1 hub for the latest and greatest ML models.**
- **Unparalleled community & collaboration.**
- **From hobbyists to enterprises:** scalable offerings that grow as your ambitions do.
- **We speak your language:** Python clients, APIs, and zero fees on many models.
- **Innovation served daily:** We don‚Äôt just keep up with AI ‚Äî we help create it.

---

## Get Your Hug On Today!

Ready to explore **1,000,000+ models**, join the AI community, or deploy your own AI app in minutes?

Hop on the face-train at [huggingface.co](https://huggingface.co) and start hugging the future.

---

**Hugging Face** ‚Äî Because the future is friendlier when we build it together. ü§óüíª

---

### Fun Fact

> The ‚ÄúHugging Face‚Äù emoji ü§ó isn‚Äôt just our logo ‚Äî it‚Äôs also our unofficial state emoji, representing open arms to innovation and a warm welcome to every geek, coder, and dreamer.

---

*Disclaimer: No robots or humans were harmed in the making of this brochure, only enlightened.*