In [None]:


import os
import json
from dotenv import load_dotenv
from IPython.display import Markdown, display, update_display
from scraper import fetch_website_links, fetch_website_contents
from openai import OpenAI
import google.generativeai as genai

In [36]:
import os
from openai import OpenAI
from dotenv import load_dotenv

load_dotenv(override=True)

# Yahan hum Gemini ki API key hi uthayenge
api_key = os.getenv('GEMINI_API_KEY')

if api_key and len(api_key) > 10:
    print("API key looks good so far")
else:
    print("There might be a problem with your API key!")

# OpenAI SDK ko Gemini ke endpoint par point karein
client = OpenAI(
    api_key=api_key,
    base_url="https://generativelanguage.googleapis.com/v1beta/openai/"
)

# Model ka naam wahi rakhein jo aapne list mein dekha tha
MODEL = 'gemini-2.5-flash' 

print(f"Gemini client initialized using OpenAI SDK style!")

API key looks good so far
Gemini client initialized using OpenAI SDK style!


In [37]:
links = fetch_website_links("https://edwarddonner.com")
links

['https://edwarddonner.com/',
 'https://edwarddonner.com/curriculum/',
 'https://edwarddonner.com/proficient/',
 'https://edwarddonner.com/connect-four/',
 'https://edwarddonner.com/outsmart/',
 'https://edwarddonner.com/about-me-and-about-nebula/',
 'https://edwarddonner.com/posts/',
 'https://edwarddonner.com/',
 'https://news.ycombinator.com',
 'https://nebula.io/?utm_source=ed&utm_medium=referral',
 'https://www.prnewswire.com/news-releases/wynden-stark-group-acquires-nyc-venture-backed-tech-startup-untapt-301269512.html',
 'https://edwarddonner.com/curriculum/',
 'https://edwarddonner.com/2026/01/04/ai-builder-with-n8n-create-agents-and-voice-agents/',
 'https://edwarddonner.com/2026/01/04/ai-builder-with-n8n-create-agents-and-voice-agents/',
 'https://edwarddonner.com/2025/11/11/ai-live-event/',
 'https://edwarddonner.com/2025/11/11/ai-live-event/',
 'https://edwarddonner.com/2025/09/15/ai-in-production-gen-ai-and-agentic-ai-on-aws-at-scale/',
 'https://edwarddonner.com/2025/09/1

## First step: Have GPT-5-nano figure out which links are relevant

### Use a call to gpt-5-nano to read the links on a webpage, and respond in structured JSON.  
It should decide which links are relevant, and replace relative links such as "/about" with "https://company.com/about".  
We will use "one shot prompting" in which we provide an example of how it should respond in the prompt.

This is an excellent use case for an LLM, because it requires nuanced understanding. Imagine trying to code this without LLMs by parsing and analyzing the webpage - it would be very hard!

Sidenote: there is a more advanced technique called "Structured Outputs" in which we require the model to respond according to a spec. We cover this technique in Week 8 during our autonomous Agentic AI project.

In [38]:
link_system_prompt = """
You are provided with a list of links found on a webpage.
You are able to decide which of the links would be most relevant to include in a brochure about the company,
such as links to an About page, or a Company page, or Careers/Jobs pages.
You should respond in JSON as in this example:

{
    "links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page", "url": "https://another.full.url/careers"}
    ]
}
"""


In [39]:
def get_links_user_prompt(url):
    user_prompt = f"""
Here is the list of links on the website {url} -
Please decide which of these are relevant web links for a brochure about the company, 
respond with the full https URL in JSON format.
Do not include Terms of Service, Privacy, email links.

Links (some might be relative links):

"""
    links = fetch_website_links(url)
    user_prompt += "\n".join(links)
    return user_prompt

In [40]:
print(get_links_user_prompt("https://edwarddonner.com"))


Here is the list of links on the website https://edwarddonner.com -
Please decide which of these are relevant web links for a brochure about the company, 
respond with the full https URL in JSON format.
Do not include Terms of Service, Privacy, email links.

Links (some might be relative links):

https://edwarddonner.com/
https://edwarddonner.com/curriculum/
https://edwarddonner.com/proficient/
https://edwarddonner.com/connect-four/
https://edwarddonner.com/outsmart/
https://edwarddonner.com/about-me-and-about-nebula/
https://edwarddonner.com/posts/
https://edwarddonner.com/
https://news.ycombinator.com
https://nebula.io/?utm_source=ed&utm_medium=referral
https://www.prnewswire.com/news-releases/wynden-stark-group-acquires-nyc-venture-backed-tech-startup-untapt-301269512.html
https://edwarddonner.com/curriculum/
https://edwarddonner.com/2026/01/04/ai-builder-with-n8n-create-agents-and-voice-agents/
https://edwarddonner.com/2026/01/04/ai-builder-with-n8n-create-agents-and-voice-agents/

In [41]:
# Cell 48: JSON output ke liye function
def select_relevant_links(url):
    print(f"Selecting relevant links for {url} by calling {MODEL}")
    response = client.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": link_system_prompt},
            {"role": "user", "content": get_links_user_prompt(url)}
        ],
        response_format={"type": "json_object"}
    )
    result = response.choices[0].message.content
    links = json.loads(result)
    print(f"Found {len(links['links'])} relevant links")
    return links

In [44]:
select_relevant_links("https://edwarddonner.com")

Selecting relevant links for https://edwarddonner.com by calling gemini-2.5-flash
Found 8 relevant links


{'links': [{'type': 'homepage', 'url': 'https://edwarddonner.com/'},
  {'type': 'about page',
   'url': 'https://edwarddonner.com/about-me-and-about-nebula/'},
  {'type': 'curriculum page', 'url': 'https://edwarddonner.com/curriculum/'},
  {'type': 'proficiency page', 'url': 'https://edwarddonner.com/proficient/'},
  {'type': 'project page', 'url': 'https://edwarddonner.com/connect-four/'},
  {'type': 'project page', 'url': 'https://edwarddonner.com/outsmart/'},
  {'type': 'blog page', 'url': 'https://edwarddonner.com/posts/'},
  {'type': 'related company/product page', 'url': 'https://nebula.io/'}]}

In [43]:
# Cell 49: Testing
links_json = select_relevant_links("https://edwarddonner.com")
print(json.dumps(links_json, indent=4))

Selecting relevant links for https://edwarddonner.com by calling gemini-2.5-flash
Found 5 relevant links
{
    "links": [
        {
            "type": "home page",
            "url": "https://edwarddonner.com/"
        },
        {
            "type": "offerings page",
            "url": "https://edwarddonner.com/curriculum/"
        },
        {
            "type": "offerings page",
            "url": "https://edwarddonner.com/proficient/"
        },
        {
            "type": "about page",
            "url": "https://edwarddonner.com/about-me-and-about-nebula/"
        },
        {
            "type": "associated company/product page",
            "url": "https://nebula.io/?utm_source=ed&utm_medium=referral"
        }
    ]
}


In [45]:
def select_relevant_links(url):
    print(f"Selecting relevant links for {url} by calling {MODEL}")
    response = client.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": link_system_prompt},
            {"role": "user", "content": get_links_user_prompt(url)}
        ],
        response_format={"type": "json_object"}
    )
    result = response.choices[0].message.content
    links = json.loads(result)
    print(f"Found {len(links['links'])} relevant links")
    return links

In [47]:
select_relevant_links("https://edwarddonner.com")

Selecting relevant links for https://edwarddonner.com by calling gemini-2.5-flash
Found 5 relevant links


{'links': [{'type': 'homepage', 'url': 'https://edwarddonner.com/'},
  {'type': 'offerings page', 'url': 'https://edwarddonner.com/curriculum/'},
  {'type': 'expertise page', 'url': 'https://edwarddonner.com/proficient/'},
  {'type': 'about page',
   'url': 'https://edwarddonner.com/about-me-and-about-nebula/'},
  {'type': 'associated company',
   'url': 'https://nebula.io/?utm_source=ed&utm_medium=referral'}]}

In [48]:
select_relevant_links("https://huggingface.co")

Selecting relevant links for https://huggingface.co by calling gemini-2.5-flash
Found 16 relevant links


{'links': [{'type': 'home page', 'url': 'https://huggingface.co'},
  {'type': 'products page', 'url': 'https://huggingface.co/models'},
  {'type': 'products page', 'url': 'https://huggingface.co/datasets'},
  {'type': 'products page', 'url': 'https://huggingface.co/spaces'},
  {'type': 'enterprise solutions page',
   'url': 'https://huggingface.co/enterprise'},
  {'type': 'pricing page', 'url': 'https://huggingface.co/pricing'},
  {'type': 'documentation page', 'url': 'https://huggingface.co/docs'},
  {'type': 'company updates page', 'url': 'https://huggingface.co/changelog'},
  {'type': 'company profile page',
   'url': 'https://huggingface.co/huggingface'},
  {'type': 'brand information page', 'url': 'https://huggingface.co/brand'},
  {'type': 'careers page', 'url': 'https://apply.workable.com/huggingface/'},
  {'type': 'learn page', 'url': 'https://huggingface.co/learn'},
  {'type': 'blog page', 'url': 'https://huggingface.co/blog'},
  {'type': 'github page', 'url': 'https://github.

## Second step: make the brochure!
Assemble all the details into another prompt to model

In [59]:
import time 
def fetch_page_and_all_relevant_links(url):
    contents = fetch_website_contents(url)
    relevant_links = select_relevant_links(url)
    result = f"## Landing Page:\n\n{contents}\n## Relevant Links:\n"
    for link in relevant_links['links']:
        result += f"\n\n### Link: {link['type']}\n"
        result += fetch_website_contents(link["url"])
        time.sleep(2)
    return result

In [60]:
print(fetch_page_and_all_relevant_links("https://huggingface.co"))

Selecting relevant links for https://huggingface.co by calling gemini-2.5-flash
Found 7 relevant links
## Landing Page:

Hugging Face ‚Äì The AI community building the future.

Hugging Face
Models
Datasets
Spaces
Community
Docs
Enterprise
Pricing
Log In
Sign Up
The AI community building the future.
The platform where the machine learning community collaborates on models, datasets, and applications.
Explore AI Apps
or
Browse 2M+ models
Trending on
this week
Models
moonshotai/Kimi-K2.5
Updated
about 6 hours ago
‚Ä¢
203k
‚Ä¢
1.69k
zai-org/GLM-OCR
Updated
2 days ago
‚Ä¢
96.3k
‚Ä¢
640
stepfun-ai/Step-3.5-Flash
Updated
2 days ago
‚Ä¢
8.69k
‚Ä¢
431
circlestone-labs/Anima
Updated
4 days ago
‚Ä¢
43.1k
‚Ä¢
412
Qwen/Qwen3-Coder-Next
Updated
1 day ago
‚Ä¢
18.7k
‚Ä¢
411
Browse 2M+ models
Spaces
Running
on
Zero
Featured
1.23k
Qwen3-TTS Demo
üéô
1.23k
Transform text into natural-sounding speech with custom voices
Running
433
Demo Playground
‚ö°
433
Free platform to access multiple AI models
Running


In [55]:
brochure_system_prompt = """
You are an assistant that analyzes the contents of several relevant pages from a company website
and creates a short brochure about the company for prospective customers, investors and recruits.
Respond in markdown without code blocks.
Include details of company culture, customers and careers/jobs if you have the information.
"""

# Or uncomment the lines below for a more humorous brochure - this demonstrates how easy it is to incorporate 'tone':

# brochure_system_prompt = """
# You are an assistant that analyzes the contents of several relevant pages from a company website
# and creates a short, humorous, entertaining, witty brochure about the company for prospective customers, investors and recruits.
# Respond in markdown without code blocks.
# Include details of company culture, customers and careers/jobs if you have the information.
# """


In [None]:
def get_brochure_user_prompt(company_name, url):
    user_prompt = f"""
You are looking at a company called: {company_name}
Here are the contents of its landing page and other relevant pages;
use this information to build a short brochure of the company in markdown without code blocks.\n\n
"""
    user_prompt += fetch_page_and_all_relevant_links(url)
    user_prompt = user_prompt[:30_000] # Truncate if more than 30,000 characters
    return user_prompt

In [62]:
get_brochure_user_prompt("HuggingFace", "https://huggingface.co")

Selecting relevant links for https://huggingface.co by calling gemini-2.5-flash
Found 18 relevant links




In [63]:
def create_brochure(company_name, url):
    response = client.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": brochure_system_prompt},
            {"role": "user", "content": get_brochure_user_prompt(company_name, url)}
        ],
    )
    result = response.choices[0].message.content
    display(Markdown(result))

In [64]:
create_brochure("HuggingFace", "https://huggingface.co")

Selecting relevant links for https://huggingface.co by calling gemini-2.5-flash
Found 18 relevant links


# Hugging Face: The AI Community Building the Future

Hugging Face is the leading collaboration platform empowering the machine learning community to **create, discover, and collaborate on AI better**. We are on a mission to democratize good machine learning, fostering an open and ethical AI future, one commit at a time. As a company at the heart of the AI revolution, we aim to empower the next generation of machine learning engineers, scientists, and end users to learn, collaborate, and share their work.

## Discover the Home of Machine Learning

Our platform serves as a central hub where the world's most innovative minds come together to share, explore, and build with cutting-edge AI.

### Our Core Offerings:

*   **Models:** Access and contribute to a vast collection of over **2 million machine learning models**. Explore state-of-the-art models for diverse tasks like text generation, image-to-text, text-to-speech, 3D, and more, leveraging popular libraries like PyTorch, TensorFlow, and our own Transformers and Diffusers.
*   **Datasets:** Browse and utilize over **500,000 datasets** across various modalities including text, image, audio, video, and 3D. Easily find the data you need to train and evaluate your models.
*   **Spaces:** Deploy, share, and experiment with over **1 million AI applications and demos**. Showcase your work or explore interactive applications for tasks ranging from natural language processing and image generation to music creation and robotics.

### Powering Innovation for Everyone:

Whether you're an individual developer, a growing startup, or a large enterprise, Hugging Face provides the tools and infrastructure to accelerate your ML journey:

*   **Move Faster:** Leverage our powerful open-source stack and extensive libraries for rapid development and deployment.
*   **Explore All Modalities:** Work across text, image, video, audio, and 3D with unparalleled flexibility.
*   **Build Your Portfolio:** Share your creations with the world, gain recognition, and build your professional ML profile.

## Tailored Solutions for Your Needs

We offer flexible plans designed to support every stage of your AI development and collaboration:

*   **Free Hugging Face Hub:** Join the open-source movement! Explore, experiment, collaborate, and build with a rich set of ML features, Git-based collaboration, and a vibrant community.
*   **PRO Account (starting at $9/month):** Boost your personal experience with increased private storage, more inference credits, priority access to compute, and exclusive features like Spaces Dev Mode and private dataset viewing.
*   **Team (starting at $20/user/month):** Instant setup for growing teams needing advanced security, access controls (SSO, Audit Logs, Resource Groups), centralized token management, analytics, advanced compute options, private storage, and priority support.
*   **Enterprise (Custom Pricing):** Designed for large organizations, offering all Team plan benefits plus the highest storage, bandwidth, and API limits, managed billing, legal & compliance support, and personalized onboarding.

## Join Our Thriving Community

Hugging Face is more than a platform; it's a dynamic community committed to open and ethical AI.

*   **Learn & Grow:** Dive into our comprehensive courses on LLMs, Robotics, Deep RL, Diffusion Models, and more. Access extensive documentation and a rich blog with articles on cutting-edge research and practical guides.
*   **Collaborate & Connect:** Engage in discussions on our active community forums, contribute to open-source projects on GitHub, and stay connected with the latest trends.

## Careers at Hugging Face

Do you want to democratize good machine learning and build an open AI future? We are a rapidly growing team, and if our mission resonates with you, we invite you to **join us!** While specific job openings are dynamic, we are always looking for passionate individuals eager to contribute to the AI revolution.

**Hugging Face: Where the future of AI is built, together.**

## Finally - a minor improvement

With a small adjustment, we can change this so that the results stream back from OpenAI,
with the familiar typewriter animation

In [72]:
def stream_brochure(company_name, url):
    stream = client.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": brochure_system_prompt},
            {"role": "user", "content": get_brochure_user_prompt(company_name, url)}
          ],
        stream=True
    )    
    response = ""
    display_handle = display(Markdown(""), display_id=True)
    for chunk in stream:
        response += chunk.choices[0].delta.content or ''
        update_display(Markdown(response), display_id=display_handle.display_id)

In [70]:
stream_brochure("HuggingFace", "https://huggingface.co")

Selecting relevant links for https://huggingface.co by calling gemini-2.5-flash
Found 7 relevant links


**Hugging Face: The AI Community Building the Future**

Welcome to Hugging Face, the leading collaboration platform at the heart of the AI revolution. We empower the machine learning community to create, discover, and build the future of AI together. Founded in 2016, our mission is to solve and democratize artificial intelligence through natural language and beyond, fostering an open and ethical AI future.

---

**For Prospective Customers: Accelerate Your AI Journey**

Whether you're an individual developer, a growing team, or a large enterprise, Hugging Face provides the tools and community to bring your AI projects to life.

**The Hugging Face Hub: Your Home for Machine Learning**
*   **Vast Resources:** Explore and interact with over 2 million models, 500,000 datasets, and 1 million applications (Spaces) across all modalities‚Äîtext, image, video, audio, and 3D.
*   **Seamless Collaboration:** Our Git-based platform is designed for collaboration, allowing you to host and share unlimited public models, datasets, and applications, build your ML portfolio, and connect with a vibrant global community.
*   **Open Source First:** Move faster with the robust Hugging Face open-source stack, supporting innovation and accessibility in ML.

**Advanced Solutions for Teams & Enterprises**
For organizations seeking enhanced capabilities, Hugging Face offers tailored plans:
*   **PRO Account:** Boost your personal experience with 10x private storage, 20x inference credits, and premium compute access.
*   **Team Plan:** Instant setup for growing teams with enterprise-grade security features like SSO, data location management, audit logs, granular access controls, and analytics.
*   **Enterprise Solution:** For large organizations, enjoy all Team benefits plus the highest storage/bandwidth limits, managed billing, legal/compliance support, and personalized priority support. Maximize scalability and performance with advanced compute options like ZeroGPU.

Join the most forward-thinking AI organizations leveraging Hugging Face to build AI with confidence and control.

---

**For Investors: Investing in the Future of AI**

Hugging Face stands at the epicenter of the rapidly expanding AI market. As the premier collaboration platform for machine learning, we are uniquely positioned for substantial growth.
*   **Market Leadership:** We host a fast-growing community, popular open-source ML libraries, and tools, with a talented science team pushing the boundaries of technology.
*   **Scalable Business Model:** Our tiered pricing structure (PRO, Team, Enterprise) offers diverse revenue streams, catering to individual contributors up to global enterprises, complemented by our foundational free open-source hub.
*   **Impactful Mission:** By democratizing AI and fostering open, ethical development, Hugging Face is not just a technology company but a movement shaping the future of artificial intelligence.

---

**For Recruits: Join the AI Community Building the Future**

At Hugging Face, you'll be part of a dynamic and impactful team dedicated to solving and democratizing artificial intelligence. We believe in empowering the next generation of machine learning engineers, scientists, and end users to learn, collaborate, and share their work.

*   **Innovative Environment:** Work at the cutting edge of AI, contributing to widely used open-source ML libraries, models, and tools that power the AI revolution.
*   **Collaborative Culture:** We foster an open, learning-oriented environment where contributions are valued, and knowledge sharing is central. Our team includes a talented science group actively exploring new frontiers in ML.
*   **Impactful Work:** Your work will directly contribute to an open and ethical AI future, making machine learning more accessible and powerful for everyone.
*   **Learning & Growth:** Access extensive learning resources, including specialized courses on LLMs, Robotics, Deep RL, Computer Vision, and more, enabling continuous professional development.

While our careers page highlights specific openings, we are always looking for passionate individuals ready to make a significant impact on the AI landscape.

---

**Learn More & Connect:**

Explore the Hugging Face Hub, browse our models, datasets, and applications, or learn how our Enterprise solutions can accelerate your organization's AI journey.

*   **Explore:** huggingface.co
*   **Contact Sales:** For Enterprise inquiries
*   **Join the Community:** Be part of the conversation and help build the future of AI.