In [36]:
import os
import json
from dotenv import load_dotenv
from scraper import fetch_website_contents, fetch_website_links
from IPython.display import Markdown, display, update_display
from openai import OpenAI

In [2]:
load_dotenv()
api_key = os.getenv("OPENAI_API_KEY")

# Check the API key
if not api_key:
    print("No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!")
elif not api_key.startswith("sk-proj-"):
    print("An API key was found, but it is in the wrong format; please check you're using the right key - see troubleshooting notebook")
elif api_key.strip() != api_key:
    print("An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook")
else:
    print("API key found and looks good so far!")

API key found and looks good so far!


In [31]:
openai = OpenAI()
MODEL = "gpt-5-nano"
MODEL_1 = "gpt-4.1-mini"

In [5]:
links = fetch_website_links("https://edwarddonner.com")
links

['https://edwarddonner.com/',
 'https://edwarddonner.com/connect-four/',
 'https://edwarddonner.com/outsmart/',
 'https://edwarddonner.com/about-me-and-about-nebula/',
 'https://edwarddonner.com/posts/',
 'https://edwarddonner.com/',
 'https://news.ycombinator.com',
 'https://nebula.io/?utm_source=ed&utm_medium=referral',
 'https://www.prnewswire.com/news-releases/wynden-stark-group-acquires-nyc-venture-backed-tech-startup-untapt-301269512.html',
 'https://patents.google.com/patent/US20210049536A1/',
 'https://www.linkedin.com/in/eddonner/',
 'https://edwarddonner.com/2025/11/11/ai-live-event/',
 'https://edwarddonner.com/2025/11/11/ai-live-event/',
 'https://edwarddonner.com/2025/09/15/ai-in-production-gen-ai-and-agentic-ai-on-aws-at-scale/',
 'https://edwarddonner.com/2025/09/15/ai-in-production-gen-ai-and-agentic-ai-on-aws-at-scale/',
 'https://edwarddonner.com/2025/05/28/connecting-my-courses-become-an-llm-expert-and-leader/',
 'https://edwarddonner.com/2025/05/28/connecting-my-cou

In [6]:
link_system_prompt = """
You are provided with a list of links found on a webpage.
You are able to decide which of the links would be most relevant to include in a brochure about the company,
such as links to an About page, or a Company page, or Careers/Jobs pages.
You should respond in JSON as in this example:

{
    "links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page", "url": "https://another.full.url/careers"}
    ]
}
"""

In [7]:
def get_links_user_prompt(url):
    user_prompt = f"""Here is the list of links on the website {url} -
        Please decide which of these are relevant web links for a brochure about the company, 
        respond with the full https URL in JSON format.
        Do not include Terms of Service, Privacy, email links.

        Links (some might be relative links):
    """
    links = fetch_website_links(url)
    user_prompt += "\n".join(links)
    return user_prompt

In [8]:
print(get_links_user_prompt("https://edwarddonner.com"))

Here is the list of links on the website https://edwarddonner.com -
        Please decide which of these are relevant web links for a brochure about the company, 
        respond with the full https URL in JSON format.
        Do not include Terms of Service, Privacy, email links.

        Links (some might be relative links):
    https://edwarddonner.com/
https://edwarddonner.com/connect-four/
https://edwarddonner.com/outsmart/
https://edwarddonner.com/about-me-and-about-nebula/
https://edwarddonner.com/posts/
https://edwarddonner.com/
https://news.ycombinator.com
https://nebula.io/?utm_source=ed&utm_medium=referral
https://www.prnewswire.com/news-releases/wynden-stark-group-acquires-nyc-venture-backed-tech-startup-untapt-301269512.html
https://patents.google.com/patent/US20210049536A1/
https://www.linkedin.com/in/eddonner/
https://edwarddonner.com/2025/11/11/ai-live-event/
https://edwarddonner.com/2025/11/11/ai-live-event/
https://edwarddonner.com/2025/09/15/ai-in-production-gen-ai-a

In [12]:
def select_relevant_urls(url):
    user_prompt = get_links_user_prompt(url)
    response = openai.chat.completions.create(
        model = MODEL,
        messages = [
            {"role": 'system', "content": link_system_prompt},
            {"role": "user", "content": user_prompt}
        ],
        response_format={"type": "json_object"}
        )
    result = response.choices[0].message.content
    links = json.loads(result)
    return links

In [13]:
response = select_relevant_urls("https://edwarddonner.com")

In [14]:
response

{'links': [{'type': 'about page',
   'url': 'https://edwarddonner.com/about-me-and-about-nebula/'},
  {'type': 'company page',
   'url': 'https://nebula.io/?utm_source=ed&utm_medium=referral'},
  {'type': 'home page', 'url': 'https://edwarddonner.com/'},
  {'type': 'blog page', 'url': 'https://edwarddonner.com/posts/'},
  {'type': 'linkedin', 'url': 'https://www.linkedin.com/in/eddonner/'},
  {'type': 'twitter', 'url': 'https://twitter.com/edwarddonner'},
  {'type': 'facebook', 'url': 'https://www.facebook.com/edward.donner.52'},
  {'type': 'blog post',
   'url': 'https://edwarddonner.com/2025/11/11/ai-live-event/'},
  {'type': 'blog post',
   'url': 'https://edwarddonner.com/2025/09/15/ai-in-production-gen-ai-and-agentic-ai-on-aws-at-scale/'},
  {'type': 'blog post',
   'url': 'https://edwarddonner.com/2025/05/28/connecting-my-courses-become-an-llm-expert-and-leader/'},
  {'type': 'blog post',
   'url': 'https://edwarddonner.com/2025/05/18/2025-ai-executive-briefing/'},
  {'type': 'pr

In [15]:
response_2 = select_relevant_urls("https://oloruntobiolutola.github.io/")

In [16]:
response_2

{'links': [{'type': 'LinkedIn',
   'url': 'https://www.linkedin.com/in/oloruntobi-olutola-22654a224'},
  {'type': 'GitHub', 'url': 'https://github.com/OloruntobiOlutola'},
  {'type': 'Location (Google Maps)',
   'url': 'https://www.google.com/maps?q=Orsay,+France'}]}

In [26]:
def fetch_page_and_all_relevant_links(url):
    content = fetch_website_contents(url)
    relevant_links = select_relevant_urls(url)
    result = f"## Landing Page:\n\n{content}\n## Relevant Links:\n"
    for link in relevant_links['links']:
        result += f"\n\n### Link: {link['type']}\n"
        result += fetch_website_contents(link["url"])
    return result

In [19]:
response = fetch_page_and_all_relevant_links("https://oloruntobiolutola.github.io/")

In [20]:
response

## Landing Page:

Oloruntobi Olutola - Software Engineer

OOP
Oloruntobi Paul Olutola
Software & Data Engineer | AI & ML Enthusiast | Mentor
üìß oloruntobiolutola@gmail.com
üìç Orsay, France
üìû +33 745 373 399
üíº Open to Work
üêô GitHub
üíº LinkedIn
üë®‚Äçüíª About Me
Results-driven Software and Data Engineer with over 4 years of professional experience, specializing in TypeScript, NestJS, Angular, and expanding into AI/ML. First-Class Honours graduate in Computer Engineering from the Federal University of Technology, Akure (4.59/5.00 GPA). Currently pursuing a Master's degree in Data Science at the Universit√© Paris Saclay, focusing on Machine Learning, Natural Language Processing, and Large Language Models. Passionate about leveraging AI to solve real-world problems and dedicated to mentoring aspiring engineers, I have successfully trained and guided numerous developers throughout my career.
üìä Quick Stats
4+
Years Experience
4.59
GPA (First-Class)
15+
Technologies
20+
Engineers Mentored
üõ†Ô∏è Technical Skills
Data Science & AI
Machine Learning
Deep Learning
NLP
LLMs
Data Analysis
TensorFlow
PyTorch
Scikit-learn
Frontend
Angular
React
React Native
HTML
CSS
Backend
Node.js
NestJS
Express.js
Spring Boot
Databases
PostgreSQL
MongoDB
MySQL
Cloud & DevOps
AWS
Docker
GitHub
Git
Languages
Python
TypeScript
JavaScript
Java
C#
Data Tools
Pandas
NumPy
Jupyter
Matplotlib
Seaborn
Other
Microservices
Linux
Agile
Teaching
üíº Professional Experience
Freelance Software Engineer
Elunic AG - Germany
August 2024 - Present
Delivering high-quality software solutions for enterprise clients in Germany
Working remotely on full-stack development projects
Collaborating with international teams across different time zones
Implementing scalable and maintainable software architectures
Full Stack Engineer
Seamfix Limited - Lagos, Nigeria
April 2022 - February 2025
Integrated third-party APIs for verifying Nigerian digital identity numbers and performing face matches
Developed microservices for bulk identity verification and face mat
## Relevant Links:


### Link: GitHub profile
OloruntobiOlutola (Olutola Oloruntobi Paul) ¬∑ GitHub

Skip to content
Navigation Menu
Toggle navigation
Sign in
Appearance settings
Platform
AI CODE CREATION
GitHub Copilot
Write better code with AI
GitHub Spark
Build and deploy intelligent apps
GitHub Models
Manage and compare prompts
MCP Registry
New
Integrate external tools
DEVELOPER WORKFLOWS
Actions
Automate any workflow
Codespaces
Instant dev environments
Issues
Plan and track work
Code Review
Manage code changes
APPLICATION SECURITY
GitHub Advanced Security
Find and fix vulnerabilities
Code security
Secure your code as you build
Secret protection
Stop leaks before they start
EXPLORE
Why GitHub
Documentation
Blog
Changelog
Marketplace
View all features
Solutions
BY COMPANY SIZE
Enterprises
Small and medium teams
Startups
Nonprofits
BY USE CASE
App Modernization
DevSecOps
DevOps
CI/CD
View all use cases
BY INDUSTRY
Healthcare
Financial services
Manufacturing
Government
View all industries
View all solutions
Resources
EXPLORE BY TOPIC
AI
Software Development
DevOps
Security
View all topics
EXPLORE BY TYPE
Customer stories
Events & webinars
Ebooks & reports
Business insights
GitHub Skills
SUPPORT & SERVICES
Documentation
Customer support
Community forum
Trust center
Partners
Open Source
COMMUNITY
GitHub Sponsors
Fund open source developers
PROGRAMS
Security Lab
Maintainer Community
Accelerator
Archive Program
REPOSITORIES
Topics
Trending
Collections
Enterprise
ENTERPRISE SOLUTIONS
Enterprise platform
AI-powered developer platform
AVAILABLE ADD-ONS
GitHub Advanced Security
Enterprise-grade security features
Copilot for Business
Enterprise-grade AI features
Premium Support
Enterprise-grade 24/7 support
Pricing
Search or jump to...
Search code, repositories, users, issues, pull requests...
Search
Clear
Search syntax tips
Provide feedback
We read every piece of feedback, and take your input very seriously.
Include my email address so I can be contacted
Cancel
Submit feedback
Saved searches
Use saved searches to filte

### Link: LinkedIn profile
No title found



### Link: Location / map
Avant d'acc√©der √† Google¬†Maps

FR
Fran√ßais
France
Deutsch
English
Espa√±ol
Italiano
ÿßŸÑÿπÿ±ÿ®Ÿäÿ©
Toutes les langues
Afrikaans
az…ôrbaycan
bosanski
catal√†
ƒåe≈°tina
Cymraeg
Dansk
Deutsch
eesti
English
United Kingdom
English
United States
Espa√±ol
Espa√±a
Espa√±ol
Latinoam√©rica
euskara
Filipino
Fran√ßais
Canada
Gaeilge
galego
Hrvatski
Indonesia
isiZulu
√≠slenska
Italiano
Kiswahili
latvie≈°u
lietuvi≈≥
magyar
Melayu
Nederlands
norsk
o‚Äòzbek
polski
Portugu√™s
Brasil
Portugu√™s
Portugal
rom√¢nƒÉ
shqip
Slovenƒçina
sloven≈°ƒçina
srpski (latinica)
Suomi
Svenska
Ti·∫øng Vi·ªát
T√ºrk√ße
ŒïŒªŒªŒ∑ŒΩŒπŒ∫Œ¨
–±–µ–ª–∞—Ä—É—Å–∫–∞—è
–±—ä–ª–≥–∞—Ä—Å–∫–∏
–∫—ã—Ä–≥—ã–∑—á–∞
“õ–∞–∑–∞“õ —Ç—ñ–ª—ñ
–º–∞–∫–µ–¥–æ–Ω—Å–∫–∏
–º–æ–Ω–≥–æ–ª
–†—É—Å—Å–∫–∏–π
—Å—Ä–ø—Å–∫–∏
–£–∫—Ä–∞—ó–Ω—Å—å–∫–∞
·É•·Éê·É†·Éó·É£·Éö·Éò
’∞’°’µ’•÷Ä’•’∂
◊¢◊ë◊®◊ô◊™
ÿßÿ±ÿØŸà
ÿßŸÑÿπÿ±ÿ®Ÿäÿ©
ŸÅÿßÿ±ÿ≥€å
·ä†·àõ·à≠·äõ
‡§®‡•á‡§™‡§æ‡§≤‡•Ä
‡§Æ‡§∞‡§æ‡§†‡•Ä
‡§π‡§ø‡§®‡•ç‡§¶‡•Ä
‡¶Ö‡¶∏‡¶Æ‡ßÄ‡¶Ø‡¶º‡¶æ
‡¶¨‡¶æ‡¶Ç‡¶≤‡¶æ
‡®™‡©∞‡®ú‡®æ‡®¨‡©Ä
‡™ó‡´Å‡™ú‡™∞‡™æ‡™§‡´Ä
‡¨ì‡¨°‡¨º‡¨ø‡¨Ü
‡Æ§‡ÆÆ‡Æø‡Æ¥‡Øç
‡∞§‡±Ü‡∞≤‡±Å‡∞ó‡±Å
‡≤ï‡≤®‡≥ç‡≤®‡≤°
‡¥Æ‡¥≤‡¥Ø‡¥æ‡¥≥‡¥Ç
‡∑É‡∑í‡∂Ç‡∑Ñ‡∂Ω
‡πÑ‡∏ó‡∏¢
‡∫•‡∫≤‡∫ß
·Äô·Äº·Äî·Ä∫·Äô·Ä¨
·ûÅ·üí·ûò·üÇ·ûö
ÌïúÍµ≠Ïñ¥
Êó•Êú¨Ë™û
ÁÆÄ‰Ωì‰∏≠Êñá
ÁπÅÈ´î‰∏≠Êñá
ÁπÅÈ´î‰∏≠Êñá
È¶ôÊ∏Ø
Se connecter
FR
Fran√ßais
France
Deutsch
English
Espa√±ol
Italiano
ÿßŸÑÿπÿ±ÿ®Ÿäÿ©
Toutes les langues
Afrikaans
az…ôrbaycan
bosanski
catal√†
ƒåe≈°tina
Cymraeg
Dansk
Deutsch
eesti
English
United Kingdom
English
United States
Espa√±ol
Espa√±a
Espa√±ol
Latinoam√©rica
euskara
Filipino
Fran√ßais
Canada
Gaeilge
galego
Hrvatski
Indonesia
isiZulu
√≠slenska
Italiano
Kiswahili
latvie≈°u
lietuvi≈≥
magyar
Melayu
Nederlands
norsk
o‚Äòzbek
polski
Portugu√™s
Brasil
Portugu√™s
Portugal
rom√¢nƒÉ
shqip
Slovenƒçina
sloven≈°ƒçina
srpski (latinica)
Suomi
Svenska
Ti·∫øng Vi·ªát
T√ºrk√ße
ŒïŒªŒªŒ∑ŒΩŒπŒ∫Œ¨
–±–µ–ª–∞—Ä—É—Å–∫–∞—è
–±—ä–ª–≥–∞—Ä—Å–∫–∏
–∫—ã—Ä–≥—ã–∑—á–∞
“õ–∞–∑–∞“õ —Ç—ñ–ª—ñ
–º–∞–∫–µ–¥–æ–Ω—Å–∫–∏
–º–æ–Ω–≥–æ–ª
–†—É—Å—Å–∫–∏–π
—Å—Ä–ø—Å–∫–∏
–£–∫—Ä–∞—ó–Ω—Å—å–∫–∞
·É•·Éê·É†·Éó·É£·Éö·Éò
’∞’°’µ’•÷Ä’•’∂
◊¢◊ë◊®◊ô◊™
ÿßÿ±ÿØŸà
ÿßŸÑÿπÿ±ÿ®Ÿäÿ©
ŸÅÿßÿ±ÿ≥€å
·ä†·àõ·à≠·äõ
‡§®‡•á‡§™‡§æ‡§≤‡•Ä
‡§Æ‡§∞‡§æ‡§†‡•Ä
‡§π‡§ø‡§®‡•ç‡§¶‡•Ä
‡¶Ö‡¶∏‡¶Æ‡ßÄ‡¶Ø‡¶º‡¶æ
‡¶¨‡¶æ‡¶Ç‡¶≤‡¶æ
‡®™‡©∞‡®ú‡®æ‡®¨‡©Ä
‡™ó‡´Å‡™ú‡™∞‡™æ‡™§‡´Ä
‡¨ì‡¨°‡¨º‡¨ø‡¨Ü
‡Æ§‡ÆÆ‡Æø‡Æ¥‡Øç
‡∞§‡±Ü‡∞≤‡±Å‡∞ó‡±Å
‡≤ï‡≤®‡≥ç‡≤®‡≤°
‡¥Æ‡¥≤‡¥Ø‡¥æ‡¥≥‡¥Ç
‡∑É‡∑í‡∂Ç‡∑Ñ‡∂Ω
‡πÑ‡∏ó‡∏¢
‡∫•‡∫≤‡∫ß
·Äô·Äº·Äî·Ä∫·Äô·Ä¨
·ûÅ·üí·ûò·üÇ·ûö
ÌïúÍµ≠Ïñ¥
Êó•Êú¨Ë™û
ÁÆÄ‰Ωì‰∏≠Êñá
ÁπÅÈ´î‰∏≠Êñá
ÁπÅÈ´î‰∏≠Êñá
È¶ôÊ∏Ø
Se connecter
Avant d'acc√©der √† Google
Nous utilisons des
cookies
et d'autres donn√©es pour¬†:
Proposer les services Google et s'assurer qu'ils fonctionnent correctement
Suivre les interruptions de service et prot√©ger contre le spam, les fraudes et les abus
Mesurer l'engagement de l'audience et les statistiques des sites pour comprendre la fa√ßon dont nos services s

In [23]:
brochure_system_prompt = """
You are an assistant that analyzes the contents of several relevant pages from a company website
and creates a short brochure about the company for prospective customers, investors and recruits.
Respond in markdown without code blocks.
Include details of company culture, customers and careers/jobs if you have the information.
"""

In [30]:
def get_brochure_user_prompt(company_name: str, url: str):
    user_prompt = f"""
    You are looking at a company called: {company_name}
    Here are the contents of its landing page and other relevant pages;
    use this information to build a short brochure of the company in markdown without code blocks.\n\n
    """
    user_prompt += fetch_page_and_all_relevant_links(url)
    user_prompt = user_prompt[:5_000] # Truncate if more than 5,000 characters
    return user_prompt

In [28]:
user_prompt = get_brochure_user_prompt("HuggingFace", "https://huggingface.co/")

In [29]:
user_prompt

'\n    You are looking at a company called: HuggingFace\n    Here are the contents of its landing page and other relevant pages;\n    use this information to build a short brochure of the company in markdown without code blocks.\n\n\n    ## Landing Page:\n\nHugging Face ‚Äì The AI community building the future.\n\nHugging Face\nModels\nDatasets\nSpaces\nCommunity\nDocs\nEnterprise\nPricing\nLog In\nSign Up\nThe AI community building the future.\nThe platform where the machine learning community collaborates on models, datasets, and applications.\nExplore AI Apps\nor\nBrowse 2M+ models\nTrending on\nthis week\nModels\nzai-org/GLM-4.7\nUpdated\n4 days ago\n‚Ä¢\n15.8k\n‚Ä¢\n1.03k\nQwen/Qwen-Image-Layered\nUpdated\n8 days ago\n‚Ä¢\n14.9k\n‚Ä¢\n787\nQwen/Qwen-Image-Edit-2511\nUpdated\n4 days ago\n‚Ä¢\n14.5k\n‚Ä¢\n437\nMiniMaxAI/MiniMax-M2.1\nUpdated\n1 day ago\n‚Ä¢\n15.9k\n‚Ä¢\n422\ngoogle/functiongemma-270m-it\nUpdated\n9 days ago\n‚Ä¢\n33.6k\n‚Ä¢\n645\nBrowse 2M+ models\nSpaces\nRunning\n

In [37]:
def create_brochure(company_name: str, url: str):
    user_prompt = get_brochure_user_prompt(company_name, url)
    stream = openai.chat.completions.create(
        model = MODEL_1,
        messages = [
            {"role": 'system', "content": brochure_system_prompt},
            {"role": "user", "content": user_prompt}
        ],
        stream=True
        )
    response = ""
    display_handle = display(Markdown(""), display_id=True)
    for chunk in stream:
        response += chunk.choices[0].delta.content or ''
        update_display(Markdown(response), display_id=display_handle.display_id)

In [38]:
create_brochure("HuggingFace", "https://huggingface.co/")

# Hugging Face Brochure

---

## About Hugging Face

**Hugging Face** is the vibrant AI community and collaboration platform driving the future of artificial intelligence and machine learning. It serves as a central hub where the global ML community can share, explore, and build on cutting-edge models, datasets, and AI applications.

With over 2 million models, 500,000+ datasets, and a growing ecosystem of 1 million+ applications, Hugging Face empowers machine learning engineers, scientists, and enthusiasts to innovate rapidly and build an open, ethical AI future together.

---

## What We Offer

- **Models:** Access and contribute to a vast library of over 2 million machine learning models spanning text, image, video, audio, and even 3D modalities.
- **Datasets:** Explore more than 500,000 datasets contributed by the community and industry leaders, supporting diverse AI research and applications.
- **Spaces:** Host and deploy AI-powered apps and demos effortlessly, using community or enterprise-grade compute resources.
- **Community:** Join a thriving, collaborative community dedicated to open-source AI progress, with forums, discussions, and educational resources.
- **Enterprise Solutions:** Benefit from advanced AI infrastructure with robust security, access controls, and dedicated support designed for teams and organizations.

---

## Why Choose Hugging Face?

- **Open Collaboration:** Hugging Face is the go-to platform for democratizing AI and open-source innovation in machine learning.
- **Cutting-Edge Innovation:** Supported by a talented science team at the frontier of AI research and backed by fast-evolving open-source tools.
- **Multimodal AI:** Explore state-of-the-art models and datasets across all types of data‚Äîtext, images, audio, video, and 3D.
- **Community Growth:** Engage with a fast-growing, passionate community that shares knowledge, builds portfolios, and accelerates learning.
- **Enterprise-Grade Security:** Work confidently with trusted tools that scale securely within business environments.

---

## Our Customers and Users

From researchers and engineers to businesses and AI enthusiasts, Hugging Face serves a diverse customer base including:

- AI research labs and academic institutions
- Tech companies building AI-driven products and solutions
- Data scientists seeking collaborative resources
- Developers deploying AI applications at scale
- Enterprises requiring secure, scalable AI infrastructure

---

## Company Culture

At Hugging Face, the culture is rooted in **openness, collaboration, and ethical AI development**. The team is passionate about pushing the boundaries of machine learning in a responsible way, fostering innovation through community-driven projects and inclusive participation. Creativity and continuous learning are highly valued, with a strong focus on sharing knowledge openly.

---

## Careers & Opportunities

Join Hugging Face and be part of the AI revolution!

- **Growth Environment:** Work alongside AI researchers and engineers pushing the frontier in open-source projects.
- **Impact:** Contribute to tools and models used globally by millions.
- **Supportive Culture:** Inclusive, mission-driven, and committed to ethical AI adoption.
- **Roles:** Engineering, research science, community & developer relations, product management, and enterprise solutions.

Explore current openings and apply via their Careers page to build the future of AI with Hugging Face.

---

## Connect & Explore More

- Website: [huggingface.co](https://huggingface.co)
- Community Forum & Docs: Extensive learning and collaboration resources
- Social: GitHub, Twitter, LinkedIn, Discord

---

**Hugging Face ‚Äì The AI community building the future.**  
Empower your machine learning projects with open collaboration, state-of-the-art resources, and a global community.  
Join us to create, share, and discover better AI today.