In [35]:
# Import all packages
from dotenv import load_dotenv
import os
import json
from bs4 import BeautifulSoup
from IPython.display import Markdown, display, update_display
import requests
import openai

In [36]:
# Pull settings from a local .env file into the process environment
load_dotenv()

# Read the OpenAI key from the environment and hand it to the client library
api_key = os.getenv("OPEN_AI_API_KEY")
openai.api_key = api_key

# Lightweight sanity check: the key must be present and carry the project-key prefix
if not (api_key and api_key.startswith("sk-proj-")):
    print("There might be a problem with your API KEY. Please visit the troubleshooting notebook")
else:
    print("API key looks good so far")

# Chat model used by every completion request in this notebook
MODEL = "gpt-4o-mini"


API key looks good so far


In [37]:
# Scrape any website using the Beautiful Soup package
class Website:
    """Snapshot of one web page: its title, visible body text and link targets.

    Constructing an instance performs an HTTP GET on ``url`` and parses the
    response with BeautifulSoup's ``html.parser``.
    """

    url: str
    title: str
    body: bytes  # raw response payload (``response.content``)
    links: list  # non-empty ``href`` values of every ``<a>`` tag, in page order
    text: str

    def __init__(self, url, timeout=30):
        """Download and parse ``url``.

        Args:
            url: Address of the page to fetch.
            timeout: Seconds passed to ``requests.get`` as its ``timeout``.

        Raises:
            requests.RequestException: If the request fails or times out.
        """
        self.url = url
        # Without an explicit timeout, requests waits indefinitely on a silent server.
        response = requests.get(url, timeout=timeout)
        self.body = response.content
        soup = BeautifulSoup(self.body, "html.parser")

        # ``.string`` is None for an empty <title> or one with nested tags,
        # so fall back to the placeholder in that case as well.
        page_title = soup.title.string if soup.title else None
        self.title = page_title if page_title else "No Title Found"

        if soup.body:
            # Drop elements that contribute no readable text before extracting it.
            for irrelevant in soup.body(["script", "style", "img", "input"]):
                irrelevant.decompose()
            self.text = soup.body.get_text(separator="\n", strip=True)
        else:
            self.text = ""

        hrefs = [link.get("href") for link in soup.find_all("a")]
        self.links = [href for href in hrefs if href]

    def get_content(self):
        """Return the page title and text formatted as one prompt-ready string."""
        return f"webpage Title:\n{self.title}\nwebpage contents:\n{self.text}\n\n"
        

In [38]:
# Smoke-test the scraper: fetch one site and show the hrefs it collected
ed = Website("https://edwarddonner.com")
ed.links

['https://edwarddonner.com/',
 'https://edwarddonner.com/connect-four/',
 'https://edwarddonner.com/outsmart/',
 'https://edwarddonner.com/about-me-and-about-nebula/',
 'https://edwarddonner.com/posts/',
 'https://edwarddonner.com/',
 'https://news.ycombinator.com',
 'https://nebula.io/?utm_source=ed&utm_medium=referral',
 'https://www.prnewswire.com/news-releases/wynden-stark-group-acquires-nyc-venture-backed-tech-startup-untapt-301269512.html',
 'https://patents.google.com/patent/US20210049536A1/',
 'https://www.linkedin.com/in/eddonner/',
 'https://edwarddonner.com/2025/05/28/connecting-my-courses-become-an-llm-expert-and-leader/',
 'https://edwarddonner.com/2025/05/28/connecting-my-courses-become-an-llm-expert-and-leader/',
 'https://edwarddonner.com/2025/05/18/2025-ai-executive-briefing/',
 'https://edwarddonner.com/2025/05/18/2025-ai-executive-briefing/',
 'https://edwarddonner.com/2025/04/21/the-complete-agentic-ai-engineering-course/',
 'https://edwarddonner.com/2025/04/21/the-

In [39]:
# System prompt for the link-selection call. Adjacent string literals are
# concatenated by Python, so no backslash continuations are needed.
link_system_prompt = (
    "You are provided with a list of links found on a webpage. "
    "You are able to decide which of the links would be most relevant to include in a brochure about the company, "
    "such as links to an About page, or a Company page, or career/jobs page.\n"
)

link_system_prompt += "You should respond in JSON as in this example:"
# The example must itself be valid JSON with the same keys in every entry,
# because downstream code reads "type" and "url" from each returned link.
link_system_prompt += """
{
    "links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "career page", "url": "https://another.full.url/careers"}
    ]
}
"""

In [40]:
def get_links_user_prompt(website):
    """Compose the user message asking the model to pick brochure-worthy links.

    Args:
        website: Object exposing ``url`` (str) and ``links`` (iterable of str).

    Returns:
        The instruction text followed by the links, one per line.
    """
    intro = f"Here is the list of links on the website of {website.url} -"
    instruction = "Please decide which of these are relevant web links for a brochure about the company, response with the full https URL Do not include Terms of service, Privacy, email links.\n"
    heading = "Links (some might be relevant links):\n"
    link_lines = "\n".join(website.links)
    return "".join((intro, instruction, heading, link_lines))

In [7]:
# Inspect the link-selection user prompt built from the `ed` page
print(get_links_user_prompt(ed))

Here is the list of links on the website of https://edwarddonner.com -Please decide which of these are relevant web links for a brochure about the company, response with the full https URL Do not include Terms of service, Privacy, email links.
Links (some might be relevant links):
https://edwarddonner.com/
https://edwarddonner.com/connect-four/
https://edwarddonner.com/outsmart/
https://edwarddonner.com/about-me-and-about-nebula/
https://edwarddonner.com/posts/
https://edwarddonner.com/
https://news.ycombinator.com
https://nebula.io/?utm_source=ed&utm_medium=referral
https://www.prnewswire.com/news-releases/wynden-stark-group-acquires-nyc-venture-backed-tech-startup-untapt-301269512.html
https://patents.google.com/patent/US20210049536A1/
https://www.linkedin.com/in/eddonner/
https://edwarddonner.com/2025/05/28/connecting-my-courses-become-an-llm-expert-and-leader/
https://edwarddonner.com/2025/05/28/connecting-my-courses-become-an-llm-expert-and-leader/
https://edwarddonner.com/2025/05

In [41]:
# Print the link-selection user prompt for the `ed` page to check its wording
print(get_links_user_prompt(ed))

Here is the list of links on the website of https://edwarddonner.com -Please decide which of these are relevant web links for a brochure about the company, response with the full https URL Do not include Terms of service, Privacy, email links.
Links (some might be relevant links):
https://edwarddonner.com/
https://edwarddonner.com/connect-four/
https://edwarddonner.com/outsmart/
https://edwarddonner.com/about-me-and-about-nebula/
https://edwarddonner.com/posts/
https://edwarddonner.com/
https://news.ycombinator.com
https://nebula.io/?utm_source=ed&utm_medium=referral
https://www.prnewswire.com/news-releases/wynden-stark-group-acquires-nyc-venture-backed-tech-startup-untapt-301269512.html
https://patents.google.com/patent/US20210049536A1/
https://www.linkedin.com/in/eddonner/
https://edwarddonner.com/2025/05/28/connecting-my-courses-become-an-llm-expert-and-leader/
https://edwarddonner.com/2025/05/28/connecting-my-courses-become-an-llm-expert-and-leader/
https://edwarddonner.com/2025/05

In [42]:
# Ask the OpenAI model which of a page's links belong in a brochure
def get_links(url):
    """Scrape ``url`` and let the chat model select the relevant links.

    Args:
        url: Address of the page whose links are to be classified.

    Returns:
        The model's JSON reply decoded with ``json.loads``.
    """
    page = Website(url)
    conversation = [
        {"role": "system", "content": link_system_prompt},
        {"role": "user", "content": get_links_user_prompt(page)},
    ]
    completion = openai.chat.completions.create(
        model=MODEL,
        messages=conversation,
        # Request a JSON object so the reply can be decoded directly
        response_format={"type": "json_object"},
    )
    reply_text = completion.choices[0].message.content
    return json.loads(reply_text)

In [25]:
# Scrape a second site and show the raw hrefs it exposes
anthropic = Website("https://anthropic.com")
anthropic.links

['#main',
 '#footer',
 'https://www.anthropic.com/',
 'https://www.anthropic.com/claude',
 'https://www.anthropic.com/max',
 'https://www.anthropic.com/team',
 'https://www.anthropic.com/enterprise',
 'https://www.anthropic.com/pricing',
 'https://claude.ai/download',
 'https://claude.ai/',
 'https://www.anthropic.com/news/claude-character',
 'https://www.anthropic.com/api',
 'https://docs.anthropic.com/',
 'https://www.anthropic.com/pricing#api',
 'https://console.anthropic.com/',
 'https://docs.anthropic.com/en/docs/welcome',
 'https://www.anthropic.com/solutions/agents',
 'https://www.anthropic.com/solutions/coding',
 'https://www.anthropic.com/solutions/customer-support',
 'https://www.anthropic.com/solutions/education',
 'https://www.anthropic.com/solutions/financial-services',
 'https://www.anthropic.com/solutions/government',
 'https://www.anthropic.com/customers',
 'https://www.anthropic.com/research',
 'https://www.anthropic.com/economic-index',
 'https://www.anthropic.com/cla

In [43]:
# Run the model-backed link selection on a live site and show the decoded JSON
get_links("https://anthropic.com")

{'links': [{'type': 'home page', 'url': 'https://www.anthropic.com/'},
  {'type': 'about page', 'url': 'https://www.anthropic.com/company'},
  {'type': 'career page', 'url': 'https://www.anthropic.com/careers'},
  {'type': 'team page', 'url': 'https://www.anthropic.com/team'},
  {'type': 'news page', 'url': 'https://www.anthropic.com/news'},
  {'type': 'research page', 'url': 'https://www.anthropic.com/research'},
  {'type': 'events page', 'url': 'https://www.anthropic.com/events'},
  {'type': 'solutions page', 'url': 'https://www.anthropic.com/solutions'},
  {'type': 'customers page', 'url': 'https://www.anthropic.com/customers'},
  {'type': 'transparency page',
   'url': 'https://www.anthropic.com/transparency'},
  {'type': 'learn page', 'url': 'https://www.anthropic.com/learn'}]}

In [44]:
def get_all_details(url):
    """Collect the landing page text plus the text of every model-selected link.

    The link list comes from ``get_links``, i.e. from a model reply that is not
    schema-checked, so entries are read defensively and pages that cannot be
    fetched are skipped instead of aborting the whole run.

    Args:
        url: Address of the company's landing page.

    Returns:
        One string: the landing page section followed by a section per fetched link.
    """
    result = "Landing Page:\n"
    result += Website(url).get_content()
    links = get_links(url)
    print("Found links:", links)
    for link in links.get("links", []):
        link_url = link.get("url")
        if not link_url:
            # Nothing to fetch for an entry that carries no address
            continue
        try:
            page = Website(link_url)
        except requests.RequestException as error:
            # A dead or invalid suggestion should not discard the pages already gathered
            print(f"Skipping {link_url}: {error}")
            continue
        result += f"\n\n{link.get('type', 'page')}\n"
        result += page.get_content()
    return result

In [45]:
# Show the aggregated text (landing page plus selected links) for a live site
print(get_all_details("https://anthropic.com"))

Found links: {'links': [{'type': 'about page', 'url': 'https://www.anthropic.com/company'}, {'type': 'career page', 'url': 'https://www.anthropic.com/careers'}, {'type': 'team page', 'url': 'https://www.anthropic.com/team'}, {'type': 'products page', 'url': 'https://www.anthropic.com/claude'}, {'type': 'news page', 'url': 'https://www.anthropic.com/news'}, {'type': 'events page', 'url': 'https://www.anthropic.com/events'}, {'type': 'research page', 'url': 'https://www.anthropic.com/research'}, {'type': 'solutions page', 'url': 'https://www.anthropic.com/solutions'}]}
Landing Page:
webpage Title:
Home \ Anthropic
webpage contents:
Skip to main content
Skip to footer
Claude
Chat with Claude
Overview
Max plan
Team plan
Enterprise plan
Explore pricing
Download apps
Claude log in
News
Claude’s Character
API
Build with Claude
API overview
Developer docs
Explore pricing
Console log in
News
Learn how to build with Claude
Solutions
Collaborate with Claude
AI agents
Coding
Customer support
Educa

In [60]:
# Alternative, neutral-tone prompt; uncomment it (and comment out the one below) to switch:
# system_prompt = "You are an assistant that analyses the contents of several relevant pages from a company website and create a short brochure about the company culture, customers and career/jobs if you have the information."

# Active prompt: asks for a humorous brochure. Adjacent literals are joined
# by Python into one string.
system_prompt = (
    "You are an assistant that analysis the content of several relevant pages "
    "from a company website and create a short humorous, entertainment, jokey brochure "
    "about the company for prospective customers, investor and recruits. "
    "Respond include details of company culture customers and career/jobs "
    "if you have the information."
)

In [61]:
def get_brochure_user_prompt(company_name, url, max_chars=20_000):
    """Build the user prompt that carries the scraped site content for the brochure.

    Args:
        company_name: Name of the company, quoted in the prompt's first line.
        url: Landing-page address passed on to ``get_all_details``.
        max_chars: Upper bound on the prompt length; text beyond it is cut off.

    Returns:
        The prompt text, at most ``max_chars`` characters long.
    """
    user_prompt = f"You are looking at a company called: {company_name}\n"
    # The trailing newline keeps this sentence separate from the
    # "Landing Page:" heading that opens the scraped details.
    user_prompt += "Here are the contents of its landing page and other relevant pages:\n"
    user_prompt += get_all_details(url)
    # Truncate so an unusually large site cannot produce an oversized prompt
    return user_prompt[:max_chars]

In [62]:
# Inspect the full brochure user prompt generated for a live site
get_brochure_user_prompt("Anthropic", "https://anthropic.com")

Found links: {'links': [{'type': 'company page', 'url': 'https://www.anthropic.com/company'}, {'type': 'about page', 'url': 'https://www.anthropic.com/learn'}, {'type': 'career page', 'url': 'https://www.anthropic.com/careers'}, {'type': 'team page', 'url': 'https://www.anthropic.com/team'}, {'type': 'contact page', 'url': 'https://www.anthropic.com/contact-sales'}, {'type': 'research page', 'url': 'https://www.anthropic.com/research'}, {'type': 'events page', 'url': 'https://www.anthropic.com/events'}, {'type': 'news page', 'url': 'https://www.anthropic.com/news'}, {'type': 'customers page', 'url': 'https://www.anthropic.com/customers'}]}


"You are looking at a company called: Anthropic\nHere are the contents of its landing page and relevantLanding Page:\nwebpage Title:\nHome \\ Anthropic\nwebpage contents:\nSkip to main content\nSkip to footer\nClaude\nChat with Claude\nOverview\nMax plan\nTeam plan\nEnterprise plan\nExplore pricing\nDownload apps\nClaude log in\nNews\nClaude’s Character\nAPI\nBuild with Claude\nAPI\xa0overview\nDeveloper docs\nExplore pricing\nConsole log in\nNews\nLearn how to build with Claude\nSolutions\nCollaborate with Claude\nAI\xa0agents\nCoding\nCustomer support\nEducation\nFinancial services\nGovernment\nCase studies\nHear from our customers\nResearch\nResearch\nOverview\nEconomic Index\nClaude model family\nClaude Opus 4.1\nClaude Sonnet 4\nClaude Haiku 3.5\nResearch\nClaude’s extended thinking\nCommitments\nInitiatives\nTransparency\nResponsible scaling policy\nTrust center\nSecurity and compliance\nAnnouncement\nISO\xa042001 certification\nLearn\nLearning resources\nCustomer stories\nEngine

In [63]:
def create_brochure(company_name, url):
    """Generate a brochure in one completion call and render it as Markdown.

    Args:
        company_name: Name of the company, forwarded to the user prompt builder.
        url: Landing-page address, forwarded to the user prompt builder.
    """
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": get_brochure_user_prompt(company_name, url)},
    ]
    completion = openai.chat.completions.create(model=MODEL, messages=conversation)
    brochure_markdown = completion.choices[0].message.content
    display(Markdown(brochure_markdown))

In [64]:
# Generate and render a brochure for a live site in a single (non-streaming) call
create_brochure("Anthropic", "https://anthropic.com")

Found links: {'links': [{'type': 'company page', 'url': 'https://www.anthropic.com/company'}, {'type': 'about page', 'url': 'https://www.anthropic.com/learn'}, {'type': 'career page', 'url': 'https://www.anthropic.com/careers'}, {'type': 'team page', 'url': 'https://www.anthropic.com/team'}, {'type': 'news page', 'url': 'https://www.anthropic.com/news'}, {'type': 'research page', 'url': 'https://www.anthropic.com/research'}, {'type': 'events page', 'url': 'https://www.anthropic.com/events'}, {'type': 'solutions page', 'url': 'https://www.anthropic.com/solutions'}, {'type': 'customers page', 'url': 'https://www.anthropic.com/customers'}]}


# Welcome to Anthropic: Safeguarding the Future with a Dash of Humor! 🎉

## Meet Our Star: Claude! 🌟
No, he’s not the new guy in the office; he’s our AI superstar! Say hello to **Claude Opus 4.1**, who’s got more brainpower than your average human! With a talent for coding, customer support, and cracking jokes, Claude is not just any AI; he's the one who can help you while keeping a smile on your face. 

**Let’s chat with Claude:** He might even tell you why the robot crossed the road (hint: **to optimize its algorithms on the other side!**).

## Who We Are: The A-Team of AI 🤓
At Anthropic, we're not just a **public benefit corporation** based in the hip hub of San Francisco—we’re an interdisciplinary squad of researchers, engineers, policy experts, and robotics lovers. Our mission? To build AI that serves humanity's long-term well-being. Sounds like superhero stuff, right? 🦸‍♂️

We’re committed to safety and reliability because let’s face it—nobody wants their home assistant to suddenly demand to *“take over the world.”* 

### Our Values: The Guiding Stars 💫
- **Act for the Global Good:** Because why take over the world when you can save it?
- **Hold Light and Shade:** For every tech advancement, there’s a hangry algorithm waiting to spill an oil spill.
- **Be Awesome to Users:** Ants and people alike—*everyone deserves a good experience!*

## Humans of Anthropic: A Culture Somewhat Like a Tech-Savvy Garden 🌿
We value kindness, honesty, and a good sense of humor... Did we mention the office plants? Seriously, if you love nurturing both greenery and AI, we’ve got the perfect workplace for you! Our diverse team backgrounds; from startups to NASA, give us a one-of-a-kind perspective on building AI tools that even your grandma could use.

### Work-Life Balance? We’ve Got That Covered! 🛌
You’ll find perks from comprehensive health plans to latte art competitions! Weekend hackathons followed by pajama parties? You bet! We believe in nurturing talents and letting you explore your ideas—often over a slice of pizza!

## Join Us! Career Opportunities Abound 🚀
Want to be part of the thrilling (and sometimes hilarious) journey of making AI safer? *Check out our open roles!* Whether you’re a budding developer, a policy enthusiast, or just someone who thinks “AI” sounds like a trendy coffee shop, we want YOU!

**What We Offer:**
- **Health & Wellness:** We keep our team in tip-top shape because a happy employee is an unstoppable one!
- **Anthropic Academy:** Ever wanted to master the art of AI? Our in-house courses will have you building your very own Claude-powered applications in no time!
- **Peanuts:** But only for the comedians. (Just kidding, we prefer to keep our snacks nut-free!)

## Come Chat With Us! 🗣️
Think you’re ready to build the future and maybe even have a laugh or two along the way? Swing by our offices, talk with Claude, and let’s explore how your creativity could help make the world’s safest, friendliest AI. 

Remember: At Anthropic, we take our mission seriously, but we also know that saving the world is more fun when you’re smiling!

📩 **Join the Race to the Top in AI Safety! Visit us at [Anthropic](https://www.anthropic.com)**!

In [56]:
def stream_brochure(company_name, url):
    """Generate a brochure with a streaming completion and render it as it arrives.

    The displayed Markdown is refreshed after every received chunk.

    Args:
        company_name: Name of the company, forwarded to the user prompt builder.
        url: Landing-page address, forwarded to the user prompt builder.
    """
    stream = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": get_brochure_user_prompt(company_name, url)},
        ],
        stream=True,
    )

    response = ""
    display_handle = display(Markdown(""), display_id=True)
    for chunk in stream:
        # A chunk without choices carries no text; indexing it would raise IndexError.
        if not chunk.choices:
            continue
        response += chunk.choices[0].delta.content or ""
        # Strip only code-fence markers for display and keep the accumulated
        # text raw, so the word "markdown" inside the brochure itself survives.
        cleaned = response.replace("```markdown", "").replace("```", "")
        update_display(Markdown(cleaned), display_id=display_handle.display_id)

In [57]:
# Stream a brochure for a live site, rendering the Markdown as it arrives
stream_brochure("Anthropic", "https://anthropic.com")

Found links: {'links': [{'type': 'about page', 'url': 'https://www.anthropic.com/company'}, {'type': 'career page', 'url': 'https://www.anthropic.com/careers'}, {'type': 'team page', 'url': 'https://www.anthropic.com/team'}, {'type': 'solutions page', 'url': 'https://www.anthropic.com/solutions'}, {'type': 'research page', 'url': 'https://www.anthropic.com/research'}, {'type': 'customers page', 'url': 'https://www.anthropic.com/customers'}, {'type': 'events page', 'url': 'https://www.anthropic.com/events'}, {'type': 'news page', 'url': 'https://www.anthropic.com/news'}]}


### Welcome to Anthropic

#### Company Overview
Anthropic is an AI safety and research company based in San Francisco, dedicated to building reliable, interpretable, and steerable AI systems. Our mission is to ensure that artificial intelligence serves humanity’s long-term well-being. By integrating robust research, thoughtful policy-making, and dynamic product development, we work towards creating safer technologies that can positively impact human society.

#### Our Culture
At Anthropic, we foster a **collaborative and interdisciplinary environment**, where researchers, engineers, policy experts, and operational leaders come together. Our core values — which emphasize acting for the global good, being good to our users, and igniting a race to the top on safety — guide all our actions and decisions. We believe in thoughtful, transparent communication, mutual respect, and a mission-first approach to ensure that every team member plays a role in shaping the future of AI.

#### What We Offer
**Work-Life Balance & Well-Being:**  
- Comprehensive health, dental, and vision insurance
- Inclusive fertility benefits
- Flexible paid time off and mental health support

**Competitive Compensation:**  
- Competitive salaries and equity packages
- Optional equity donation matching
- Robust retirement plans with matching

**Additional Benefits:**  
- Monthly wellness stipend
- Commuter benefits
- Home office stipends

#### Our Customers
Anthropic’s customers span diverse industries including education, financial services, customer support, and government agencies. Through our flagship product, Claude, we empower businesses and organizations with advanced AI tools designed to enhance efficiency and performance while ensuring safety and reliability.

#### Join Us
We invite talented individuals who are passionate about the future of AI to explore job opportunities with us. Whether you have a background in technology, research, policy, or operations, your skills could play a pivotal role in advancing our mission and ensuring that AI development benefits everyone.

Explore current job openings and discover how you can contribute to building the future of safe AI. Together, we can pave the way for a positive technological evolution that serves humanity’s interests at its core.

### Visit us at [Anthropic.com](https://www.anthropic.com) to learn more.

In [59]:
# Stream a brochure for a second company to compare the generated output
stream_brochure("HuggingFace", "https://huggingface.com")

Found links: {'links': [{'type': 'about page', 'url': 'https://huggingface.com/'}, {'type': 'models page', 'url': 'https://huggingface.com/models'}, {'type': 'datasets page', 'url': 'https://huggingface.com/datasets'}, {'type': 'enterprise page', 'url': 'https://huggingface.com/enterprise'}, {'type': 'pricing page', 'url': 'https://huggingface.com/pricing'}, {'type': 'career page', 'url': 'https://apply.workable.com/huggingface/'}, {'type': 'blog page', 'url': 'https://huggingface.com/blog'}, {'type': 'community forum', 'url': 'https://discuss.huggingface.co'}, {'type': 'GitHub page', 'url': 'https://github.com/huggingface'}, {'type': 'Twitter page', 'url': 'https://twitter.com/huggingface'}, {'type': 'LinkedIn page', 'url': 'https://www.linkedin.com/company/huggingface/'}]}


# Welcome to Hugging Face!

**Join the AI Community Building the Future!**

---

## Company Culture

At Hugging Face, we pride ourselves on being a community-driven platform dedicated to advancing the field of artificial intelligence. Our culture fosters collaboration, innovation, and open-source development, allowing machine learning enthusiasts, researchers, and professionals to come together and share their work. We believe in empowering our users to create, discover, and collaborate on machine learning (ML) models, datasets, and applications.

Our workplace embraces inclusivity, creativity, and a passion for technology, making it an ideal environment for individuals looking to make a difference in AI. Our team values continuous learning and knowledge sharing, ensuring that every member feels supported and motivated to reach their full potential.

---

## Customers and Community

Hugging Face serves more than 50,000 organizations, including tech giants like Google, Amazon, Microsoft, and Intel, as well as many startups and academic institutions. Whether you're a developer, a researcher, or part of an enterprise, we provide tailored solutions that cater to your specific needs.

Our community includes over 1 million models and 250,000 datasets, allowing users to easily access a wealth of resources that enhance their projects. We encourage collaboration and provide tools and platforms for users to share their accomplishments and showcase their work, helping to build a vibrant ecosystem.

Join our community through our platforms such as GitHub, Discord, and LinkedIn to connect with like-minded individuals and contribute to the future of AI.

---

## Careers at Hugging Face

**Cultivating Talent and Growth**

At Hugging Face, we are always looking for passionate and talented individuals to join our team. If you have a love for AI and a commitment to collaboration, Hugging Face may be the perfect fit for you!

**Available Opportunities:**
- Positions in fields such as engineering, data science, product management, and marketing.
- Roles that emphasize creativity, problem-solving, and innovation.
- Opportunities for professional growth, with access to a supportive network of experts.

We believe in investing in our employees' success by providing them with the resources, training, and opportunities to thrive and make a real impact in the AI community.

**Why Choose Us?**
- Work on cutting-edge technology that shapes the future of AI.
- Be part of a supportive community that values your contributions.
- Enjoy competitive benefits and flexible working conditions.

Join us on this incredible journey. Together, we can build the tools and solutions that will define the future of artificial intelligence.

---

**Explore your future at Hugging Face today! Sign up or learn more about our open positions on our [Jobs Page](#).**