In [3]:
import json
import os
from typing import List
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
from dotenv import load_dotenv
from IPython.display import Markdown, display, update_display
from openai import OpenAI

In [4]:
# Pull settings from a local .env file (if any) into the process environment.
load_dotenv()
api_key = os.getenv('OPENAI_API_KEY')

# Chat model used by every completion call in this notebook.
model = 'gpt-4o-mini'
# Hand the key to the client explicitly so the value read above is the one in use
# (when it is None the client falls back to the OPENAI_API_KEY environment variable).
openai = OpenAI(api_key=api_key)

In [5]:
class Website:
    """
    A utility class to represent a Website that we have scraped, now with links.

    Attributes set by the constructor:
        url:   the address that was requested.
        body:  raw bytes of the HTTP response.
        title: text of the <title> tag, or "No title found".
        text:  newline-separated text of <body> with script/style/img/input
               elements removed, or "" when the page has no <body>.
        links: every non-empty href found on an <a> tag, exactly as written
               in the page (so entries may be relative).
    """

    def __init__(self, url: str, timeout: float = 10.0):
        """
        Download `url` and extract its title, text and links.

        Args:
            url: address of the page to fetch.
            timeout: seconds to wait for the server; passed straight to
                `requests.get`.
        """
        self.url = url
        # requests applies no timeout by default, so an unresponsive host
        # would otherwise stall the notebook indefinitely.
        response = requests.get(url, timeout=timeout)
        self.body = response.content
        soup = BeautifulSoup(self.body, 'html.parser')

        # `.string` is None when <title> is empty or contains nested tags;
        # use the placeholder then as well, so "None" never reaches the prompt.
        title_text = soup.title.string if soup.title else None
        self.title = title_text if title_text else "No title found"

        if soup.body:
            # Drop elements that carry no readable text before extracting it.
            for irrelevant in soup.body(["script", "style", "img", "input"]):
                irrelevant.decompose()
            self.text = soup.body.get_text(separator="\n", strip=True)
        else:
            self.text = ""

        hrefs = (anchor.get('href') for anchor in soup.find_all('a'))
        self.links = [href for href in hrefs if href]

    def get_contents(self) -> str:
        """Return the title and text formatted as one prompt-ready section."""
        return f"Webpage Title:\n{self.title}\nWebpage Contents:\n{self.text}\n\n"

In [6]:
# Scrape a sample site and show the raw hrefs the scraper collected from it.
ed = Website('https://edwarddonner.com')
ed.links

['https://edwarddonner.com/',
 'https://edwarddonner.com/connect-four/',
 'https://edwarddonner.com/outsmart/',
 'https://edwarddonner.com/about-me-and-about-nebula/',
 'https://edwarddonner.com/posts/',
 'https://edwarddonner.com/',
 'https://news.ycombinator.com',
 'https://nebula.io/?utm_source=ed&utm_medium=referral',
 'https://www.prnewswire.com/news-releases/wynden-stark-group-acquires-nyc-venture-backed-tech-startup-untapt-301269512.html',
 'https://patents.google.com/patent/US20210049536A1/',
 'https://www.linkedin.com/in/eddonner/',
 'https://edwarddonner.com/2025/04/21/the-complete-agentic-ai-engineering-course/',
 'https://edwarddonner.com/2025/04/21/the-complete-agentic-ai-engineering-course/',
 'https://edwarddonner.com/2025/01/23/llm-workshop-hands-on-with-agents-resources/',
 'https://edwarddonner.com/2025/01/23/llm-workshop-hands-on-with-agents-resources/',
 'https://edwarddonner.com/2024/12/21/llm-resources-superdatascience/',
 'https://edwarddonner.com/2024/12/21/llm-

In [7]:
# System prompt for the link-selection call. The embedded example must be valid
# JSON, since it is the template the model is told to imitate.
link_system_prompt = (
    "You are provided with a list of links found on a webpage. "
    "You are able to decide which of the links would be most relevant to include in a brochure about the company, "
    "such as links to an About page, or a Company page, or Careers/Jobs pages.\n"
)
link_system_prompt += "You should respond in JSON as in this example:"
link_system_prompt += """
{
    "links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page", "url": "https://another.full.url/careers"}
    ]
}
"""

In [8]:
def get_links_user_prompt(website):
    """
    Build the user message that asks the model to pick brochure-relevant links.

    Args:
        website: object exposing `url` (str) and `links` (iterable of str).

    Returns:
        The instruction text followed by one link per line.
    """
    instructions = (
        f"Here is the list of links on the website of {website.url} - "
        "please decide which of these are relevant web links for a brochure about the company, "
        "respond with the full https URL in JSON format. "
        "Do not include Terms of Service, Privacy, email links.\n"
    )
    heading = "Links (some might be relative links):\n"
    link_lines = "\n".join(website.links)
    return instructions + heading + link_lines

In [9]:
# Preview the link-selection user prompt built for the sample site.
print(get_links_user_prompt(ed))

Here is the list of links on the website of https://edwarddonner.com - please decide which of these are relevant web links for a brochure about the company, respond with the full https URL in JSON format. Do not include Terms of Service, Privacy, email links.
Links (some might be relative links):
https://edwarddonner.com/
https://edwarddonner.com/connect-four/
https://edwarddonner.com/outsmart/
https://edwarddonner.com/about-me-and-about-nebula/
https://edwarddonner.com/posts/
https://edwarddonner.com/
https://news.ycombinator.com
https://nebula.io/?utm_source=ed&utm_medium=referral
https://www.prnewswire.com/news-releases/wynden-stark-group-acquires-nyc-venture-backed-tech-startup-untapt-301269512.html
https://patents.google.com/patent/US20210049536A1/
https://www.linkedin.com/in/eddonner/
https://edwarddonner.com/2025/04/21/the-complete-agentic-ai-engineering-course/
https://edwarddonner.com/2025/04/21/the-complete-agentic-ai-engineering-course/
https://edwarddonner.com/2025/01/23/ll

In [10]:
def get_links(url):
    """
    Scrape `url` and ask the chat model which of its links suit a brochure.

    The request uses the JSON-object response format, and the reply text is
    parsed with `json.loads` before being returned.
    """
    page = Website(url)
    chat_messages = [
        {"role": "system", "content": link_system_prompt},
        {"role": "user", "content": get_links_user_prompt(page)},
    ]
    reply = openai.chat.completions.create(
        model=model,
        messages=chat_messages,
        response_format={"type": "json_object"},
    )
    raw_json = reply.choices[0].message.content
    return json.loads(raw_json)


In [11]:
# Try the link selector on a real site (fetches the page and calls the chat API).
get_links("https://anthropic.com")

{'links': [{'type': 'about page', 'url': 'https://www.anthropic.com/company'},
  {'type': 'careers page', 'url': 'https://www.anthropic.com/careers'},
  {'type': 'team page', 'url': 'https://www.anthropic.com/team'},
  {'type': 'research page', 'url': 'https://www.anthropic.com/research'},
  {'type': 'learn page', 'url': 'https://www.anthropic.com/learn'},
  {'type': 'solutions page',
   'url': 'https://www.anthropic.com/solutions/agents'},
  {'type': 'customers page', 'url': 'https://www.anthropic.com/customers'},
  {'type': 'transparency page',
   'url': 'https://www.anthropic.com/transparency'}]}

In [12]:
def get_all_details(url):
    """
    Collect the text of the landing page plus every page the model selected.

    Args:
        url: address of the company's landing page.

    Returns:
        One string: a "Landing page:" section followed by one section per
        selected link, each headed by the link's "type".

    Links that cannot be fetched are reported and skipped, so one bad link
    does not discard the rest of the material.
    """
    sections = ["Landing page:\n", Website(url).get_contents()]
    links = get_links(url)
    print("Found links:", links)
    # The model's reply is not guaranteed to contain a "links" key.
    for link in links.get("links", []):
        target = link.get("url")
        if not target:
            continue
        # The link prompt allows relative hrefs; resolve them against the landing page.
        target = urljoin(url, target)
        try:
            page = Website(target)
        except requests.RequestException as exc:
            print(f"Skipping {target}: {exc}")
            continue
        sections.append(f"\n\n{link.get('type', 'page')}\n")
        sections.append(page.get_contents())
    return "".join(sections)

In [13]:
# Show the combined text of the landing page and each page the model selected.
print(get_all_details("https://anthropic.com"))

Found links: {'links': [{'type': 'company page', 'url': 'https://www.anthropic.com/company'}, {'type': 'careers page', 'url': 'https://www.anthropic.com/careers'}]}
Landing page:
Webpage Title:
Home \ Anthropic
Webpage Contents:
Skip to main content
Skip to footer
Claude
Chat with Claude
Overview
Team plan
Enterprise plan
Education plan
Explore pricing
Download apps
Claude log in
News
Claude’s character
API
Build with Claude
API overview
Developer docs
Explore pricing
Console log in
News
Learn how to build with Claude
Solutions
Collaborate with Claude
AI agents
Coding
Customer support
Case studies
Hear from our customers
Research
Research
Overview
Economic Index
Claude model family
Claude 3.7 Sonnet
Claude 3.5 Haiku
Claude 3 Opus
Research
Claude’s extended thinking
Commitments
Initiatives
Transparency
Responsible scaling policy
Trust center
Security and compliance
Announcement
ISO 42001 certification
Learn
Learning resources
Customer stories
Engineering at Anthropic
Anthropic Academy
C

In [14]:
# System prompt for the brochure-writing call. Adjacent literals are joined by
# the parser, so each fragment carries its own trailing space.
get_brochure_system_prompt = (
    "You are an assistant that analyzes the contents of several relevant pages from a company website "
    "and creates a short humorous, entertaining, jokey brochure about the company for prospective customers, investors and recruits. "
    "Respond in markdown. "
    "Include details of company culture, customers and careers/jobs if you have the information."
)

In [15]:
def get_brochure_user_prompt(company_name, url, max_chars=5_000):
    """
    Build the user message for the brochure-writing call.

    Args:
        company_name: name to present to the model.
        url: landing page whose contents (and selected sub-pages) are included.
        max_chars: upper bound on the length of the returned prompt; pass
            None to disable truncation.

    Returns:
        The prompt text, cut to at most `max_chars` characters.
    """
    user_prompt = f"You are looking at a company called: {company_name}\n"
    user_prompt += "Here are the contents of its landing page and other relevant pages; use this information to build a short brochure of the company in markdown.\n"
    user_prompt += get_all_details(url)
    # Scraped pages can be long; truncating keeps the request size bounded.
    return user_prompt[:max_chars]

In [16]:
# Preview the (truncated) brochure prompt for Hugging Face.
print(get_brochure_user_prompt("HuggingFace", "https://huggingface.co"))


Found links: {'links': [{'type': 'about page', 'url': 'https://huggingface.co/huggingface'}, {'type': 'careers page', 'url': 'https://apply.workable.com/huggingface/'}, {'type': 'enterprise page', 'url': 'https://huggingface.co/enterprise'}, {'type': 'pricing page', 'url': 'https://huggingface.co/pricing'}, {'type': 'blog page', 'url': 'https://huggingface.co/blog'}, {'type': 'community discussion page', 'url': 'https://discuss.huggingface.co'}, {'type': 'twitter page', 'url': 'https://twitter.com/huggingface'}, {'type': 'linkedin page', 'url': 'https://www.linkedin.com/company/huggingface/'}]}
You are looking at a company called: HuggingFace
Here are the contents of its landing page and other relevant pages; use this information to build a short brochure of the company in markdown.
Landing page:
Webpage Title:
Hugging Face – The AI community building the future.
Webpage Contents:
Hugging Face
Models
Datasets
Spaces
Posts
Docs
Enterprise
Pricing
Log In
Sign Up
The AI community building

In [17]:
def create_brochure(company_name,url):
    """
    Generate a brochure for the company and render it as Markdown.

    Sends the brochure system prompt plus the scraped-content user prompt to
    the chat model and displays the reply; nothing is returned.
    """
    conversation = [
        {"role": "system", "content": get_brochure_system_prompt},
        {"role": "user", "content": get_brochure_user_prompt(company_name, url)},
    ]
    completion = openai.chat.completions.create(model=model, messages=conversation)
    brochure_markdown = completion.choices[0].message.content
    display(Markdown(brochure_markdown))

In [18]:
# Generate the brochure and render it once the full reply has arrived.
create_brochure("Anthropic","https://anthropic.com")

Found links: {'links': [{'type': 'about page', 'url': 'https://www.anthropic.com/company'}, {'type': 'careers page', 'url': 'https://www.anthropic.com/careers'}, {'type': 'team page', 'url': 'https://www.anthropic.com/team'}, {'type': 'contact page', 'url': 'https://www.anthropic.com/contact-sales'}, {'type': 'solutions page', 'url': 'https://www.anthropic.com/solutions/agents'}]}


# Welcome to Anthropic: Where AI Meets Humanity (and a Dash of Humor!)

Ahoy there, future customers, investors, and potential recruits! Welcome aboard the Anthropic spaceship, where we're not just building AI; we're crafting intelligent pals. Meet *Claude*, our ever-growing family of AI models—because what’s better than one Claude? Three! (Yes, you heard that right!)

## 🎤 **Meet Claude** 
**Claude 3.7 Sonnet:** The brightest star in our AI universe! Why settle for mundane chat when you can enjoy a poetry slam with an AI? (*Disclaimer: All sonnets are subject to Claude's interpretation of Shakespeare... and random cat memes.*)

### 🧠 **What's the Big Idea?**
At Anthropic, we believe in a world where AI is a helpful sidekick, not a sci-fi villain. Think less *Skynet*, more *fluffy robot assistant* who always leaves you thoughtful notes (or puns). Our mission? To ensure the well-being of humanity through safe and responsible AI development. 

## 👥 **Our Culture: Join the Fun!**
- **Transparency:** We keep it real! No shady stuff here, just good ol' fashioned ethical AI chat.
- **Learning & Growth:** Welcome to the Anthropic Academy! This isn't just a place to learn; it's where you level up your skills, one Claude conversation at a time!
- **Diverse Team:** Our crew is as varied as a breakfast buffet, mixing up techies, philosophers, and snack enthusiasts who believe that AI can be both intelligent and entertaining.

## 🚀 **Let’s Talk Customers!**
From management consultants to education institutions, our diverse clientele knows that Claude is the co-pilot they never knew they needed. Together, we explore new heights in AI that serve humanity without wearing a villain cape! (Capes are so last season anyway.)

## 💼 **Careers: Join Us on This Exciting Journey!**
Wanna be part of our mission to save the world from mundane AI? We’re on the lookout for talented individuals with a passion for safety in tech and a sense of humor! Our secret is simple:
1. Be ready for the future.
2. Let your creativity shine (and spill your coffee when you laugh at Claude’s jokes).
3. Embrace a culture where questioning, learning, and the occasional dad joke is encouraged!

## 🌟 **Final Thoughts**
Whether you're a customer, investor, or job seeker, we promise one thing at Anthropic: **an intelligent experience that’s never boring!** So come chat with Claude, where the AI is smart, and the laughs multiply faster than our server queries (which, let's be real, is pretty fast).

## 🤖 **Ready to Get Started?**
Dive into our offerings or grab a cup of coffee and join the conversation with Claude today! 

---

*Anthropic: Who says AI can't be fun and safe? Now, that's what we call a serious contradiction!*

In [None]:
def stream_brochure(company_name,url):
    """
    Generate a brochure and render it incrementally while the reply streams in.

    Each received chunk's text is appended to the running reply, and the same
    display slot is refreshed with the Markdown accumulated so far.
    """
    conversation = [
        {'role': 'system', 'content': get_brochure_system_prompt},
        {'role': 'user', 'content': get_brochure_user_prompt(company_name, url)},
    ]
    chunks = openai.chat.completions.create(
        model=model,
        messages=conversation,
        stream=True,
    )
    accumulated = ""
    # display_id=True yields a handle whose id lets later calls update this output in place.
    handle = display(Markdown(""), display_id=True)
    for piece in chunks:
        delta_text = piece.choices[0].delta.content
        if delta_text:
            accumulated += delta_text
        update_display(Markdown(accumulated), display_id=handle.display_id)

In [20]:
# Same brochure request, rendered progressively as chunks arrive.
stream_brochure("Anthropic","https://anthropic.com")

Found links: {'links': [{'type': 'about page', 'url': 'https://www.anthropic.com/company'}, {'type': 'careers page', 'url': 'https://www.anthropic.com/careers'}]}


# Welcome to Anthropic: The Future of Friendly AI!

---

**🌟 Who Are We?**  
At Anthropic, we like to think of ourselves as the friendly neighborhood superheroes of the AI world! Our star player, **Claude**, is not just any AI — he’s equipped with a 3.7 Sonnet, 3.5 Haiku, and even magical thinking! While we can't promise that he’ll recite Shakespeare on command (yet), we guarantee he’s great at handling serious tasks with a sprinkle of humor.

---

**🏢 Company Culture: Where Ideas Flow Like Coffee!**  
At Anthropic, we believe in bold steps and taking visionary leaps forward. (After all, we’re all about *enhancing human well-being*! 🌍) However, we’re also big fans of intentional pauses—just like coffee breaks—where you can ponder the profound mysteries of what it means to design powerful technologies responsibly. Our team embraces safety, transparency, and collaboration, and we foster an environment where laughs are mandatory. We like to call it “serious fun”. Can we hear a “yay” for casual brainstorming sessions?

---

**👥 Who’s Our Customer?**  
Are you a tech-savvy organization, an academic institution, or just someone who wishes their toaster could become sentient? Whatever the case, we’ve got a plan for you! From enterprises who need robust AI solutions to classrooms eager to explore AI through our **Anthropic Academy**, we cater to many. Pro tip: Coffee is included in the package. Just kidding...sort of.

---

**👩‍💻 Careers: Are You the Next AI Whisperer?**  
Think you have what it takes to join our ranks? We’re always on the lookout for imaginative wizards and talented individuals to help us build the future of safe AI. *Job title suggestions*: “Chief Magical Data Enterer” or “AI Safety Ninja.” Okay, we may not have those roles, but we promise you’ll love the actual positions available! Bring your imagination (and maybe a cat) along for the ride. Check out our open roles – who knows, you might just be the next Claude-tamer we’re searching for!

---

**🚀 Our Future Vision: No Dark Side Here!**  
We're committed to safe AI development, ensuring that our creations serve humanity’s long-term well-being and don’t become overlords in the process. When we say "responsible scaling," we're not just talking about gym workouts!

---

**📚 Come Along for the Ride!**  
With Claude by our side, we’re building better AI for all! If you want to join the fun, collaborate with us, or talk about the mysteries of safety in AI, give us a shout. Whether you're a curious customer, a passionate investor, or the next prodigy, we’d love to hear from you!

---

> **Why choose Anthropic?**  
> Because robots are great, but friendly robots are better! Plus, we have coffee.

*Anthropic: Making AI Development Safe, Responsible, and Fun!*  

---
