# Brochure Generator from Website

In [None]:
#all imports
import os
import requests
import json
from typing import List
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from IPython.display import Markdown, display, update_display
import google.generativeai as genai

## Step 1: Get data and links

In [None]:
# Load variables from a local .env file; override=True lets .env values
# replace any that are already set in the process environment.
load_dotenv(override=True)

# Hand the Gemini key to the SDK once, before any model is created.
api_key = os.environ.get('GEMINI_API_KEY')
genai.configure(api_key=api_key)

In [None]:

# Request headers shared by every fetch: some websites only respond properly
# when the request carries browser-like headers.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/117.0.0.0 Safari/537.36"
    ),
}

class Website:
    """
    A scraped web page: its title, visible text and the links found on it.

    Constructing an instance fetches ``url`` immediately. Scraping is
    best-effort: on any failure a message is printed and the attributes keep
    their empty defaults, so construction itself never raises.
    """
    url: str
    title: str
    body: str  # NOTE: after a successful scrape this holds the raw response bytes
    text: str
    links: List[str]

    def __init__(self, url: str):
        self.url = url
        self.title = ""
        self.body = ""
        self.text = ""
        self.links = []
        self.scrape()

    def scrape(self, timeout: float = 10):
        """
        Fetch ``self.url`` and fill in ``title``, ``body``, ``text`` and ``links``.

        Args:
            timeout: Seconds to wait for the server before giving up. Without
                a timeout a stalled server would block the caller forever.

        Nothing is returned and nothing is raised: a non-200 status or any
        exception is reported with ``print`` and the attributes are left as
        they were.
        """
        try:
            response = requests.get(self.url, headers=headers, timeout=timeout)
            if response.status_code != 200:
                print(f"Failed to retrieve the URL. Status code: {response.status_code}")
                return

            soup = BeautifulSoup(response.content, 'html.parser')
            self.body = response.content

            # <title> may be absent, and .string is None when the tag is empty
            # or contains nested markup; fall back in both cases.
            title_tag = soup.title
            has_title = title_tag is not None and title_tag.string
            self.title = title_tag.string if has_title else "No title found!"

            # A document without <body> (e.g. a fragment or frameset) must not
            # abort the scrape; it simply has no visible text.
            page_body = soup.body
            if page_body is None:
                self.text = ""
            else:
                # Remove irrelevant tags as we only need text
                for irrelevant in page_body.find_all(["script", "style", "img", "input"]):
                    irrelevant.decompose()
                self.text = page_body.get_text(separator='\n', strip=True)

            # Collect every non-empty href; anchors without one are skipped.
            hrefs = (anchor.get('href') for anchor in soup.find_all('a'))
            self.links = [href for href in hrefs if href]

        except Exception as e:
            # Deliberately broad: callers build many Website objects in a loop
            # and rely on a bad page being reported rather than raised.
            print(f"An error occurred while scraping: {e}")

    def get_contents(self):
        """Return the title and visible text formatted as a prompt section."""
        return f"Webpage Title:\n{self.title}\nWebpage Content:\n{self.text}\n\n"


In [None]:
# Quick check of the scraper: constructing a Website fetches the page, and the
# trailing expression shows the hrefs collected from its <a> tags.
web = Website("https://edwarddonner.com")
web.links

In [None]:
# Instructions for the link-selection call. The embedded example must itself
# be valid JSON, because the model is asked to answer in the same shape.
link_system_prompt = (
    "You are provided with a list of links found on a webpage. "
    "You are able to decide which of the links would be most relevant to include in a brochure about the company, "
    "such as links to an About page, or a Company page, or Careers/Jobs pages.\n"
)
link_system_prompt += "You should respond in JSON as in this example:"
link_system_prompt += """
{
    "links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page", "url": "https://another.full.url/careers"}
    ]
}
"""

In [None]:
# Show the assembled system prompt, including its JSON example, for inspection.
print(link_system_prompt)

In [None]:
def get_links_user_prompt(website):
    """
    Build the user-side request asking the model to pick brochure-worthy links.

    ``website`` must expose ``url`` (a string) and ``links`` (an iterable of
    strings); the links are listed one per line at the end of the prompt.
    """
    intro = f"Here is the list of links on the website of {website.url} - "
    task = (
        "please decide which of these are relevant web links for a brochure about the company, "
        "respond with the full https URL in JSON format. "
        "Do not include Terms of Service, Privacy, email links. Don't include json term in output\n"
    )
    heading = "Links (some might be relative links):\n"
    link_lines = "\n".join(website.links)
    return "".join([intro, task, heading, link_lines])

In [None]:
def get_prompt(website):
    """
    Merge the link-selection instructions and the per-site request into the
    single prompt string that is sent to the model.
    """
    context = link_system_prompt
    request = get_links_user_prompt(website)
    return f"context: {context}, prompt: {request}"

In [None]:
def _extract_json_object(raw):
    """Return the part of *raw* spanning the first ``{`` to the last ``}``."""
    start = raw.find("{")
    end = raw.rfind("}")
    if start == -1 or end < start:
        # No object found: hand the text back untouched so json.loads raises
        # its usual JSONDecodeError with the real content.
        return raw
    return raw[start:end + 1]


def get_links(url):
    """
    Scrape ``url`` and ask the model which of its links belong in a brochure.

    Returns:
        The model's reply parsed from JSON. The prompt asks for a dict of the
        form ``{"links": [{"type": ..., "url": ...}, ...]}``.

    Raises:
        json.JSONDecodeError: if the reply contains no parseable JSON object.
    """
    website = Website(url)
    model = genai.GenerativeModel("gemini-1.5-flash")
    response = model.generate_content(get_prompt(website))
    # The model may wrap its answer in a markdown code fence or add a sentence
    # around it; parse only the JSON object itself.
    return json.loads(_extract_json_object(response.text))

In [None]:
# Try the link selector on a live site: this scrapes the page and calls the Gemini model.
get_links("https://anthropic.com")

## Step 2: Generate Brochure

In [None]:
def get_details(url):
    """
    Collect the text of the landing page and of every link the model selected.

    Args:
        url: Address of the company's landing page.

    Returns:
        One string: a "Landing Page:" section followed by one section per
        selected link, each headed by the link's ``type``.
    """
    # Local import so this cell does not depend on an edit to the import cell.
    from urllib.parse import urljoin

    sections = ["Landing Page:\n", Website(url).get_contents()]
    links = get_links(url)
    # A reply without a "links" key yields a landing-page-only result rather
    # than a KeyError.
    for link in links.get('links', []):
        sections.append(f"\n\n{link['type']}:\n")
        # The model is asked for full URLs but may echo a relative href;
        # urljoin leaves absolute URLs untouched and resolves relative ones.
        sections.append(Website(urljoin(url, link['url'])).get_contents())
    return "".join(sections)

In [None]:
# Print the combined text of the landing page and the model-selected pages.
print(get_details("https://anthropic.com"))

In [None]:
# Instructions for the brochure-writing call; get_brochure_prompt combines
# them with the scraped page text.
system_prompt = (
    "You are an assistant that analyzes the contents of several relevant pages from a company website "
    "and creates a short brochure about the company for prospective customers, investors and recruits. "
    "Respond in markdown. "
    "Include details of company culture, customers and careers/jobs if you have the information."
)

In [67]:
def get_brochure_prompt(company_name, url, max_chars=5_000):
    """
    Build the full prompt asking the model to write a brochure for a company.

    Args:
        company_name: Name of the company, quoted in the prompt.
        url: Landing page whose content (and selected sub-pages) is included.
        max_chars: Upper bound on the length of the user part of the prompt
            (the request plus the scraped text). Pass ``None`` to disable
            truncation.

    Returns:
        A string of the form ``"context: <system prompt>, prompt: <user prompt>"``.
        Only the user part is truncated; the system prompt is always complete.
    """
    user_prompt = f"You are looking at a company called: {company_name}\n"
    user_prompt += "Here are the contents of its landing page and other relevant pages; use this information to build a short brochure of the company in markdown.\n"
    user_prompt += get_details(url)
    user_prompt = user_prompt[:max_chars]  # Scraped sites can be very long; keep the prompt bounded
    return f"context: {system_prompt}, prompt: {user_prompt}"

In [68]:
# Preview the exact prompt string (instructions plus truncated page text) built for the model.
get_brochure_prompt("Anthropic", "https://anthropic.com")

'context: You are an assistant that analyzes the contents of several relevant pages from a company website and creates a short brochure about the company for prospective customers, investors and recruits. Respond in markdown.Include details of company culture, customers and careers/jobs if you have the information., prompt: You are looking at a company called: Anthropic\nHere are the contents of its landing page and other relevant pages; use this information to build a short brochure of the company in markdown.\nLanding Page:\nWebpage Title:\nHome \\ Anthropic\nWebpage Content:\nClaude\nOverview\nTeam\nEnterprise\nAPI\nPricing\nResearch\nCompany\nCareers\nNews\nTry Claude\nAI\nresearch\nand\nproducts\nthat put safety at the frontier\nClaude.ai\nMeet Claude 3.5 Sonnet\nClaude 3.5 Sonnet, our most intelligent AI model, is now available.\nTalk to Claude\nAPI\nBuild with Claude\nCreate AI-powered applications and custom experiences using Claude.\nLearn more\nAnnouncements\nIntroducing comp

In [71]:
# get brochure from the prompt data

def generate_brochure(company_name, url):
    """
    Generate a brochure for ``company_name`` from the site at ``url`` and
    render the model's reply as Markdown in the notebook.
    """
    gemini = genai.GenerativeModel("gemini-1.5-flash")
    prompt = get_brochure_prompt(company_name, url)
    brochure = gemini.generate_content(prompt).text
    display(Markdown(brochure))

In [None]:
# Generate and render a brochure in one shot (the output appears once the full reply arrives).
generate_brochure("Anthropic", "https://anthropic.com")

In [74]:
# generate stream of content

def generate_brochure_stream(company_name, url):
    """
    Generate a brochure and render it incrementally as the model streams it.

    A single notebook display area is created up front and re-rendered with
    the accumulated text each time a new chunk arrives. Markdown code-fence
    markers that the model may wrap around its reply are stripped so the
    brochure renders as formatted Markdown rather than as a code block.
    """
    model = genai.GenerativeModel("gemini-1.5-flash")
    response = model.generate_content(get_brochure_prompt(company_name, url), stream=True)
    raw = ""
    display_handle = display(Markdown(""), display_id=True)
    for chunk in response:
        raw += chunk.text
        # str.replace returns a new string, so its result must be kept. Clean
        # the accumulated text rather than the chunk, because a fence marker
        # can be split across two chunks.
        cleaned = raw.replace("```markdown", "").replace("```", "")
        update_display(Markdown(cleaned), display_id=display_handle.display_id)


In [75]:
# Same brochure as above, but rendered progressively while the model streams its reply.
generate_brochure_stream("Anthropic", "https://anthropic.com")

# Anthropic:  Building a Safer Future with AI

**A brochure for prospective customers, investors, and recruits**


**(Front Page - Image: A stylized image representing AI safety or collaboration)**

**Anthropic: AI Research & Products that Prioritize Safety**

We are an AI safety and research company based in San Francisco.  Our mission is to ensure transformative AI helps people and society flourish. We build reliable, interpretable, and steerable AI systems, conducting cutting-edge research and deploying our findings through innovative products and partnerships.


**(Inside Left Panel - Customer Focus)**

**Claude:  The Intelligent AI System You Can Rely On**

Anthropic's flagship product, Claude, is a powerful and safe AI system available through our API.  Businesses, nonprofits, and individuals can leverage Claude to create AI-powered applications and custom experiences.  Claude is designed for:

* **Reliable performance:**  Built with safety as a core principle.
* **Interpretability:** Understanding how Claude works, increasing trust and accountability.
* **Steerability:**  Guiding Claude's behavior to ensure desired outcomes.

**Learn more & Try Claude:** [Link to Claude page]


**(Inside Right Panel - Company Culture & Careers)**

**A Culture of Collaboration & Innovation**

Anthropic fosters a collaborative and high-trust environment. Our interdisciplinary team comprises researchers, engineers, policy experts, business leaders, and operators from diverse backgrounds.  We value:

* **Mission-driven work:**  Passionate about building a safer future with AI.
* **High trust & open communication:** We assume good faith, disagree kindly, and prioritize honesty.
* **Collaboration:**  We work together as one big team, leveraging our diverse expertise.

**Careers at Anthropic:**

We are looking for talented individuals across a range of disciplines, including:

* Machine Learning Engineers
* Researchers (AI Safety, ML, etc.)
* Policy Experts
* Product Managers
* Operations Professionals

See open roles and apply: [Link to Careers page]


**(Back Panel - Investment Highlights & Research)**

**Investing in a Safer AI Future**

Anthropic is at the forefront of AI safety research. Our work focuses on:

* **Constitutional AI:** Developing AI systems guided by principles of harmlessness.
* **Interpretability:**  Making AI systems more understandable and transparent.
* **Reinforcement Learning from Human Feedback (RLHF):** Training AI systems to align with human values.

Our research is regularly shared with the world, fostering open collaboration and driving advancements in the field.

**Contact:**

[Website Link] | [Twitter Link] | [LinkedIn Link] | [Email Address]


**(Small Print)**  © 2025 Anthropic PBC


