# A full business solution

Create a product that builds a Brochure for a company to be used for prospective clients, investors and potential recruits using their website.

In [1]:
import os
import requests
import json
from typing import List
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from IPython.display import Markdown, display, update_display
from openai import OpenAI

In [2]:
# Initialize and constants
# load_dotenv(override=True)
api_key = os.getenv('OPENAI_API_KEY') #OPENAI_API_KEY is in Path variable on local machine

if api_key and api_key.startswith('sk-proj-') and len(api_key)>10:
    print("API key looks good so far")
else:
    print("There might be a problem with your API key? Please visit the troubleshooting notebook!")
    
MODEL = 'gpt-4o-mini'
openai = OpenAI()

API key looks good so far


In [3]:
#Use class to represent a webpage

class Website:
    """
    A utility class to represent a website that we have scraped
    """
    #data structure
    url: str 
    title: str
    body: str
    links: List[str]
    text: str
    
    #initialize
    def __init__(self,url):
        self.url = url 
        response = requests.get(url) # stores response from url
        self.body = response.content #extracts raw HTML content
        soup = BeautifulSoup(self.body,'html.parser') # parses HTML content
        self.title = soup.title.string if soup.title else "No title found" 
        #if body tag exists - remove some elements since they don't contain meaningful text
        if soup.body:
            for irrelevant in soup.body(["script","style","img","input"]):
                irrelevant.decompose()
            self.text = soup.body.get_text(separator="\n",strip=True) #get clean text from <body> tag with line break
        else:
            self.text = ""
        links = [link.get('href') for link in soup.find_all('a')] # collect all links on webpage with a tag
        self.links = [link for link in links if link] # Filter out None values
        
    def get_contents(self):
        return f"Webpage Title:\n{self.title}\nWebpage Contents:\n{self.text}\n\n"
            

In [4]:
#Test it with reddit
reddit = Website('http://www.reddit.com/')
print(reddit.get_contents())

Webpage Title:
Reddit - Heart of the internet
Webpage Contents:
Skip to main content
Open menu
Open navigation
Go to Reddit Home
Get App
Get the Reddit app
Log In
Log in to Reddit
Expand user menu
Open settings menu
England squad announced
England squad for World Cup Qualifiers
r/soccer
and more
AA plane catches fire
American Airlines plane catches fire at Denver airport
r/aviation
and more
Silent Hill f reveal trailer
Silent Hill F Key Art Revealed
r/silenthill
and more
Normani & DK Metcalf engaged
Normani & DK Metcalf Engaged
r/popculturechat
and more
Johns Hopkins job cuts
Federal Cuts Prompt Johns Hopkins to Cut More Than 2,000 Workers
r/news
and more
John Feinstein dead at 69
Sports columnist, author John Feinstein dies at age 69
r/sports
and more
Hot
Open sort options
Best
Hot
New
Top
Rising
Everywhere
Open sort options
Everywhere
United States
Argentina
Australia
Bulgaria
Canada
Chile
Colombia
Croatia
Czech Republic
Finland
France
Germany
Greece
Hungary
Iceland
India
Ireland
Ita

In [5]:
reddit.links

['#main-content',
 '/',
 'https://www.reddit.com/login/',
 '/search/?q=%22England+squad%22+AND+%28Tuchel+OR+%22world+cup%22%29&source=trending&cId=657fbe21-ce37-4884-8d2e-ff275418f037&iId=3c4f338a-4022-4936-85ac-61eb36aa98c7',
 '/search/?q=plane+AND+fire+AND+%28Denver+OR+%22American+Airlines%22%29&source=trending&cId=657fbe21-ce37-4884-8d2e-ff275418f037&iId=a490446b-6e52-4467-bc28-d9051968e383',
 '/search/?q=%22Silent+Hill+f%22&source=trending&cId=657fbe21-ce37-4884-8d2e-ff275418f037&iId=0dda9d9c-2ce3-4e24-bc2e-906cf821cca3',
 '/search/?q=Metcalf+AND+Normani&source=trending&cId=657fbe21-ce37-4884-8d2e-ff275418f037&iId=3afee045-8eea-4fe8-88a7-15174d7887ad',
 '/search/?q=%22Johns+Hopkins%22+AND+%28jobs+OR+workers%29&source=trending&cId=657fbe21-ce37-4884-8d2e-ff275418f037&iId=517a72f2-9251-4557-860b-27ec25c2d689',
 '/search/?q=%22John+Feinstein%22&source=trending&cId=657fbe21-ce37-4884-8d2e-ff275418f037&iId=d1eae849-9bee-4d0f-bd9d-971c123fd287',
 '/r/popular/best/',
 '/r/popular/hot/',
 

# Have GPT figure out which links are relevant #
### Use a call to gpt-4o-mini to read links and respond in structured JSON ###

Want to decide which links are relevant and replace relative links as /about with "https://company.com/about"
This is one shot prompting - providing a singular example of how it should respond in prompt

In [6]:
link_system_prompt = """
You have a list of links found on a webpage.
You are able to decide which of the links would be most relevant to include in a brochure about the company,
such as links to an about me page, company page, or a Career\jobs page.
You should respond in JSON as in this example: 
{
"links": [
    {"type":"About Page", "url":"https://www.company_name.com/about"},
    {"type":"Career Page","url":"https://www.company_name.com/maybe/other/links/careers"}
    ]
}
"""

  link_system_prompt = """


In [7]:
def get_user_links_prompt(website):
    user_prompt = f"""
    Here is the list of links on the website of {website.url}
    Please decide which of these are relevant web links for a brochure about the company.  Respond with the full url and do not include
    terms of Service, Privacy, or email links.
    
    The links are here (some may be relative): {website.links}
    """
    return user_prompt
    

In [8]:
print(get_user_links_prompt(reddit))


    Here is the list of links on the website of http://www.reddit.com/
    Please decide which of these are relevant web links for a brochure about the company.  Respond with the full url and do not include
    terms of Service, Privacy, or email links.

    The links are here (some may be relative): ['#main-content', '/', 'https://www.reddit.com/login/', '/search/?q=%22England+squad%22+AND+%28Tuchel+OR+%22world+cup%22%29&source=trending&cId=657fbe21-ce37-4884-8d2e-ff275418f037&iId=3c4f338a-4022-4936-85ac-61eb36aa98c7', '/search/?q=plane+AND+fire+AND+%28Denver+OR+%22American+Airlines%22%29&source=trending&cId=657fbe21-ce37-4884-8d2e-ff275418f037&iId=a490446b-6e52-4467-bc28-d9051968e383', '/search/?q=%22Silent+Hill+f%22&source=trending&cId=657fbe21-ce37-4884-8d2e-ff275418f037&iId=0dda9d9c-2ce3-4e24-bc2e-906cf821cca3', '/search/?q=Metcalf+AND+Normani&source=trending&cId=657fbe21-ce37-4884-8d2e-ff275418f037&iId=3afee045-8eea-4fe8-88a7-15174d7887ad', '/search/?q=%22Johns+Hopkins%22+AND+%2

In [9]:
#Now let's write function

def get_links(url):
    website = Website(url)
    completion = openai.chat.completions.create(
        model = MODEL,
        messages= [
            {"role":"system","content":link_system_prompt},
            {"role":"user","content":get_user_links_prompt(website)}
        ],
        response_format = {"type":"json_object"} # We tell OpenAI to provide a Json object back in it's response. This is only for OpenAI.
    )
    result = completion.choices[0].message.content 
    return json.loads(result)

In [10]:
get_links("https://reddit.com")

{'links': [{'type': 'Company Page', 'url': 'https://www.redditinc.com'},
  {'type': 'Career Page', 'url': 'https://www.redditinc.com/careers'},
  {'type': 'Press Page', 'url': 'https://www.redditinc.com/press'},
  {'type': 'About Page', 'url': 'https://redditblog.com/'}]}

# Make a brochure #

Here we want to assemble all the details from above into a separate prompt.

In [None]:
def get_all_details(url):
    result = "Landing page:\n"
    result += Website(url).get_contents()
    links = get_links(url)
    print("Found links: ", links)
    # put links in the block of text with contents of url
    # and grab contents of each of those links to add to result block
    for link in links["links"]:
        result += f"\n\n {link['type']}\n"
        result += Website(link["url"]).get_contents()
    return result

In [14]:
print(get_all_details("https://www.anthropic.com"))

Found links:  {'links': [{'type': 'About Page', 'url': 'https://www.anthropic.com/company'}, {'type': 'Career Page', 'url': 'https://www.anthropic.com/careers'}, {'type': 'Research Page', 'url': 'https://www.anthropic.com/research'}, {'type': 'Team Page', 'url': 'https://www.anthropic.com/team'}]}
Landing page:
Webpage Title:
Home \ Anthropic
Webpage Contents:
Skip to main content
Skip to footer
Claude
API
Research
Commitments
Learn
News
Try Claude
AI
research
and
products
that put safety at the frontier
Claude.ai
Meet Claude 3.7 Sonnet
Claude 3.7 Sonnet, our most intelligent AI model, is now available.
Talk to Claude
API
Build with Claude
Create AI-powered applications and custom experiences using Claude.
Learn more
Claude 3.7 Sonnet and Claude Code
Introducing Claude 3.7 Sonnet, our most intelligent model yet and the first hybrid reasoning model. We’re also launching Claude Code, an agentic tool for coding.
Read the announcement
Model details
Claude 3.7 Sonnet
Research insights
Claud

In [15]:
#2nd call to open AI with all this info to make brochure
system_prompt = "You are an assistant that analyzes the contents of several related web pages from a company website \
    and creates a short brochure about the company for prospective customers, investors and recruits.  Respond in markdown. \
    Include details of company culture, customers and careers/jobs if relevant information is available."

In [18]:
def get_brochure_details(company_name, url):
    user_prompt = f"You are looking at a company called {company_name} \n"
    user_prompt += f"Here are the contents of its landing page and other relevant pages; use this information to build a short brochure: \n"
    user_prompt += get_all_details(url)
    user_prompt += user_prompt[:20000] # truncate after 20k characters
    return user_prompt

In [19]:
get_brochure_details("Eli - Lilly","https://www.lilly.com")

Found links:  {'links': []}


'You are looking at a company called Eli - Lilly \nHere are the contents of its landing page and other relevant pages; use this information to build a short brochure: \nLanding page:\nWebpage Title:\nJust a moment...\nWebpage Contents:\nEnable JavaScript and cookies to continue\n\nYou are looking at a company called Eli - Lilly \nHere are the contents of its landing page and other relevant pages; use this information to build a short brochure: \nLanding page:\nWebpage Title:\nJust a moment...\nWebpage Contents:\nEnable JavaScript and cookies to continue\n\n'

In [20]:
def create_brochure(company_name, url):
    response = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": get_brochure_details(company_name, url)}
          ],
    )
    result = response.choices[0].message.content
    display(Markdown(result))

In [23]:
create_brochure("Toyota","https://www.toyota.com")

Found links:  {'links': [{'type': 'About Page', 'url': 'https://global.toyota/en/'}, {'type': 'Career Page', 'url': 'https://careers.toyota.com/us/en/'}, {'type': 'Company Page', 'url': 'https://pressroom.toyota.com/'}, {'type': 'Toyota Financial Services', 'url': 'https://www.toyotafinancial.com/us/en.html'}, {'type': 'Local Dealers', 'url': 'https://www.toyota.com/dealers/#default'}]}


# Toyota Brochure

Welcome to **Toyota**, where innovation meets excellence in the automotive world. As a leader in creating high-quality cars, trucks, SUVs, and hybrids, Toyota is committed to delivering mobility solutions that inspire and empower customers globally.

---

## Our Vehicles
Toyota's diverse lineup features:

- **Cars & Minivans**: From the classic **Corolla** to the stylish **Camry** and versatile **Sienna**.
- **Trucks**: Powerful options like the **Tacoma** and **Tundra**, designed for those who demand durability and performance.
- **Crossovers & SUVs**: The exciting **RAV4** and the luxurious **Highlander** offer flexibility for family adventures.
- **Electrified Vehicles**: Join the future with our hybrids, plug-in hybrids, and fuel cell electric vehicles, like the **Prius** and **Mirai**. 

Each model comes equipped with cutting-edge technology and safety features aimed at enhancing your driving experience.

---

## Company Culture
At Toyota, we embrace a culture of **collaboration, innovation, and sustainability**. Our core mission is to create a better society through mobility, ensuring that every individual can move freely and comfortably. We believe in empowering our team members, fostering an inclusive and diverse environment that celebrates creativity and continuous learning.

### Employee Experience
- **Growth Opportunities**: We invest in our employees by providing ample opportunities for professional development and career advancement.
- **Team Spirit**: Collaboration across various departments encourages knowledge sharing and teamwork, essential for innovative solutions.
- **Community Engagement**: Toyota is dedicated to supporting local communities and initiatives aimed at sustainability and education.

---

## Customer Commitment
Toyota is dedicated to providing an exceptional customer experience throughout the ownership journey:
- **ToyotaCare**: Every new Toyota comes with two years of complementary maintenance and roadside assistance.
- **Flexible Financing**: Through **Toyota Financial Services**, we offer innovative financing solutions tailored to meet individual customer needs.
- **SmartPath**: An online shopping tool that simplifies the vehicle purchasing process, making it more accessible and user-friendly.

---

## Career Opportunities
Toyota is always on the lookout for talented individuals to join our team. If you believe in creating the future of mobility, consider a career with us! We have roles in various fields, including:

- **Technology**: Join our teams to develop innovative software and technology solutions.
- **Engineering**: Work on next-gen vehicle designs and systems to enhance performance and safety.
- **Customer Service**: Be part of a dynamic team that ensures ultimate satisfaction for our customers.

### How to Apply
Visit our careers page to learn more about open positions and to submit your application. We look forward to welcoming passionate and motivated individuals to the Toyota family.

---

## Join Us in Driving the Future
Explore beyond zero with Toyota's commitment to sustainability and innovation. Together, we can lead the way to a cleaner, safer, and smarter future in mobility.

**Visit us**: [Toyota Official Site](https://www.toyota.com)

--- 

For inquiries, please contact us via our website or find a local dealer. Toyota - **Let’s Go Places.**

### Streaming Data ###

In [None]:
def stream_brochure(company_name, url):
    stream = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": get_brochure_details(company_name, url)}
          ],
        stream=True #This allows data to 'flow' back in chunks, rather than a single text of response
    )
    
    response = ""
    display_handle = display(Markdown(""), display_id=True) #empty markdown element
    for chunk in stream: # as data arrives
        response += chunk.choices[0].delta.content or '' #extract text from response chunk, where delta contains only the newest part of the response
        response = response.replace("```","").replace("markdown", "") #Ensure we don't add None but rather an empty string
        update_display(Markdown(response), display_id=display_handle.display_id)

In [25]:
stream_brochure("Cedars Sinai Hospital","https://www.cedars-sinai.org")

Found links:  {'links': []}



# Cedars Sinai Hospital

### About Us
Cedars Sinai Hospital is a leading healthcare institution located in Los Angeles, California. Renowned for its exceptional patient care and innovative medical research, Cedars Sinai is committed to improving health outcomes and providing a comprehensive range of services to the community.

### Company Culture
At Cedars Sinai, we foster a collaborative and inclusive environment where every team member's contributions are valued. Our culture is built on the principles of compassion, integrity, and excellence. We are dedicated to creating a workplace that encourages professional development, teamwork, and mutual respect.

### Our Patients
Our primary focus is our patients. Cedars Sinai serves a diverse population, providing high-quality healthcare to individuals from all walks of life. We strive for excellence in clinical practices and maintain a patient-centered approach in everything we do, ensuring each individual receives personalized treatment that is respectful and compassionate.

### Careers & Job Opportunities
Cedars Sinai Hospital offers a variety of career paths in the healthcare sector, ranging from clinical positions to administrative roles. We seek passionate individuals who are committed to making a difference in the lives of patients. Join us for a fulfilling career where you can grow, learn, and make a significant impact.

- **Benefits of Working with Us:**
  - Competitive salary and comprehensive benefits.
  - Opportunities for professional development and advancement.
  - A supportive and inclusive work environment.

### Join Us
Whether you're looking for exceptional healthcare services, seeking a career in medicine, or considering an investment in the healthcare sector, Cedars Sinai Hospital stands at the forefront of patient care and medical innovation. 

For more information about our services, career opportunities, and values, visit us at our [official website](#).

---

*This brochure is designed to provide a glimpse of what Cedars Sinai Hospital represents. We invite you to explore further and become a part of our mission to deliver exceptional healthcare.* 

