In [49]:
import os
import requests
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from IPython.display import Markdown, display, update_display
from openai import OpenAI

import selenium
from selenium import webdriver
from selenium.webdriver.chrome.options import Options as ChromeOptions

import ollama

from pydantic import BaseModel
from typing import List, Tuple, Dict

In [12]:
load_dotenv()
api_key = os.getenv('OPENAI_API_KEY')
client = OpenAI(api_key = api_key)

service = webdriver.ChromeService(executable_path = "C:\\Program Files\\chromedriver-win64\\chromedriver.exe")
# so webpage won't pop up, better performance
chrome_options = ChromeOptions()
chrome_options.add_argument("--headless")

MODEL = 'gpt-4o-mini'

### 1 - define website object
- stores essential messages
- uses selenium, also scrapes links

In [3]:
class Website:
    def __init__(self, url):
        self.url = url
        response = self._scrape()
        soup = BeautifulSoup(response, 'html.parser')
        self.title = soup.title.string if soup.title else "No Title Found"
        for irrelevant in soup.body(['script', 'style', 'img', 'input']):
            irrelevant.decompose()
        self.text = soup.body.get_text(separator = '\n', strip = True)
        links = [link.get('href') for link in soup.find_all('a')]
        self.links = [link for link in links if link and link[:4] == 'http']

    def _scrape(self) -> str:
        driver = webdriver.Chrome(service=service, options=chrome_options)
        driver.get(self.url)
        driver.implicitly_wait(0.001)
        page = driver.page_source
        driver.close()
        return page

    def get_content(self):
        return f"Webpage Title: \n{self.title}\nWebpage Contents: \n{self.text}\n\n" 

In [14]:
ed = Website('https://www.pinecone.io/')
print(ed.links)

['https://docs.pinecone.io', 'https://app.pinecone.io/?sessionType=login', 'https://app.pinecone.io/?sessionType=signup', 'https://app.pinecone.io/', 'https://docs.pinecone.io/docs/get-started/overview', 'https://docs.pinecone.io/models/overview', 'https://docs.pinecone.io/reference/architecture/serverless-architecture', 'https://docs.pinecone.io/guides/data/query-data#filter-by-metadata', 'https://docs.pinecone.io/reference/architecture/serverless-architecture', 'https://docs.pinecone.io/guides/indexes/understanding-indexes#sparse-indexes', 'https://docs.pinecone.io/guides/inference/rerank', 'https://docs.pinecone.io/guides/inference/rerank', 'https://docs.pinecone.io/guides/indexes/implement-multitenancy', 'https://docs.pinecone.io/guides/indexes/implement-multitenancy', 'https://docs.pinecone.io/integrations/overview', 'https://app.pinecone.io/', 'https://x.com/pinecone', 'https://www.linkedin.com/company/pinecone-io', 'https://www.youtube.com/@pinecone-io', 'https://github.com/pine

### 2 - gpt-4o-mini identify relevant links 

In [None]:
class Link(BaseModel):
    link_type: str
    url: str

class Links(BaseModel):
    links: List[Link]

In [10]:
link_system_prompt = """You are provided with a list of links found on a webpage. 
You are able to decide which of the links would be most relevant to include in a brochure about the company, 
such as links to an About page, or a Company page, or Careers/Jobs pages.
You should respond in JSON as in this example:
{
    "links": [
        {"link_type": "about page", "url": "https://full.url/goes/here/about"},
        {"link_type": "careers page", "url": "https://another.full.url/careers"}
    ]
}
"""

In [28]:
def link_user_prompt(website: Website):
    user_prompt = f"Here is the list of links on the website of {website.url} - "
    user_prompt += "please decide which of these are relevant web links for a brochure about the company, respond with the full https URL in JSON format. \
Do not include Terms of Service, Privacy, email links.\n"
    user_prompt += "Links (some might be relative links):\n"
    user_prompt += "\n".join(website.links)
    return user_prompt

def get_links(website: Website):
    completion = client.beta.chat.completions.parse(
        model = MODEL,
        messages = [
            {"role": "system", "content": link_system_prompt},
            {"role": "system", "content": link_user_prompt(website)}
        ], 
        response_format = Links
    )
    
    response = completion.choices[0].message
    links = response.parsed
    return links

In [29]:
web = Website('https:/anthropic.com')
res = get_links(web)
print(res)
for link in res.links:
    print(link.url)

links=[Link(link_type='company page', url='https://www.anthropic.com/company'), Link(link_type='about page', url='https://www.anthropic.com/team'), Link(link_type='careers page', url='https://www.anthropic.com/careers'), Link(link_type='news page', url='https://www.anthropic.com/news'), Link(link_type='contact sales page', url='https://www.anthropic.com/contact-sales'), Link(link_type='events page', url='https://www.anthropic.com/events'), Link(link_type='learn page', url='https://www.anthropic.com/learn')]
https://www.anthropic.com/company
https://www.anthropic.com/team
https://www.anthropic.com/careers
https://www.anthropic.com/news
https://www.anthropic.com/contact-sales
https://www.anthropic.com/events
https://www.anthropic.com/learn


### 3 - gather all details and deploy

In [35]:
def get_web_details(url):
    result = "Landing page: \n<landing_page>\n"
    website = Website(url)
    result += website.get_content()
    result += '\n</landing_page>'
    links = get_links(website)
    print("Found links:", links.links)
    for link in links.links:
        result += f"\n\n{link.link_type}: \n<{link.link_type}>\n"
        result += Website(link.url).get_content()
        result += f"\n</{link.link_type}_page>\n"
    return result

In [36]:
details = get_web_details('https:/anthropic.com')
print(details[:100])

Found links: [Link(link_type='about page', url='https://www.anthropic.com/company'), Link(link_type='careers page', url='https://www.anthropic.com/careers'), Link(link_type='team page', url='https://www.anthropic.com/team')]
Landing page: 
<landing_page>
Webpage Title: 
Home \ Anthropic
Webpage Contents: 
Skip to main conte


In [46]:
system_prompt = "You are an assistant that analyzes the contents of several relevant pages from a company website \
and creates a short brochure about the company for prospective customers, investors and recruits. Respond in markdown.\
Include details of company culture, customers and careers/jobs if you have the information."

def get_brochure_user_prompt(company_name, url):
    user_prompt = f"You are looking at a company called: {company_name}\n"
    user_prompt += f"Here are the contents of its landing page and other relevant pages; use this information to build a short brochure of the company in markdown.\n\n"
    user_prompt += get_web_details(url)
    user_prompt = user_prompt[:20_000] # Truncate if more than 5,000 characters
    return user_prompt

# get_brochure_user_prompt('Anthropic', 'https:/anthropic.com')

In [47]:
def create_brochure(company_name, url):
    response = client.chat.completions.create(
        model = MODEL,
        messages = [
            {"role": "system", "content": system_prompt}, 
            {"role": "user", "content": get_brochure_user_prompt(company_name, url)}
        ]
    )
    result = response.choices[0].message.content.replace("```", "").replace("markdown", "")
    display(Markdown(result))
    return result

In [48]:
result = create_brochure('Anthropic', 'https:/anthropic.com')

Found links: [Link(link_type='homepage', url='https://www.anthropic.com/'), Link(link_type='about page', url='https://www.anthropic.com/company'), Link(link_type='careers page', url='https://www.anthropic.com/careers'), Link(link_type='team page', url='https://www.anthropic.com/team'), Link(link_type='research page', url='https://www.anthropic.com/research'), Link(link_type='news page', url='https://www.anthropic.com/news'), Link(link_type='events page', url='https://www.anthropic.com/events'), Link(link_type='learn page', url='https://www.anthropic.com/learn'), Link(link_type='customers page', url='https://www.anthropic.com/customers')]


# Anthropic Brochure

---

## About Anthropic

At Anthropic, we are committed to building AI systems that prioritize safety and reliability at their core. Founded on the belief that AI will significantly impact humanity, we conduct extensive research and develop solutions to harness AI's potentials while minimizing its risks. Our flagship product, **Claude**, showcases our most advanced AI models, providing businesses and society with reliable tools designed for the long-term benefit of humanity.

---

## Our Mission

We aim to build systems that people can trust. Our focus on AI safety leads us to:

- Conduct rigorous research in AI ethics and safety.
- Develop interpretable and steerable AI systems.
- Collaborate across industries, government, and academia to promote a safe AI ecosystem.

---

## Core Values

1. **Act for the Global Good**: We prioritize decisions that maximize positive outcomes for humanity.
2. **Hold Light and Shade**: Balancing the potential risks and benefits of AI is central to our approach.
3. **Be Good to Our Users**: We define "users" broadly, valuing kindness and generosity in all interactions.
4. **Ignite a Race to the Top on Safety**: We strive to set the industry standard for safety and security.
5. **Do the Simple Thing That Works**: We embrace empirical problem-solving to maximize impact.
6. **Be Helpful, Honest, and Harmless**: Maintaining trust and open communication is vital to our culture.
7. **Put the Mission First**: Our shared purpose drives collaboration and swift action.

---

## Our Products

### Claude Models

- **Claude Opus 4**: Our most advanced model, designed to handle complex tasks with ease.
- **Claude Sonnet 4**: Focusing on coding capabilities and collaboration between AI and human teams.
- **Claude Haiku 3.5**: Effective conversational abilities for customer support and engagement.

### API Services

We offer robust APIs to enable you to build AI-powered applications tailored to your business needs. Our comprehensive developer documentation ensures a seamless integration process.

---

## Company Culture

Anthropic fosters a collaborative environment where multidisciplinary teams work together. Our team includes researchers, engineers, policy experts, and operational leaders with backgrounds from various domains. We believe in nurturing our team and providing a supportive atmosphere that values open communication and continuous learning.

Our **Anthropic Academy** provides resources to enhance skills in AI development, ensuring that our team and customers can stay at the forefront of technology.

---

## Careers at Anthropic

Join us in shaping the future of safe AI! We are always on the lookout for passionate individuals who are eager to contribute to our mission. Explore our current job openings and become part of a forward-thinking company dedicated to making a positive impact in the world through AI.

---

## Commitment to Safety

As a **Public Benefit Corporation**, Anthropic is dedicated to the responsible development of advanced AI systems. Our governance structure encourages transparency and prioritizes the long-term benefits of our technology. We are committed to sharing insights and findings related to AI safety, ensuring that we contribute to a wider understanding of the implications of AI on society.

---

## Get in Touch

To learn more about our products or to discuss partnership opportunities, visit our website at [anthropic.com](https://www.anthropic.com). Together, we can build a future where AI serves humanity's best interests.

---

### Follow us on Social Media

Stay updated with our news and developments by following us on our social platforms!

---

Thank you for considering Anthropic—where innovation meets responsibility!

### 4 - Enable Streaming
- return in real time, interactive

In [59]:
def create_stream_brochure(company_name, url):
    stream = client.responses.create(
        model = MODEL,
        input = [
            {"role": "system", "content": system_prompt}, 
            {"role": "user", "content": get_brochure_user_prompt(company_name, url)}
        ],
        stream = True 
    ) # responses api

    response = ""
    display_handle = display(Markdown(""), display_id = True)
    for event in stream:
        if event.type == 'response.output_text.delta':
            response += event.delta
            response = response.replace("```", "").replace("markdown", "")
            # update display
            update_display(Markdown(response), display_id = display_handle.display_id)
    
    return response

In [60]:
res_anthropic = create_stream_brochure('Anthropic', 'https:/anthropic.com')

Found links: [Link(link_type='company page', url='https://www.anthropic.com/company'), Link(link_type='about page', url='https://www.anthropic.com/about'), Link(link_type='careers page', url='https://www.anthropic.com/careers'), Link(link_type='team page', url='https://www.anthropic.com/team'), Link(link_type='events page', url='https://www.anthropic.com/events'), Link(link_type='news page', url='https://www.anthropic.com/news')]


# Anthropic Brochure

## **About Us**
Anthropic is a pioneering AI safety and research company focused on creating reliable, interpretable, and steerable AI systems. At the forefront of AI innovation, our mission is to harness the potential of artificial intelligence for the long-term well-being of humanity. We prioritize safety as a systematic science and aim to ensure the responsible development and deployment of AI technologies.

### **Our Purpose**
We believe that AI will profoundly impact society and thus strive to build systems that prioritize safety, reliability, and understanding.

## **Our Products**
### **Claude**
Meet **Claude**, our flagship AI model family designed to enhance workflows across various sectors, including coding, customer support, and education. With models such as **Claude Opus 4** and **Claude Sonnet 4**, we are pushing the boundaries of what AI can achieve for businesses and individuals alike.

## **Customer Solutions**
Anthropic partners with businesses and organizations to create AI-driven applications that cater to:
- **AI agents**
- **Customer support**
- **Coding assistance**
- **Educational tools**

Explore our diverse offerings and discover how Claude can transform your operations.

## **Company Culture**
At Anthropic, we foster an inclusive and collaborative environment that values diverse experiences and perspectives. Our team consists of researchers, engineers, policy experts, and operational leaders dedicated to enhancing AI safety.

### **Core Values**
1. **Act for Global Good**: We prioritize decisions that maximize positive outcomes for humanity.
2. **Hold Light and Shade**: Balancing potential risks and rewards is paramount in our mission.
3. **Be Good to Our Users**: We cultivate kindness and generosity in all our interactions.
4. **Ignite a Race to the Top on Safety**: We encourage a competitive standard for AI safety across the industry.
5. **Do the Simple Thing That Works**: We favor empirical and effective solutions over complexity.
6. **Be Helpful, Honest, and Harmless**: We foster clear communication and mutual respect.
7. **Put the Mission First**: Our shared purpose drives collaboration and accountability.

## **Careers at Anthropic**
We are on the lookout for passionate individuals to join our diverse team as we advance the frontier of AI safety. If you're excited about shaping the future of AI and sharing our commitment to responsible development, explore our current job openings to find your place at Anthropic.

### **Your Impact**
Working at Anthropic means contributing to a crucial mission: ensuring that AI is developed and utilized responsibly for the benefit of all.

---

For more information on our products, career opportunities, and the latest news, please visit our website at [Anthropic.com](https://www.anthropic.com). 

Together, let's build a safer and more responsible AI future!

In [61]:
res_hugginface = create_stream_brochure('Huggin Face', 'https://huggingface.co/')

Found links: [Link(link_type='careers page', url='https://apply.workable.com/huggingface/'), Link(link_type='company page', url='https://www.linkedin.com/company/huggingface/')]



# Hugging Face: The AI Community Building the Future

## About Us
Hugging Face is a pioneering technology company founded in 2016, dedicated to democratizing artificial intelligence through natural language processing (NLP) and deep learning. We create a collaborative platform where developers, researchers, and organizations can connect over more than **1 million models**, **250,000 datasets**, and countless applications.

### Our Mission
To transform and democratize AI, making it accessible for everyone. We believe in open-source principles, working alongside our community to build an innovative ecosystem that promotes shared learning and collaborative growth.

## Company Culture
At Hugging Face, we foster a diverse and inclusive environment that encourages creativity and collaboration. Our team size ranges from **51 to 200** members, promoting an intimate, yet impactful workplace where everyone's ideas are valued. We believe in:

- **Innovation**: Encouraging groundbreaking solutions in AI.
- **Community Engagement**: Actively involving users in projects and developments.
- **Continuous Learning**: Providing resources and opportunities for skill development.

## Products and Services
Hugging Face offers a suite of products and services tailored for various machine learning needs:

- **Hugging Face Hub**: Host and collaborate on unlimited public models and datasets.
- **Compute Solutions**: Provide enterprise-grade security and support for advanced AI applications.
- **Transformers Library**: A state-of-the-art library that integrates seamlessly with PyTorch, TensorFlow, and JAX.

### Notable Features
- **Spaces**: Create and share interactive ML applications.
- **Accelerate**: Optimize model training for multi-GPU and TPU use.

## Who We Serve
Over **50,000 organizations** globally trust and utilize Hugging Face's resources, including major players like:

- **Amazon**
- **Google**
- **Meta**
- **Microsoft**

This collaboration with industry leaders fosters a vibrant community focused on pushing the boundaries of machine learning.

## Careers at Hugging Face
We're always on the lookout for passionate individuals who share our vision. We offer roles in various areas, from engineering and data science to product management and community leadership. Join our team to help shape the future of AI.

### Current Openings
Explore various job opportunities that align with your skills and interests on our [Careers Page](https://huggingface.co/jobs).

## Join Us
Interested in becoming part of our exciting journey to democratize AI? Sign up today or reach out for collaboration opportunities!

[Visit Our Website](https://huggingface.co) | [Join the Community](https://huggingface.co/community)

---

Together, let’s build the future of AI!
