In [2]:
import os
import requests
import json
from typing import List
from dotenv import load_dotenv
from IPython.display import Markdown, display
from bs4 import BeautifulSoup
from openai import OpenAI

In [3]:
# Load variables from the .env file into the process environment.
load_dotenv(override=True)

# Warn (but do not stop) when the OpenAI key is absent or empty.
api_key = os.environ.get("OPENAI_API_KEY")
if api_key in (None, ""):
    print("couldn't catch API key")

# Model name passed to every chat-completion call in this notebook.
model = "gpt-4o-mini"

# Shared client; presumably picks up OPENAI_API_KEY from the environment.
openai = OpenAI()

In [4]:
# Browser-like User-Agent header used for the page requests made below.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/117.0.0.0 Safari/537.36"
    ),
}

class Website:
    """A fetched web page reduced to its title, visible text and links.

    Attributes:
        url: The address that was requested.
        title: Text of the page's <title>, or '' when there is no usable title.
        text: Text of <body> after removing script/style/img/input elements,
            with the stripped text fragments joined by newlines; '' when the
            page has no <body>.
        links: Every non-empty href found on an <a> tag, exactly as written
            in the page (so entries may be relative).
    """

    # Seconds to wait on the server before giving up; without a timeout a
    # stalled host would block the notebook indefinitely.
    REQUEST_TIMEOUT = 10

    def __init__(self,url):
        """Download `url` and extract its title, body text and links.

        Raises:
            requests.exceptions.RequestException: propagated unchanged from
                requests.get (connection failure, timeout, ...).
        """
        self.url =  url
        response = requests.get(url, headers=headers, timeout=self.REQUEST_TIMEOUT)
        soup = BeautifulSoup(response.content, 'html.parser')

        # .string is None for an empty <title> or one with nested tags; fall
        # back to '' so the prompt never contains the literal text "None".
        self.title = (soup.title.string or '') if soup.title else ''

        if soup.body:
            # Drop elements that carry no readable text before extracting it.
            for irrelevant in soup.body(["script", "style", "img", "input"]):
                irrelevant.decompose()
            self.text = soup.body.get_text(separator='\n', strip=True)
        else:
            self.text = ''

        # Keep only anchors that actually have a non-empty href.
        hrefs = [anchor.get('href') for anchor in soup.find_all('a')]
        self.links = [href for href in hrefs if href]

    def website_content(self):
        """Return the title and body text formatted as one prompt-ready string."""
        return (f'The page title is:{self.title}\nThe content is:\n{self.text}')

In [5]:
# Quick check that scraping works
# (alternative target: https://www.nytimes.com/international/).
a = Website("https://edwarddonner.com")
print(a.title)

Home - Edward Donner


In [6]:
# System prompt for the link-selection call. The embedded example is the shape
# the model is asked to reproduce, so it has to be valid JSON itself.
link_system_prompt = (
    "You are provided with a list of links found on a webpage. "
    "You are able to decide which of the links would be most relevant to include in a brochure about the company, "
    "such as links to an About page, or a Company page, or Careers/Jobs pages.\n"
)
link_system_prompt += "You should respond in JSON as in this example:"
link_system_prompt += """
{
    "links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page", "url": "https://another.full.url/careers"}
    ]
}
"""

In [7]:
def get_links_user_prompt(website):
    """Build the user message asking the model to pick brochure-worthy links.

    Args:
        website: Object exposing `url` (str) and `links` (iterable of str),
            such as a Website instance.

    Returns:
        The instruction text followed by the links, one per line.
    """
    # Adjacent literals instead of a backslash continuation inside the string:
    # the continuation pulled the next line's indentation into the prompt.
    instructions = (
        f"Here is the list of links on the website of {website.url} - "
        "please decide which of these are relevant web links for a brochure about the company, "
        "respond with the full https URL in JSON format. "
        "Do not include Terms of Service, Privacy, email links.\n"
        "Links (some might be relative links):\n"
    )
    return instructions + "\n".join(website.links)

In [8]:
def get_relevant_links(url):
    """Ask the model which links on the page at `url` belong in a brochure.

    Fetches the page, sends its links to the chat model together with the
    link-selection system prompt, requests a JSON-object reply, and returns
    that reply parsed with json.loads.
    """
    page = Website(url)
    conversation = [
        {'role': 'system', 'content': link_system_prompt},
        {'role': 'user', 'content': get_links_user_prompt(page)},
    ]
    completion = openai.chat.completions.create(
        model=model,
        messages=conversation,
        response_format={"type": "json_object"},
    )
    reply_text = completion.choices[0].message.content
    return json.loads(reply_text)

In [130]:
# Try the link selector on a real site; the cell shows the parsed reply.
get_relevant_links(url='https://huggingface.co')

{'links': [{'type': 'about page', 'url': 'https://huggingface.co/huggingface'},
  {'type': 'careers page', 'url': 'https://apply.workable.com/huggingface/'},
  {'type': 'enterprise page', 'url': 'https://huggingface.co/enterprise'},
  {'type': 'pricing page', 'url': 'https://huggingface.co/pricing'},
  {'type': 'blog page', 'url': 'https://huggingface.co/blog'},
  {'type': 'community discussion page',
   'url': 'https://discuss.huggingface.co'},
  {'type': 'company social media page',
   'url': 'https://twitter.com/huggingface'},
  {'type': 'company social media page',
   'url': 'https://www.linkedin.com/company/huggingface/'}]}

In [11]:
def get_all_details(url):
    """Collect the landing page plus every model-selected page as one string.

    Args:
        url: Address of the company's landing page.

    Returns:
        "Landing page:" followed by that page's content, then one section per
        selected link, each headed by the link's type.
    """
    # Local import keeps this cell runnable without touching the import cell.
    from urllib.parse import urljoin

    content = f"Landing page:\n{Website(url).website_content()}"
    selected = get_relevant_links(url)
    # Tolerate a reply without a "links" list instead of raising KeyError.
    for link in selected.get('links') or []:
        href = link.get('url')
        if not href:
            continue
        content += f"\n\n{link.get('type', 'page')}\n"
        # The page's links may be relative; resolve against the landing page.
        # urljoin leaves absolute URLs untouched.
        content += Website(urljoin(url, href)).website_content()
    return content

In [16]:
#print(get_all_details('https://huggingface.co'))

In [21]:
# System prompt for the brochure-writing call. Built from adjacent literals so
# each sentence keeps its separating space (the backslash continuation glued
# "markdown." and "Include" together).
system_prompt = (
    "You are an assistant that analyzes the contents of several relevant pages from a company website "
    "and creates a short brochure about the company for prospective customers, investors and recruits. "
    "Respond in markdown. "
    "Include details of company culture, customers and careers/jobs if you have the information."
)

In [22]:
def get_brochure_user_prompt(company_name, url, max_chars=5_000):
    """Build the user message for the brochure-writing call.

    Args:
        company_name: Name shown to the model as the company being described.
        url: Landing-page address whose content (and selected sub-pages) is
            appended to the prompt.
        max_chars: Upper bound on the prompt length in characters; pass None
            to send the full text.

    Returns:
        The prompt text, cut to at most `max_chars` characters.
    """
    user_prompt = f"You are looking at a company called: {company_name}\n"
    user_prompt += "Here are the contents of its landing page and other relevant pages; use this information to build a short brochure of the company in markdown.\n"
    user_prompt += get_all_details(url)
    # Actually apply the truncation; the previous self-assignment was a no-op.
    if max_chars is not None:
        user_prompt = user_prompt[:max_chars]
    return user_prompt

In [19]:
def create_brochure(company_name, url):
    """Generate brochure text for a company from the pages found at `url`.

    Sends the brochure system prompt and the user prompt built by
    get_brochure_user_prompt to the chat model and returns the content of
    the first choice in the reply.
    """
    chat = [
        {'role': 'system', 'content': system_prompt},
        {'role': 'user', 'content': get_brochure_user_prompt(company_name, url)},
    ]
    completion = openai.chat.completions.create(model=model, messages=chat)
    return completion.choices[0].message.content

In [23]:
# Build the brochure text; rendering happens in the next cell.
result = create_brochure(company_name="HuggingFace", url="https://huggingface.co")

In [24]:
# Wrap the brochure text in Markdown and display it as the cell output.
display(Markdown(result))

# Hugging Face Company Brochure

## Welcome to Hugging Face
**The AI community building the future.**  
At Hugging Face, we are on a mission to democratize machine learning by providing a collaborative platform for creators, researchers, and enterprises. Our community is where AI enthusiasts come together to explore and innovate, working with models, datasets, and applications across various domains.

## What We Offer
- **1M+ Models & 250k+ Datasets:** Explore a vast library of machine learning models and datasets. Our platform supports collaboration on an unlimited number of public resources, enabling faster and more effective development in the AI space.
- **AI Apps:** Discover AI applications created by our community. Whether it’s generating images or building conversational AI, there’s a solution for every problem.
- **Enterprise Solutions:** We provide specialized compute solutions and enterprise-grade security, ensuring that organizations can innovate securely and efficiently.

## Our Customers
Hugging Face is proud to serve over **50,000 organizations**, including industry leaders like:
- **Microsoft**
- **Google**
- **Amazon**
- **Intel**
- **Grammarly**

These organizations leverage our cutting-edge technology and comprehensive support to enhance their AI capabilities.

## Company Culture
At Hugging Face, we believe in:

- **Collaboration:** Our work environment encourages team collaboration, sharing ideas, and learning from each other. 
- **Open Source:** Our commitment to open-source initiatives supports the collective growth of the AI community, as we share our technologies and models with the world.
- **Innovation:** We foster a culture of creativity and experimentation, empowering our team to push the boundaries of what AI can achieve.

## Careers at Hugging Face
Join our passionate team of about **213 individuals** dedicated to making a difference in the machine learning landscape. We are always looking for innovative minds to help us tackle the challenges of AI. Our roles span across various functions including engineering, research, marketing, and community support.

### Benefits of Working with Us:
- Work on groundbreaking AI technologies.
- Collaborative work environment.
- Flexible and inclusive workplace that values diverse perspectives.
- Opportunities for professional growth and development.
  
### Join Us
**Explore Opportunities:**  
We are eager to expand our team with talented individuals who resonate with our mission. [Check out our job openings!](https://huggingface.co/jobs)

---

## Connect with Us
Stay updated with our latest developments, articles, and community discussions:
- [GitHub](https://github.com/huggingface)
- [Twitter](https://twitter.com/huggingface)
- [LinkedIn](https://www.linkedin.com/company/huggingface)

### Let's Build the Future of AI Together!  
Join us at Hugging Face and be part of a community that is shaping the future of machine learning. Sign up today at [huggingface.co](https://huggingface.co). 

--- 

© 2023 Hugging Face, Inc. All rights reserved.

## Translate to other languages