# Section 1: Get the Relevant Links

In [46]:
import os 
import requests
import json
import validators
from bs4 import BeautifulSoup
from openai import OpenAI
from IPython.display import Markdown, display, update_display
from dotenv import load_dotenv

In [5]:
# Let python-dotenv load its settings, then read the OpenAI key from the
# process environment.
load_dotenv()
api_key = os.getenv("OPENAI_API_KEY")

# Sanity-check the key and print a hint for the most common mistakes.
if not api_key:
    print("No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!")
elif api_key.strip() != api_key:
    # Compare against the stripped key: str.split() returns a list, which never
    # equals the original string, so the previous check could not trigger.
    print("An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook")
elif not api_key.startswith("sk-proj-"):
    print("An API key was found, but it doesn't start sk-proj-; please check you're using the right key - see troubleshooting notebook")
else:
    print("API key found and looks good so far!")

API key found and looks good so far!


In [6]:
# Client object used for every chat-completion request in this notebook.
# No key is passed explicitly; presumably the client picks up OPENAI_API_KEY
# from the environment - confirm against the OpenAI SDK docs.
openai = OpenAI()

In [7]:
# Browser-like User-Agent header sent with each page request made via requests.get.
headers = { "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36"}

In [10]:
# Ask for the site to analyse. Surrounding whitespace (common when a URL is
# pasted) is dropped so it does not make an otherwise valid URL fail validation
# or end up inside the request.
myurl = input("Enter the required URL").strip()

# validators.url returns a falsy result for malformed URLs; this is only a
# report - later cells still use `myurl` as entered.
if validators.url(myurl):
    print("Valid URL.")
else:
    print("Invalid URL")

Enter the required URL https://huggingface.co/


Valid URL.


In [11]:
# Download the landing page at `myurl`, sending the browser-like `headers`.
response = requests.get(myurl,headers=headers)

In [15]:
# Show the HTTP status code of the landing-page response.
print(response.status_code)

200


In [16]:
# Parse the raw response bytes with the 'html.parser' backend.
soup=BeautifulSoup(response.content,'html.parser')

In [17]:
# Page title, with a placeholder when the document has no <title> tag.
title = soup.title.string if soup.title else "No title found."

# Documents without a <body> (error pages, non-HTML payloads) give
# soup.body == None; fall back to empty text instead of raising.
if soup.body is not None:
    # Drop elements that carry no readable content before extracting text.
    for irrelevant in soup.body(["script","img","style","input"]):
        irrelevant.decompose()
    text = soup.body.get_text(separator="\n",strip=True)
else:
    text = ""

In [33]:
def get_contents(title,text):
    """Format a page's title and text as one labelled block.

    Args:
        title: Page title to place under the "Webpage Title:" label.
        text: Page text to place under the "Webpage Contents:" label.

    Returns:
        A string with both labelled sections, ending in two newlines.
    """
    template = "Webpage Title:\n{}\nWebpage Contents:\n{}\n\n"
    return template.format(title, text)

In [19]:
# Every <a> element on the landing page.
all_links = soup.find_all('a')
# Keep only the href values that are present and non-empty.
links = list(filter(None, map(lambda anchor: anchor.get('href'), all_links)))

In [22]:
# System prompt for the link-selection request: describes the task and shows
# the JSON shape the reply should follow.
system_links_prompt = (
    "You are provided with a list of links found on a webpage. "
    "You are able to decide which of the links would be most relevant to include in a brochure about the company, "
    "such as links to an About page, or a Company page, or Careers/Jobs pages.\n"
    "You should respond in JSON as in this example:"
    """
{
"links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page", "url": "https://another.full.url/careers"}
    ]
}
"""
)

In [23]:
def get_user_links_prompt(myurl):
    """Build the user message asking the model to pick brochure-worthy links.

    Args:
        myurl: Website URL named in the opening sentence of the prompt.

    Returns:
        The instruction text followed by the module-level `links` list, one
        link per line. Note that the links come from the notebook-level
        variable, not from `myurl`.
    """
    instructions = (
        f"Here is the list of links on the website of {myurl} - "
        "please decide which of these are relevant web links for a brochure about the company, respond with the full https URL in JSON format. "
        "Do not include Terms of Service, Privacy, email links.\n"
        "Links (some might be relative links):\n"
    )
    return instructions + "\n".join(links)

In [25]:
# Chat messages for the link-selection request: the system instructions
# followed by the user prompt built for `myurl`.
messages_links = [
    dict(role='system', content=system_links_prompt),
    dict(role='user', content=get_user_links_prompt(myurl)),
]

In [28]:
def get_relevant_links(myurl):
    """Ask gpt-4o-mini which links are relevant and return its parsed JSON reply.

    Args:
        myurl: NOTE(review): not used in the body - the request is built from
            the module-level `messages_links`, so that variable must already
            describe the site of interest.

    Returns:
        The model's reply decoded with json.loads.
    """
    completion = openai.chat.completions.create(
        model="gpt-4o-mini",
        messages=messages_links,
        response_format={'type':'json_object'},
    )
    return json.loads(completion.choices[0].message.content)

In [29]:
# Run the link-selection request and display the parsed result.
get_relevant_links(myurl)

{'links': [{'type': 'about page', 'url': 'https://huggingface.co/huggingface'},
  {'type': 'careers page', 'url': 'https://apply.workable.com/huggingface/'},
  {'type': 'enterprise page', 'url': 'https://huggingface.co/enterprise'},
  {'type': 'blog page', 'url': 'https://huggingface.co/blog'},
  {'type': 'company page', 'url': 'https://huggingface.co/brand'},
  {'type': 'community page', 'url': 'https://discuss.huggingface.co'},
  {'type': 'status page', 'url': 'https://status.huggingface.co/'},
  {'type': 'GitHub page', 'url': 'https://github.com/huggingface'},
  {'type': 'LinkedIn page',
   'url': 'https://www.linkedin.com/company/huggingface/'},
  {'type': 'Twitter page', 'url': 'https://twitter.com/huggingface'}]}

# Section 2 : Making the Brochure

In [30]:
def scrape_web(myurl):
    """Download a page and return its title and readable text.

    Args:
        myurl: URL to fetch; the request is sent with the module-level `headers`.

    Returns:
        A `(title, text)` tuple. `title` is the document's <title> string, or
        "No title found." when there is no <title> tag. `text` is the body text
        with script/img/style/input elements removed, pieces joined by
        newlines, or "" when the document has no <body>.
    """
    response = requests.get(myurl,headers=headers)
    soup = BeautifulSoup(response.content,'html.parser')
    title = soup.title.string if soup.title else "No title found."

    body = soup.body
    if body is None:
        # Some responses (blocked requests, non-HTML payloads) parse without a
        # <body>; return empty text rather than failing on None.
        return title, ""

    # Remove elements that contribute no readable content.
    for irrelevant in body(["script","img","style","input"]):
        irrelevant.decompose()
    return title, body.get_text(separator="\n",strip=True)

In [37]:
def get_all_details_links(myurl):
    """Collect the text of the landing page and of each relevant linked page.

    Args:
        myurl: URL of the company's landing page.

    Returns:
        One string containing the landing-page contents followed by a
        labelled section for every link returned by `get_relevant_links`.
    """
    # Scrape the landing page for the URL passed in, rather than reusing
    # whatever notebook-level `title`/`text` happen to hold from an earlier cell.
    landing_title, landing_text = scrape_web(myurl)
    result = "Landing Page\n"
    result += get_contents(landing_title, landing_text)

    # Local name chosen so it does not shadow the notebook-level `links` list.
    relevant = get_relevant_links(myurl)
    print("The links are : ", relevant)
    for link in relevant['links']:
        page_url = link['url']
        # Fall back to a generic label when the model omitted the type.
        page_type = link.get('type','Link')
        page_title, page_text = scrape_web(page_url)
        result += f"\n\n{page_type}\n"
        result += get_contents(page_title, page_text)
    return result

In [32]:
# Company name as typed by the user; inserted into the brochure prompt.
company_name = input("The name of the company is : ")

The name of the company is :  Hugging Face


In [39]:
# System prompt for the brochure request. Written as adjacent string literals
# so the spacing between sentences is explicit (the earlier backslash
# continuation joined "markdown." and "Include" with no space).
system_prompt = (
    "You are an assistant that analyzes the contents of several relevant pages from a company website "
    "and creates a short humorous, entertaining, jokey brochure about the company for prospective customers, investors and recruits. Respond in markdown. "
    "Include details of company culture, customers and careers/jobs if you have the information."
)

In [40]:
def user_prompt(company_name,myurl):
    """Build the user message for the brochure request.

    Args:
        company_name: Name inserted into the opening sentence.
        myurl: Landing-page URL passed on to `get_all_details_links`.

    Returns:
        The introductory instructions followed by the collected page contents.
    """
    preamble = (
        f"You are looking at a company called: {company_name}\n"
        "Here are the contents of its landing page and other relevant pages; use this information to build a short brochure of the company in markdown.\n"
    )
    return preamble + get_all_details_links(myurl)

In [42]:
# Chat messages for the brochure request. Building the user message calls
# user_prompt, which in turn gathers the page contents via get_all_details_links.
messages_brochure = [
    dict(role='system', content=system_prompt),
    dict(role='user', content=user_prompt(company_name,myurl)),
]

The links are :  {'links': [{'type': 'about page', 'url': 'https://huggingface.co/'}, {'type': 'careers page', 'url': 'https://apply.workable.com/huggingface/'}, {'type': 'company page', 'url': 'https://huggingface.co/enterprise'}, {'type': 'blog page', 'url': 'https://huggingface.co/blog'}, {'type': 'community page', 'url': 'https://discuss.huggingface.co'}, {'type': 'github page', 'url': 'https://github.com/huggingface'}, {'type': 'linkedin page', 'url': 'https://www.linkedin.com/company/huggingface/'}, {'type': 'twitter page', 'url': 'https://twitter.com/huggingface'}]}


In [48]:
def get_brochure(company_name,myurl):
    """Request the brochure from gpt-4o-mini and render it as Markdown.

    Args:
        company_name: NOTE(review): not used in the body.
        myurl: NOTE(review): not used in the body.

    The request is built from the module-level `messages_brochure`, so that
    variable must already hold the prompt for the company of interest.
    Nothing is returned; the reply is shown with `display`.
    """
    completion = openai.chat.completions.create(
        model = "gpt-4o-mini",
        messages = messages_brochure,
    )
    brochure_md = completion.choices[0].message.content
    display(Markdown(brochure_md))

In [49]:
# Generate the brochure and render it below.
get_brochure(company_name,myurl)

# 🤗 **Welcome to Hugging Face's Brochure!** 🤗

### **The AI Community Building the Future!**
Join us on this thrilling adventure into the world of Machine Learning — where code writes poetry and data dances!

---

### **What's the Buzz?** 🐝

1. **Models Galore!**
   With over **1 million models** available, we’re basically the Amazon of AI – if AWS sold teddy bears instead of cloud services. From text to images, we’ve got it all.
  
2. **Datasets That Make You Go "Wow!"**
   Need data for your next blockbuster ML project? With **250,000+ datasets** at your fingertips, you’ll be swimming in information faster than a neural network can train! (No scuba gear required.)

3. **Spaces to Create!**
   We offer **400,000+ applications** and a bunch of running projects – including video generation that’ll have you saying, “Why didn’t I think of that?” 

---

### **Culture & Community: Where Hugging Happens!** 🥰

At Hugging Face, we believe in collaboration! Our company culture is as warm as a freshly baked cookie. 

- **Open Source Organization**: We build **together** - because why do it alone when you can have pals to help? 
- **Community Driven**: From casual discussions on forum posts (“How do I delete my account?”) to brainstorming the next big AI revolution, everyone’s voice matters. Plus, you can even join debates on whether a tomato is a fruit or vegetable while working on that complex model! 🍅

---

### **Diverse (and Fun) Careers!**
Imagine a workplace where you can wear your *favorite* AI t-shirt while computing the secrets of the universe! 

- **Current Opportunities:** If you’re passionate about crafting machine learning models, we have roles tailored just for you. 
- **Benefits that’ll Melt Your Heart**: Competitive pays, the flexibility of remote work, and a community that encourages you to innovate without limits.

**Apply today and join us — because AI awaits no one!** 🚀

---

### **Customer Cheering Section!**
Join **over 50,000 organizations** including:
- Google
- Amazon
- Microsoft
- Your cool neighbor who speaks fluent Python!

They trust us with their ML projects. Why not join the club? 🎉

---

### **Join the Hug Squad!**
Ready to jump into the future? Sign up today and experience how Hugging Face can reshape your perception of AI!

👉 [Join Now!](https://huggingface.co/sign-up)

---
### **Punny Terminology:**
- **Weights & Biases**: Not just for clothes! (But also clothes, mind you.)
- **Training**: Not just a gym term! Get your models pumping! 💪
- **Inference**: Using the brain. (We use our computers – close enough!)

--- 

Hugging Face: The whimsical world where AI meets camaraderie, and you're invited to join the fun! 

### **Contact Us!**
- **Email**: hello@huggingface.co
- **Follow us**: [Twitter](https://twitter.com/huggingface) | [LinkedIn](https://linkedin.com/company/hugging-face)

Let's revolutionize AI together — one hug at a time! 🤗

# Section 3 : [Optional] --> Stream the Brochure

In [None]:
# To see the brochure appear chunk by chunk as it is generated (as in ChatGPT), use the streaming version below.

In [50]:
def stream_brochure(company_name,myurl):
    """Stream the brochure from gpt-4o-mini, updating one Markdown display as text arrives.

    Args:
        company_name: NOTE(review): not used in the body.
        myurl: NOTE(review): not used in the body.

    The request is built from the module-level `messages_brochure`.
    Nothing is returned; the output is rendered in place.
    """
    stream = openai.chat.completions.create(
        messages = messages_brochure,
        model = "gpt-4o-mini",
        stream = True
    )
    # Keep the unmodified text separately from what is displayed, so a fence
    # split across chunks ("```" then "markdown") can still be recognised.
    raw = ""
    display_handle = display(Markdown(""),display_id=True)
    for chunk in stream:
        if not chunk.choices:
            # Skip chunks that carry no choices instead of failing on [0].
            continue
        raw += chunk.choices[0].delta.content or ''
        # Strip only the fence markers. Removing the bare word "markdown"
        # would also delete it from ordinary brochure sentences.
        cleaned = raw.replace("```markdown","").replace("```","")
        update_display(Markdown(cleaned),display_id=display_handle.display_id)

In [51]:
# Generate the brochure again, this time rendering it incrementally.
stream_brochure(company_name,myurl)

 # Welcome to Hugging Face: The AI Community Snuggling Up for the Future!

### 🤗 Where AI and Passion Hug it Out

At **Hugging Face**, we’re not just building AI; we're crafting a community that's as warm and fuzzy as a teddy bear on a snow day. Join about **50,000+ organizations** that are cozying up with us, from Google and Amazon to little startups hoping to one day be as cuddly as we are.

---

### 🧠 Our Superpowers: Models, Datasets, and Spaces

- **Models**: Over **1,000,000** handpicked AI models snuggled up and waiting just for you. It's like a well-kept library but without the late fees!
  
- **Datasets**: With a treasure trove of **250,000 datasets**, we make sure you have everything you need to keep your algorithms well-fed. No more hungry AIs around here!

- **Spaces**: Create and collaborate in **12.5k Spaces**. Imagine social media but for coders, minus the trolls and with way more data!

---

### 🌍 Our Community: The Ultimate Support Group

Our community is like a warm blanket over a cold dataset. It’s where novice tinkerers become AI wizards, and seasoned developers can roll in the *magic of collaboration*. Whether you're a newbie or a tech wizard, we’ve got your back—no judgment, just support!

---

### 💼 Careers: Join Us on This Cozy Journey!

At Hugging Face, we don’t just want employees; we’re looking for future members of our feel-good family! We offer:

- **Flexible Work Arrangements**: Work from a cozy nook or your local café—pants optional (unless you’re on video calls).
- **AI-Powered Projects**: Get your hands messy with state-of-the-art tech without all the boring corporate stuff!
- **Community Spirit**: You’ll never face your coding woes alone—trust us, your teammates have got your back (and good snacks).

---

### 🎉 Join the Hugging Face Family!

Want to be part of something that’s growing faster than a weed in spring? If you're passionate about **machine learning**, **natural language processing**, or just love a community that gives back, check out our [careers page](https://huggingface.co/careers)! 

---

### 🎈 Why Choose Hugging Face? 

Because life’s too short for dull software! Here, we blend purpose with empowerment, sprinkling a little humor on high-tech solutions. Join us today and help shape tomorrow!

---

**Hugging Face: Where AI gets a big, warm hug!**