# Sections 1 : Get the Relevant Links

In [14]:
import os 
import requests
import json
import validators
from bs4 import BeautifulSoup
from openai import OpenAI
from IPython.display import Markdown, display, update_display
from dotenv import load_dotenv

In [15]:
load_dotenv()
api_key = os.getenv("OPENAI_API_KEY")

if not api_key:
    print("No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!")
elif not api_key.split() != api_key:
    print("An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook")
elif api_key[:8] != "sk-proj-":
    print("An API key was found, but it doesn't start sk-proj-; please check you're using the right key - see troubleshooting notebook")
else :
    print("API key found and looks good so far!")

API key found and looks good so far!


In [16]:
openai = OpenAI()

In [17]:
headers = { "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36"}

In [18]:
myurl = input("Enter the required URL")

if validators.url(myurl):
    print("Valid URL.")
else:
    print("Invalid URL")

Enter the required URL https://huggingface.co/


Valid URL.


In [19]:
response = requests.get(myurl,headers=headers)

In [20]:
print(response.status_code)

200


In [21]:
soup=BeautifulSoup(response.content,'html.parser')

In [22]:
title = soup.title.string if soup.title else "No title found."

for irrelevant in soup.body(["script","img","style","input"]):
    irrelevant.decompose()
text = soup.body.get_text(separator="\n",strip=True)

In [23]:
def get_contents(title,text):
    return f"Webpage Title:\n{title}\nWebpage Contents:\n{text}\n\n"

In [24]:
all_links = soup.find_all('a')
links = [link.get('href') for link in all_links if link.get('href')]

In [25]:
system_links_prompt = "You are provided with a list of links found on a webpage. \
You are able to decide which of the links would be most relevant to include in a brochure about the company, \
such as links to an About page, or a Company page, or Careers/Jobs pages.\n"
system_links_prompt += "You should respond in JSON as in this example:"

system_links_prompt +="""
{
"links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page", "url": "https://another.full.url/careers"}
    ]
}
"""

In [26]:
def get_user_links_prompt(myurl):
    user_prompt = f"Here is the list of links on the website of {myurl} - "
    user_prompt += "please decide which of these are relevant web links for a brochure about the company, respond with the full https URL in JSON format. \
Do not include Terms of Service, Privacy, email links.\n"
    user_prompt += "Links (some might be relative links):\n"
    user_prompt += "\n".join(links)
    return user_prompt

In [27]:
messages_links = [
    {'role':'system','content':system_links_prompt},
    {'role':'user','content':get_user_links_prompt(myurl)}
]

In [28]:
def get_relevant_links(myurl):
    response= openai.chat.completions.create(
        messages = messages_links,
        model="gpt-4o-mini",
        response_format= {'type':'json_object'}
    )
    result = response.choices[0].message.content
    return json.loads(result)

In [29]:
get_relevant_links(myurl)

{'links': [{'type': 'about page', 'url': 'https://huggingface.co/huggingface'},
  {'type': 'careers page', 'url': 'https://apply.workable.com/huggingface/'},
  {'type': 'enterprise page', 'url': 'https://huggingface.co/enterprise'},
  {'type': 'pricing page', 'url': 'https://huggingface.co/pricing'},
  {'type': 'blog page', 'url': 'https://huggingface.co/blog'},
  {'type': 'GitHub page', 'url': 'https://github.com/huggingface'},
  {'type': 'Twitter page', 'url': 'https://twitter.com/huggingface'},
  {'type': 'LinkedIn page',
   'url': 'https://www.linkedin.com/company/huggingface/'}]}

# Section 2 : Making the Brochure

In [30]:
def scrape_web(myurl):
    response = response = requests.get(myurl,headers=headers)
    soup=BeautifulSoup(response.content,'html.parser')
    title = soup.title.string if soup.title else "No title found."

    for irrelevant in soup.body(["script","img","style","input"]):
       irrelevant.decompose()
    text = soup.body.get_text(separator="\n",strip=True)
    return title,text

In [32]:
def get_all_details_links(myurl):
    result = "Loading Page\n"
    result += get_contents(title,text)
    links = get_relevant_links(myurl)
    print("The links are : ", links)
    for link in links['links']:
        page_url=link['url']
        page_type=link.get('type','Link')
        page_title,page_text= scrape_web(page_url)
        result += f"\n\n{page_type}\n"
        result += get_contents(page_title,page_text)
    return result

In [33]:
company_name = input("The name of the company is : ")

The name of the company is :  Hugging Face


In [34]:
system_prompt = "You are an assistant that analyzes the contents of several relevant pages from a company website \
and creates a short humorous, entertaining, jokey brochure about the company for prospective customers, investors and recruits. Respond in markdown.\
Include details of company culture, customers and careers/jobs if you have the information."

In [35]:
def user_prompt(company_name,myurl):
    user_prompt = f"You are looking at a company called: {company_name}\n"
    user_prompt += f"Here are the contents of its landing page and other relevant pages; use this information to build a short brochure of the company in markdown.\n"
    user_prompt += get_all_details_links(myurl)
    return user_prompt

In [36]:
messages_brochure=[
    {'role':'system','content':system_prompt},
    {'role':'user','content':user_prompt(company_name,myurl)}
]

The links are :  {'links': [{'type': 'about page', 'url': 'https://huggingface.co/huggingface'}, {'type': 'careers page', 'url': 'https://apply.workable.com/huggingface/'}, {'type': 'company blog', 'url': 'https://huggingface.co/blog'}, {'type': 'enterprise page', 'url': 'https://huggingface.co/enterprise'}, {'type': 'pricing page', 'url': 'https://huggingface.co/pricing'}, {'type': 'documentation', 'url': 'https://huggingface.co/docs'}]}


In [54]:
def get_brochure(company_name,myurl):
    response = openai.chat.completions.create(
        messages = messages_brochure,
        model = "gpt-4o-mini"
    )
    result = response.choices[0].message.content
    return result

In [57]:
print(display(Markdown(get_brochure(company_name,myurl))))

# 🥳 Welcome to Hugging Face – The AI Community Building Your Future! 🚀

**Are you ready to hug the future of machine learning? 🐻🤗 Well, we've got a warm, fuzzy algorithm just for you!**

---

### 🌍 What is Hugging Face?

At Hugging Face, we're not just another AI company; we’re the *life of the machine learning party*! With over **1 million models** and **250,000+ datasets**, our platform serves as the ultimate playground for **AI enthusiasts and experts** alike. Whether you want to discover the latest in AI apps or collaborate with a thriving community, we’re the place to be!

### ☕ Culture: The Hugging Face Vibe

#### **Open Source & Collaboration**  
We're non-conformists, openly collaborating, and openly caring! Our community empowers every creator, engineer, and developer to help **democratize machine learning** — because why not share a little love? 💕 Plus, we have plenty of memes!

#### **Work That Wows**  
Here, your ideas are worth their weight in (virtual) gold! Whether you’re training machines or just training for a marathon of coding, you’ll find support, encouragement, and maybe even a virtual high-five. 🖐️

### 💼 Careers: Join the Family

Looking for a place to call home? Check out our job openings! We offer flexible contracts, competitive pay, and the chance to work with **50,000+ organizations**, including the likes of Google, Microsoft, and Grammarly. 

#### Perks of the Job:
- **Before Coffee:** Work-life balance that makes you want to jump out of bed (well, almost). 
- **After Coffee:** Great support and team vibes that make every project sparkle! ✨

---

### 💰 Pricing: Embrace Your Inner Geek

**Becoming a part of the Hugging Face family can be as sweet as a scoop of algorithm-flavored ice cream! 🍦**

- **PRO Account**: Just **$9/month** for all the *cool* features and more storage than your brain can handle!
- **Team Plan**: Only **$20/month** per user, because good things come in packs!
- **Enterprise Plan**: Starting at **$50/month**, if you're trying to impress the entire office with AI smarts!

---

### 💡 Join the Future, Hugging Face Style!

Ready to **TensorFlow** your dreams into reality? Whether you want to test your muscles against the latest models or just dabble in some friendly AI competitions, Hugging Face is the place for you! Join us today and let’s make the future as cuddly as a teddy bear! ❤️

#### **Follow Us on Social** 
- [Twitter](https://twitter.com/huggingface)
- [LinkedIn](https://linkedin.com/company/hugging-face)
- [Discord](https://discord.gg/huggingface)

---

**🤗 Come for the models, stay for the hugs!** Because at Hugging Face, we believe that every neural network deserves a bear hug!

None


# Section 3 : [Optional] --> Stream the Brochure

In [39]:
# If you want to get the output like in CHATGPT like loading chunk by chunk here you go

In [40]:
def stream_brochure(company_name,myurl):
    stream = openai.chat.completions.create(
        messages = messages_brochure,
        model = "gpt-4o-mini",
        stream = True
    )
    response = " "
    display_handle = display(Markdown(""),display_id=True)
    for chunk in stream:
        response += chunk.choices[0].delta.content or ''
        response = response.replace("```","")
        response = response.replace("markdown","")
        update_display(Markdown(response),display_id=display_handle.display_id)

In [41]:
stream_brochure(company_name,myurl)

 
# Welcome to Hugging Face!
**The AI community building the future** 🤖

### Who Are We?
At Hugging Face, we’re all about making machine learning accessible and enjoyable—like finding the last slice of pizza at a party! Our platform is a bustling hub for researchers, developers, and AI enthusiasts to collaborate, share, and deploy their own models, datasets, and applications. 🚀

---

### Our Offerings:
- **1M+ Models** to choose from — because who would want to date just one?
- **250k+ Datasets** to fuel your creativity (or your nefarious plans).
- **Exciting Spaces** for your cool AI apps — think of it as your AI Airbnb, but with less avocado toast.
- **Community Support** that’s always ready to give you a hug (virtually)!

---

### Our Customers:
Join over **50,000 organizations**, including the cool kids like **Google, Microsoft, and Intel**. We provide them the tools to make machines smarter, while they make us feel special with their data. Who doesn’t love a techy fan club? 💼

---

### Careers at Hugging Face:
Want to join the fun? We’re on a mission to democratize machine learning. If that sounds like your kind of party, **check out our job openings!** Whether you're a wizard codemaker or a number-crunching Sorcerer of Data, we’ve got a spot for you—just bring your magic! ✨

**Perks of Joining Us:**
- A culture as warm as a freshly baked cookie. 🍪
- **Flexible hours**, because who doesn't love waking up at 10 AM? 
- Work with forward-thinkers who are shaping the future—without the stress of wearing business formal attire!

---

### Join Us Today!
Dive into the world of AI with Hugging Face. Whether you're here for collaboration, jobs, or just to see what the buzz is about, we promise you a good time (and some cool models).

**Ready to take the leap? [Join Us!](https://huggingface.co/sign-up)**

*Remember: Hugging Face is not responsible for any sudden urges to hug a robot!* 🤗🤖



# Section 4 : Converting the Brochure's language to Japanese

In [58]:
system_lang_prompt = """
You are a professional copywriter and translator.
Your task is to take the provided brochure content and produce a Japanese version.

Requirements:
1. Translate all text into natural, fluent Japanese.
2. Infuse humor and wit throughout, while keeping the original meaning and tone intact.
   - Add light jokes, playful phrasing, and clever word choices that resonate with Japanese humor.
   - Do not make it offensive or culturally inappropriate.
3. Respond in markdown. Also, preserve the structure of the brochure (headings, subheadings, bullet points, formatting).
4. Ensure the final text feels like a professionally written humorous Japanese brochure, not a literal translation.
5. Keep the message persuasive, engaging, and audience-friendly.

Always output only the Japanese humorous version of the brochure, nothing else.
"""

In [59]:
def user_lang_prompt_for(company_name,myurl):
    user_prompt = f"Here is the brochure text for {company_name}, retrieved from {myurl}.\n\n"
    user_prompt += "Your task:\n"
    user_prompt += "1. Take this brochure content and translate it into natural, fluent Japanese.\n"
    user_prompt += "2. Infuse humor and wit throughout, while keeping the original meaning and tone intact.\n"
    user_prompt += "3. Keep the formatting, structure, and section flow (headings, bullet points, etc.) the same.\n"
    user_prompt += "4. Output only the final humorous Japanese brochure text, nothing else.\n\n"
    user_prompt += "I want the response in form of markdown\n"
    user_prompt += "--- START BROCHURE CONTENT ---\n"
    user_prompt += get_brochure(company_name,myurl)
    return user_prompt

In [60]:
def stream_lang_brochure(company_name,myurl):
    stream = openai.chat.completions.create(
        messages = [
            {'role':'system','content':system_lang_prompt},
            {'role':'user','content': user_lang_prompt_for(company_name,myurl)}
        ],
        model="gpt-4o-mini",
        stream=True
    )
    response = " "
    display_handle = display(Markdown(""),display_id=True)
    for chunk in stream:
        response += chunk.choices[0].delta.content or ''
        response = response.replace('```','')
        response = response.replace('markdown','')
        update_display(Markdown(response),display_id=display_handle.display_id)

In [61]:
stream_lang_brochure(company_name,myurl)

 # ハグフェイス：AIがハグをする場所！🤗

---

**ハグフェイスへようこそ！**  
AI宇宙の柔らかい腕の中にたたずむ私たちは、ただモデルを作るためだけにいるわけではありません。ハグをしに来たのです！ハグフェイスでは、人工知能はもう少し…人間らしくあるべきだと信じています。

---

## どうしてハグフェイス？

### 🤖 **モデルマニア**
- **1百万以上のモデルをチェック！**: マイクロソフトの音声モデュレーションからミニテキストプロセッサまで、あなたのオバアちゃんも誇れるモデルが揃っています（AIが何か知っていればの話ですが）。
- **データセットが盛りだくさん**: 250,000以上のデータセットの宝物をひと泳ぎしましょう！はい、あのデータセットもありますよ！

### 🏗️ **アプリスペース無限大**
- **アプリケーションスペースを運営**: 汗をかかずに自分のアプリを作りたいと思ったことはありませんか？私たちと一緒なら、「機械学習」と言うより早くAIアプリケーションを立ち上げられます！

---

## 会社文化：コードとハートを持ってきてね 👩‍💻❤️👨‍💻
いい機械学習を**民主化する**というミッションのもと、私たちはフレンドリーなコミットを一つずつ実現しています。私たちはAI愛好家の幸せな部族、協力と多様性を祝って、時にはAIのバースデーパーティーまで楽しんでいます🥳。

ここではあなたができます：
- **ゼロGPUパワー**でMLウィザードになりきる！🧙‍♂️
- 自分の作品をオープンにシェアし、MLポートフォリオを作成。腕前を見せつけるのにこれ以上の方法はありませんよね？
- **50,000以上の組織**、Googleやマイクロソフトのような大企業に参加。バーチャルで肩を組み、アイデアを共有しましょう！

---

### キャリアチャンス：楽しさのある場所で働こう！🌟
お気に入りのフーディを着て、AIの未来に貢献したいですか？機械学習の世界に飛び込む好奇心旺盛なマインドを歓迎します！オープンな役職をチェックして、あなたの内なるコーディング忍者を解き放ちましょう！🥷

---

## 価格：💰 すべての予算に対応
- **プロプラン**: 月額たったの9ドル。AIには**エクストラクレジットとストレージ**が必要ですね。
- **チームプラン**: 月額20ドル/ユーザーで、成長するチームに最適な安全な共同作業スペースを提供。
- **エンタープライズパッケージ**: スケールアップの準備ができたら、もしかしたらユニコーンも見つけられるかもしれません。🦄

---

## ハグムーブメントに参加しよう！
ただの観客にならず、ハグが必要なAI革命の一部になりましょう！  
さあ、サインアップして、マシンラーニングの冒険にちょっとした温もりを持ち込みましょう。結局のところ、ハグとともに顔が欲しくない人なんているでしょうか？

---

**ハグフェイス - 未来を築くAIコミュニティ！**  
AIを楽しみに来て、ハグで帰る！🤗

[もっと見る！](https://huggingface.co)  
**フォローしてね**: [Twitter](https://twitter.com/huggingface) - [Discord](https://discord.gg/huggingface) - [GitHub](https://github.com/huggingface)

---

あなたがAIを革命的に変えようとしているのか、ただ楽しく学びたいだけなのか、ハグフェイスがあなたを待っています！🎉