In [179]:
import os
import requests
import validators
import json
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from IPython.display import Markdown, display, update_display
from openai import OpenAI

In [180]:
load_dotenv()
api_key=os.getenv("OPENAI_API_KEY")

if not api_key:
    print("No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!")
elif not api_key.split() != api_key:
    print("An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook")
elif api_key[:8] != "sk-proj-":
    print("An API key was found, but it doesn't start sk-proj-; please check you're using the right key - see troubleshooting notebook")
else :
    print("API key found and looks good so far!")

API key found and looks good so far!


In [181]:
openai = OpenAI()

In [182]:
headers = {
 "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36"
}

In [183]:
myurl = input("Enter the Website:")
if validators.url(myurl):
    print("Valid Website")
else:
    print("Invalid Website. Please Try again.")

Enter the Website: https://huggingface.co/


Valid Website


In [184]:
response = requests.get(myurl,headers=headers)

In [185]:
soup = BeautifulSoup(response.content,'html.parser')

In [186]:
title = soup.title.string if soup.title else "No Title found. Give Proper Website Please"

In [187]:
for irrelevant in soup.body(['style','img','input','script']):
    irrelevant.decompose()
text = soup.body.get_text(separator='\n',strip=True)

In [190]:
def get_contents(title,text):
     return f"Webpage Title:\n{title}\nWebpage Contents:\n{text}\n\n"

In [191]:
all_links = soup.find_all('a')
links = [link.get('href') for link in all_links if link.get('href')]

In [192]:
link_system_prompt = "You are provided with a list of links found on a webpage. \
You are able to decide which of the links would be most relevant to include in a brochure about the company, \
such as links to an About page, or a Company page, or Careers/Jobs pages.\n"
link_system_prompt += "Respond in JSON format as per the given example."
link_system_prompt += """
{
"links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page", "url": "https://another.full.url/careers"},
    ]
}
"""

In [193]:
def get_links_user_prompt(myurl):
    user_prompt = ""
    user_prompt+= f"You are currently watching the website:{myurl}"
    user_prompt += "please decide which of these are relevant web links for a brochure about the company, respond with the full https URL in JSON format. \
Do not include Terms of Service, Privacy, email links.\n"
    user_prompt += "Links:(Some might be relative links):\n"
    user_prompt += '\n'.join(links)
    return user_prompt

In [194]:
messages = [
    {'role':'system','content':link_system_prompt},
    {'role':'user','content':get_links_user_prompt(myurl)}
]

In [195]:
def get_links(myurl):
    response = openai.chat.completions.create(
        messages = messages,
        model= "gpt-4o-mini",
        response_format = {'type':'json_object'}
    )
    result = response.choices[0].message.content
    return json.loads(result)

In [196]:
get_links(myurl)

{'links': [{'type': 'about page', 'url': 'https://huggingface.co/huggingface'},
  {'type': 'careers page', 'url': 'https://apply.workable.com/huggingface/'},
  {'type': 'enterprise page', 'url': 'https://huggingface.co/enterprise'},
  {'type': 'pricing page', 'url': 'https://huggingface.co/pricing'},
  {'type': 'blog page', 'url': 'https://huggingface.co/blog'},
  {'type': 'docs page', 'url': 'https://huggingface.co/docs'},
  {'type': 'contact page', 'url': 'https://huggingface.co/contact'},
  {'type': 'social media page', 'url': 'https://twitter.com/huggingface'},
  {'type': 'social media page',
   'url': 'https://www.linkedin.com/company/huggingface/'}]}

In [197]:
def scrape_web(myurl):
    '''Scrape any webpage easily using this function'''
    response = requests.get(myurl,headers=headers)
    soup = BeautifulSoup(response.content,'html.parser')

    title = soup.title.string if soup.title else "No Title found. Give Proper Website Please"

    for irrelevant in soup.body(['style','img','input','script']):
        irrelevant.decompose()
    text = soup.body.get_text(separator='\n',strip=True)
    return title, text

In [198]:
def get_all_details(myurl):
    result = 'Landing Page \n'
    result += get_contents(title,text)
    links = get_links(myurl)
    print('Found the Links',links)
    for link in links['links']:
        page_url = link['url']
        page_type = link.get('type','Link')
        result += f"\n\n{page_type}\n"
        page_title,page_text = scrape_web(page_url)
        result += get_contents(page_title,page_text)
    return result

In [199]:
system_prompt = "You are an assistant that analyzes the contents of several relevant pages from a company website \
and creates a short brochure about the company for prospective customers, investors and recruits. Respond in markdown.\
Include details of company culture, customers and careers/jobs if you have the information."

In [200]:
company_name = input("Enter the name of the Company : ")

Enter the name of the Company :  Hugging Face


In [201]:
def user_prompt_for(company_name,myurl):
    user_prompt = f"You are looking at website called : {company_name}\n"
    user_prompt+= f"Here are the contents of its landing page and other relevant pages; use this information to build a short brochure of the company in markdown.\n"
    user_prompt+= get_all_details(myurl)
    user_prompt = user_prompt[:20000]
    return user_prompt

In [202]:
messages_brochure = [
    {'role':'system','content':system_prompt},
    {'role':'user','content':user_prompt_for(company_name,myurl)}
]

Found the Links {'links': [{'type': 'about page', 'url': 'https://huggingface.co/huggingface'}, {'type': 'careers page', 'url': 'https://apply.workable.com/huggingface/'}, {'type': 'enterprise page', 'url': 'https://huggingface.co/enterprise'}, {'type': 'pricing page', 'url': 'https://huggingface.co/pricing'}, {'type': 'blog page', 'url': 'https://huggingface.co/blog'}, {'type': 'documentation page', 'url': 'https://huggingface.co/docs'}, {'type': 'community page', 'url': 'https://discuss.huggingface.co'}, {'type': 'GitHub page', 'url': 'https://github.com/huggingface'}, {'type': 'LinkedIn page', 'url': 'https://www.linkedin.com/company/huggingface/'}, {'type': 'Twitter page', 'url': 'https://twitter.com/huggingface'}]}


In [205]:
def get_brochure(company_name,myurl):
    response = openai.chat.completions.create(
        messages = messages_brochure,
        model = 'gpt-4o-mini'
    )
    result = response.choices[0].message.content
    return result

In [208]:
display(Markdown(get_brochure(company_name,myurl)))

# Hugging Face Brochure

---

**Company Overview**  
Hugging Face is a leading AI community and collaboration platform dedicated to building the future of machine learning. With a focus on democratizing machine learning, Hugging Face serves as the home for models, datasets, and applications in AI, allowing users to create, share, and collaborate efficiently.

**Mission**  
We aim to empower everyone—individuals and organizations alike—to access cutting-edge AI tools and technologies, driving innovation and fostering a global community of ML enthusiasts.

---

## **Company Culture**

At Hugging Face, we value open collaboration and community engagement. Our culture promotes:

- **Inclusivity:** Encouraging participation from all backgrounds in machine learning innovation.
- **Transparency:** Openness in our processes and decisions, leading to trust and accountability.
- **Continuous Learning:** Providing resources and forums for our community to learn and grow together.
- **Innovation:** Encouraging creativity and experimentation, leading to groundbreaking developments in AI.

---

## **Our Offerings**

### **Models and Datasets**
Hugging Face hosts over **1 million models** and **250,000 datasets** in various modalities, including text, image, video, and audio. Our platform allows users to browse, create, and utilize these resources, fostering an environment of continuous improvement and collaboration.

### **Community Spaces**
- **Connect:** Join and build apps in our popular Spaces, hosting dynamic machine learning applications.
- **Explore:** Engage with trending models and showcase your work.

### **Enterprise Solutions**
We provide advanced AI solutions for organizations featuring:
- **Dedicated Support**
- **Advanced Security**
- **Customizable Analytics**
- **Scalable Compute Resources**  
Starting at $20/user/month for team solutions, we help speed up your AI projects with enterprise-grade features.

---

## **Customer Base**

More than **50,000 organizations** trust Hugging Face, including giants like:
- Google
- Microsoft
- Amazon
- Meta
- Grammarly  

Our commitment ensures that these brands, along with numerous startups and individuals, have access to state-of-the-art machine learning solutions.

---

## **Careers at Hugging Face**

We are always on the lookout for passionate individuals to join our team. Working at Hugging Face means being part of a dynamic and innovative community dedicated to transforming the AI landscape. Open positions include roles in engineering, research, product management, and community engagement.

### **Why Work with Us?**
- **Impactful Work:** Contribute to projects that advance AI for everyone.
- **Flexible Environment:** Enjoy a culture that promotes work-life balance.
- **Growth Opportunities:** Participate in diverse and challenging projects that expand your expertise.

Join us on our mission to democratize good machine learning, one commit at a time! Check our **[Careers page](https://huggingface.co/jobs)** for open positions.

---

## **Get Started!**
Experience the future of AI today—join our community and take part in the open-source movement within machine learning. Whether you're a developer, researcher, entrepreneur, or enthusiast, Hugging Face invites you to explore, innovate, and contribute.

**Visit us at:** [huggingface.co](https://huggingface.co)

--- 

Become a part of the collaborative AI revolution today with Hugging Face!

In [209]:
def get_stream_brochure(company_name,myurl):
    stream = openai.chat.completions.create(
        messages = messages_brochure,
        model = 'gpt-4o-mini',
        stream=True
    )
    response = ''
    display_handle = display(Markdown,display_id = True)
    for chunk in stream:
        response += chunk.choices[0].delta.content or ''
        response = response.replace('```','').replace('markdown','')
        update_display(Markdown(response),display_id=display_handle.display_id)

In [210]:
get_stream_brochure(company_name,myurl)


# Hugging Face Brochure

## Welcome to Hugging Face
**The AI community building the future.**  
Join a collaborative platform where machine learning experts come together to share models, datasets, and applications. Whether you're a developer, researcher, or enthusiast, Hugging Face is your home for AI advancement.

---

## Who We Are
At Hugging Face, we are on a mission to democratize machine learning. Our open-source approach allows everyone to collaborate and innovate, making machine learning more accessible for all. With over **1 million models and 250,000 datasets**, our platform supports a myriad of AI applications across various modalities, including text, image, video, and audio.

---

## Company Culture
Our team consists of passionate individuals devoted to creating a vibrant and supportive environment for AI and ML enthusiasts. We believe in the power of community and collaboration, fostering a culture that encourages creativity and knowledge sharing. We value every contribution, big or small, following the mantra: "one commit at a time."

---

## Together, We Build
Join **more than 50,000 organizations**, including industry giants like Google, Microsoft, and Amazon, who use our platform for innovation. Whether you're a startup or an enterprise, our solutions scale with your needs, offering robust features, security, access controls, and dedicated support.

### Explore Our Products
- **Models**: Browse, deploy, and collaborate on AI models.
- **Datasets**: Access a vast library of datasets for any ML task.
- **Spaces**: Share and demo your ML applications seamlessly.

For those looking for enterprise solutions, we offer:
- Single Sign-On (SSO) integration
- Advanced security features
- Granular access controls
- Priority support

---

## Careers at Hugging Face
Join us on our journey to shape the future of AI. We are constantly looking for talented individuals who share our passion for machine learning. Explore current job openings on our **[Careers Page](https://huggingface.co/jobs)** and see how you can make an impact.

### Why Work With Us?
- **Innovative Environment**: Work on cutting-edge technology in a collaborative space.
- **Growth Opportunities**: We support professional development through resources and mentorship.
- **Inclusive Culture**: We celebrate diversity and promote a positive workplace for all.

---

## Connect With Us
- **Website**: [huggingface.co](https://huggingface.co)  
- **Twitter**: [@huggingface](https://twitter.com/huggingface)  
- **LinkedIn**: [Hugging Face](https://www.linkedin.com/company/huggingface)

Join us in building a future where AI is accessible to everyone!



In [211]:
import pycountry

def is_valid_language(lang):
    return bool(
        pycountry.languages.get(name = lang.capitalize()) or 
        pycountry.languages.get(alpha_2 = lang.lower())or
        pycountry.languages.get(alpha_3 = lang.lower())
    )

In [219]:
target_language = input("Enter the language: ")
if is_valid_language(target_language):
    print("Valid Language")
else :
    print("Invalid")

Enter the language:  JPN


Valid Language


In [220]:
system_lang_prompt = """
You are a professional copywriter and translator.
Your task is to take the provided brochure content and produce a version in {target_language}.

Requirements:
1. Translate all text into natural, fluent {target_language}.
2. Infuse humor and wit throughout, while keeping the original meaning and tone intact.
   - Add light jokes, playful phrasing, and clever word choices that resonate with {target_language} humor.
   - Do not make it offensive or culturally inappropriate.
3. Respond in markdown. Also, preserve the structure of the brochure (headings, subheadings, bullet points, formatting).
4. Ensure the final text feels like a professionally written humorous {target_language} brochure, not a literal translation.
5. Keep the message persuasive, engaging, and audience-friendly.

Always output only the humorous {target_language} version of the brochure, nothing else.
"""

In [229]:
def get_brochure_in_target_language(myurl,company_name,target_language):
    user_prompt = f"Here is the brochure text for {company_name}, retrieved from {myurl}.\n\n"
    user_prompt += "Your task:\n"
    user_prompt += f"1. Take this brochure content and translate it into natural, fluent {target_language}.\n"
    user_prompt += f"2. Infuse humor and wit throughout, while keeping the original meaning and tone intact.\n"
    user_prompt += "3. Keep the formatting, structure, and section flow (headings, bullet points, etc.) the same.\n"
    user_prompt += f"4. Output only the final humorous {target_language} brochure text, nothing else.\n\n"
    user_prompt += "I want the response in form of markdown."
    brochure_content = get_brochure(company_name, myurl)
    if brochure_content is not None:
        user_prompt += brochure_content
    else:
        user_prompt += "\n[Error: Could not retrieve brochure content]"
    
    return user_prompt

In [230]:
messages_target_lang = [
    {'role':'system','content':system_lang_prompt},
    {'role':'user','content':get_brochure_in_target_language(myurl,company_name,target_language)}
]

In [231]:
def get_brochure_target_language(myurl,company_name,target_language):
    response = openai.chat.completions.create(
        model = 'gpt-4o-mini',
        messages = messages_target_lang
    )
    result = response.choices[0].message.content
    return result

In [232]:
display(Markdown(get_brochure_target_language(myurl,company_name,target_language)))

# ハギングフェイスのパンフレット

## 会社概要
ハギングフェイスは人工知能の分野における先駆的な企業で、頑丈で協力的な機械学習コミュニティの構築に献身しています。この会社の使命は、機械学習を民主化し、モデル、データセット、アプリケーションに関する協力を促進する強力なプラットフォームを提供することです。

## コミュニティの engagement
ハギングフェイスの努力の核心は、その活気にあふれるコミュニティです。このコミュニティは、AIの能力を向上させるために継続的に貢献しています。プラットフォームは **100万以上のモデル** と **25万のデータセット** をホストし、開発者、データサイエンティスト、研究者が洞察や革新を共有するためのシームレスな環境を提供しています。

## 私たちの製品
- **モデル**: テキスト、画像、音声、ビデオ処理を含む様々な機械学習タスクに対応する多様な事前訓練済みモデルにアクセスできます。あなたのインスピレーションが飛び出す瞬間がここにあります！
- **データセット**: 豊富なデータセットコレクションを探索し、実験とモデル訓練の幅広い可能性を実現します。データの海を泳ぎ回りましょう！
- **スペース**: 機械学習アプリケーションをユーザーフレンドリーな形で披露し、現実世界でのエンゲージメントを促進します。これはまるでAIのための遊び場です！

## 文化
ハギングフェイスはオープンさと革新の文化を推進しています。会社は協力に生き、個人や組織の貢献を大切にします。従業員やコミュニティメンバーには、議論に参加し、知識を共有し、AIエコシステムの皆に利益をもたらすツールを強化することが奨励されています。

**コアバリュー:**
- **コラボレーション**: アイデアが自由に共有・発展できる環境を育みます。
- **包括性**: AIコミュニティにおける多様な視点や背景を受け入れます。多様性こそが私たちの強みです！
- **透明性**: オープンソースの実践にコミットし、誰もが貢献し学べる環境を提供します。

## カスタマーベース
ハギングフェイスは、**50,000以上の組織**を誇りに支援しています。これには、**Google**、**Meta**、**Amazon**、**Microsoft** などの技術の巨人たちも含まれています。このプラットフォームは、高度なツールやサービスを求めるチームや企業向けにカスタマイズされています。AIアプリケーションの構築とスケールアップを効果的に行えることでしょう！

## キャリアと機会
ハギングフェイスは、自社のミッションに参加し、ダイナミックで包括的な環境で成長したい情熱的な個人を常に探しています。ハギングフェイスで働くことで、あなたも：
- 画期的なAIプロジェクトに貢献できます。
- AI分野の思想的リーダーや革新者とコラボレーションできます。
- ワークライフバランスを重視したフレキシブルでサポートの行き届いた職場を楽しめます。

### 現在の求人情報
ハギングフェイスでは、さまざまな部門で定期的に求人を募集中です。興味のある候補者は、エンジニアリング、リサーチ、パートナーシップ、サポート役割など、スキルに合わせた機会を見つけることができます。案外、あなたの夢の仕事がもうすぐそこにありますよ！

## 料金プラン
ハギングフェイスは、個人や組織のニーズに合わせた柔軟な料金体系を提供しています：
- **PRO アカウント**: **$9/月**からスタート。強化された機能を求める個人に最適です。そこにいるあなた、AIにしっかり向き合う準備はできていますか？
- **チームプラン**: **$20/ユーザー/月**からスタート。成長中のチームを支えるために設計された、高度なコラボレーションツールを提供します。
- **エンタープライズソリューション**: 大規模な組織の特定のニーズに合わせたカスタムプランを提供し、セキュリティ、スケーラビリティ、専任サポートを確保します。

今日、ハギングフェイスに参加してAI革命の一翼を担いましょう！

**詳細情報のお問い合わせは:**  
[ハギングフェイスのウェブサイト](https://huggingface.co)  
最新の更新や洞察については、[Twitter](https://twitter.com/huggingface)や[LinkedIn](https://www.linkedin.com/company/huggingface)でフォローしてください。