In [57]:
# Importing the libraries
import os
import requests
from bs4 import BeautifulSoup
from typing import List
from dotenv import load_dotenv
from openai import OpenAI
import gradio as gr
import json
from IPython.display import display, Markdown, update_display

In [4]:
# load the API keys
load_dotenv()
openai_api_key = os.getenv("OPENAI_API_KEY")
google_api_key = os.getenv("GOOGLE_API_KEY")

In [5]:
# Generic System Message
system_message = "You are a helpful assistant"

In [7]:
# initiate openai object
openai = OpenAI(api_key=openai_api_key)

In [8]:
# Let's wrap a call to GPT-4o-mini is a simple function
def message_gpt(prompt):
    messages = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": prompt}
    ]
    completion = openai.chat.completions.create(
        model='gpt-4o-mini',
        messages=messages
    )

    return completion.choices[0].message.content

In [9]:
# Testing the function
# This can reveal the "training cut off", or the most recent date in the training data
message_gpt("What is today's Date?")

"Today's date is October 8, 2023."

### Gradio Interface

In [11]:
# Simple testing function
def shout(text):
    print(f"Shout has been called with input: {text}")
    return text.upper()

In [14]:
# Simple gradio interface
gr.Interface(fn=shout, inputs='textbox', outputs='textbox').launch()

* Running on local URL:  http://127.0.0.1:7860
* To create a public link, set `share=True` in `launch()`.




Shout has been called with input: Hi
Shout has been called with input: The test message


In [15]:
# Adding share=True means that it can be accessed publically
# A more permanent hosting is available using a platform called Spaces from HuggingFace,
# NOTE: Some Anti-virus software and Corporate Firewalls might not like you using share=True. If you're at work on on a work network, I suggest skip this test.

gr.Interface(fn=shout, inputs="textbox", outputs="textbox", flagging_mode="never").launch(share=True)

* Running on local URL:  http://127.0.0.1:7861
* Running on public URL: https://88179681be246173ac.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




Shout has been called with input: Hi
Shout has been called with input: Test message


In [16]:
# Adding inbrowser=True opens up a new browser window automatically
gr.Interface(fn=shout, inputs='textbox', outputs='textbox', flagging_mode='never').launch(inbrowser=True)

* Running on local URL:  http://127.0.0.1:7862
* To create a public link, set `share=True` in `launch()`.




Shout has been called with input: Hi
Shout has been called with input: Hi
Shout has been called with input: Test Message


## Forcing dark mode

Gradio appears in light mode or dark mode depending on the settings of the browser and computer. There is a way to force gradio to appear in dark mode, but Gradio recommends against this as it should be a user preference (particularly for accessibility reasons). But if you wish to force dark mode for your screens, below is how to do it.

In [17]:
# Define this variable and then pass js=force_dark_mode when creating the Interface

force_dark_mode = """
function refresh() {
    const url = new URL(window.location);
    if (url.searchParams.get('__theme') !== 'dark') {
        url.searchParams.set('__theme', 'dark');
        window.location.href = url.href;
    }
}
"""
gr.Interface(fn=shout, inputs="textbox", outputs="textbox", flagging_mode="never", js=force_dark_mode).launch()

* Running on local URL:  http://127.0.0.1:7863
* To create a public link, set `share=True` in `launch()`.




In [20]:
# Input and Output
view = gr.Interface(
    fn=shout, 
    inputs=[gr.Textbox(label='Your Message:', lines=6)],
    outputs=[gr.Textbox(label='Response', lines=6)],
    flagging_mode="never"
)

view.launch()

* Running on local URL:  http://127.0.0.1:7864
* To create a public link, set `share=True` in `launch()`.




Shout has been called with input: Hi There!
Shout has been called with input: Hi There!
This is test message



In [21]:
# Changing the function from 'shout' to 'message_gpt'
view = gr.Interface(
    fn=message_gpt,
    inputs=[gr.Textbox(label='Your Message:', lines=6)],
    outputs=[gr.Textbox(label='Response:', lines=6)],
    flagging_mode='never'
)

view.launch()

* Running on local URL:  http://127.0.0.1:7865
* To create a public link, set `share=True` in `launch()`.




In [24]:
# Let's use Markdown
# Are you wondering why it makes any difference to set system_message when it's not referred to in the code below it?
# I'm taking advantage of system_message being a global variable, used back in the message_gpt function (go take a look)
# Not a great software engineering practice, but quite common during Jupyter Lab R&D!

system_message = "You are a helpful assistant that respond in markdown"

view = gr.Interface(
    fn=message_gpt,
    inputs=[gr.Textbox(label='Your Message:')],
    outputs=[gr.Markdown(label='Response:')],
    flagging_mode='never'
)

view.launch()

* Running on local URL:  http://127.0.0.1:7867
* To create a public link, set `share=True` in `launch()`.




In [28]:
# Let's create a call that streams back results
def stream_gpt(prompt):
    messages = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": prompt}
    ]

    stream = openai.chat.completions.create(
        model='gpt-4o-mini',
        messages=messages,
        stream=True
    )
    result = ""
    for chunk in stream:
        result += chunk.choices[0].delta.content or ""
        yield result

In [29]:
view = gr.Interface(
    fn=stream_gpt,
    inputs=[gr.Textbox(label="Your Message:")],
    outputs=[gr.Markdown(label="Response:")],
    flagging_mode="never"
)

view.launch()

* Running on local URL:  http://127.0.0.1:7870
* To create a public link, set `share=True` in `launch()`.




In [30]:
# Google model with streaming in Markdown
gemini_via_openai_client = OpenAI(
    api_key=google_api_key,
    base_url="https://generativelanguage.googleapis.com/v1beta/openai/"
)

def stream_google(prompt):
    messages=[{"role": "system", "content": system_message},
              {"role": "user", "content": prompt}]
    
    stream_result = gemini_via_openai_client.chat.completions.create(
        model='gemini-2.0-flash',
        messages=messages,
        stream=True
    )

    result=""
    for chunk in stream_result:
        result+= chunk.choices[0].delta.content or ""
        yield result

In [31]:
view = gr.Interface(
    fn=stream_google,
    inputs=[gr.Textbox(label="Your Message:")],
    outputs=[gr.Markdown(label="Response:")],
    flagging_mode="never"
)

view.launch()

* Running on local URL:  http://127.0.0.1:7871
* To create a public link, set `share=True` in `launch()`.




In [35]:
# Option to select model for response
def stream_model(prompt, model):
    if model == 'GPT':
        result = stream_gpt(prompt)
    elif model == 'Google':
        result = stream_google(prompt)
    else:
        raise ValueError("Unknown Model")
    yield from result

In [36]:
view = gr.Interface(
    fn=stream_model,
    inputs=[gr.Textbox(label="Your Message:"),
            gr.Dropdown(['GPT', 'Google'], label="Select the model", value='GPT')],
    outputs=[gr.Markdown(label="Response:")],
    flagging_mode="never"
)

view.launch()

* Running on local URL:  http://127.0.0.1:7873
* To create a public link, set `share=True` in `launch()`.




## Building a company brochure generator

In [48]:
# A class to represent the webpage scraper

# Some websites need you to use proper headers when fetching their content:
headers = {
 "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36"
}

class Website:
    """
    A utility class to represent a Website that we have scrapped with hyperlinks
    """

    def __init__(self, url: str):
        self.url = url
        response = requests.get(url, headers=headers)
        self.body = response.content
        soup = BeautifulSoup(self.body, 'html.parser')
        self.title = soup.title.string if soup.title else "No Title Found"
        if soup.body:
            for irrelevant in soup.body(['script', 'style', 'img', 'input']):
                irrelevant.decompose()
            self.text = soup.body.get_text(separator='\n', strip=True)
        else:
            self.text = ""
        links = [link.get("href") for link in soup.find_all('a')]
        self.links = [link for link in links if link]

    def get_contents(self) -> str:
        return f"Webpage title: {self.title}\nWebpage Content:\n{self.text}\n\n"

In [39]:
# Single shot prompting for better results
link_system_prompt = "You are provided with a list of links found on a webpage. \
You are able to decide which of the links would be most relevant to include in a brouchure about the company,\
such as links to an About page, or Company page, or Careers/Jobs pages.\n"
link_system_prompt += "You should responde is JSON as in this example:"
link_system_prompt += """
{
    links=[
        {"type": "about_page", "url": "https://full.url/goes/here/about"},
        {"type": "careers_page", "url": "https://another.full.url/careers"}
    ]
}
"""

# printing the prompt
print(link_system_prompt)

You are provided with a list of links found on a webpage. You are able to decide which of the links would be most relevant to include in a brouchure about the company,such as links to an About page, or Company page, or Careers/Jobs pages.
You should responde is JSON as in this example:
{
    links=[
        {"type": "about_page", "url": "https://full.url/goes/here/about"},
        {"type": "careers_page", "url": "https://another.full.url/careers"}
    ]
}



In [40]:
# Function to embed the website to the prompt
def get_user_link_prompt(website: Website) -> str:
    user_prompt = f"Here is a list of list of links on the website of {website.url}\n"
    user_prompt += "Please decide which of these are relevant web links for a broucher about the company, respond with the full http URL in JSON format. \
Do not include Terms and Services or Privacy, email links.\n"
    user_prompt += "Links (some might be relative links):\n"
    user_prompt += "\n".join(website.links)
    return user_prompt

In [42]:
# Function to fetch the hyperlinks from a website with llm
def get_links(url: str):
    website = Website(url)
    response = openai.chat.completions.create(
        model='gpt-4o-mini',
        messages=[
            {"role": "system", "content": link_system_prompt},
            {"role": "user", "content": get_user_link_prompt(website)}
        ],
        response_format={"type": "json_object"}
    )

    result = response.choices[0].message.content
    return json.loads(result)

In [44]:
# Function to assemble all the details into a single prompt before sending this to LLM Model
def get_all_details(url: str):
    result = "Landing Page:\n"

    links = get_links(url)
    for link in links['links']:
        result += f"\n\n{link['type']}\n"
        result += Website(url).get_contents()
    
    return result

In [45]:
# Prompt to create the company brochure
system_message = "You are an assistant that analyzes the contents of several relevant pages from a company website \
and creates a short brochure about the company for prospective customers, investors and recruits. Respond in markdown."

def get_brochure_user_prompt(company_name: str, url: str) -> str: 
    user_prompt = f"You are looking at a company called: {company_name}.\n"
    user_prompt += "Here are the contents of its landing page and other relevant pages; "
    user_prompt += "Use this information to build a short brochure of the company in markdown.\n"
    user_prompt += get_all_details(url)
    return user_prompt

In [52]:
# Function to generate company brochure with LLM model
def create_brochure(company_name: str, url: str):
    response = openai.chat.completions.create(
        model='gpt-4o-mini',
        messages = [
            {"role": "system", "content": system_message},
            {"role": "user", "content": get_brochure_user_prompt(company_name, url)}
        ]
    )
    result = response.choices[0].message.content
    display(Markdown(str(result)))

In [53]:
create_brochure('HuggingFace', 'https://huggingface.co')

# Hugging Face Brochure

---

## **About Us**
Hugging Face is the vibrant AI community dedicated to building the future of machine learning. Our platform fosters collaboration among AI specialists by providing tools for sharing models, datasets, and applications.

---

## **Our Offerings**
- **Models**: Access a vast library of over **1 million models** to accelerate your projects.
- **Datasets**: With **250k+ datasets**, we simplify the process of finding and utilizing quality data.
- **Spaces**: Create and share applications in various modalities including text, image, and audio.
- **Services**: Explore our **Compute solutions** starting at $0.60/hour for GPU and **Enterprise solutions** from $20/user/month.

---

## **Trending Models This Week**
1. **nvidia/parakeet-tdt-0.6b-v2** - 42.1k Views
2. **nari-labs/Dia-1.6B** - 144k Views
3. **ACE-Step/ACE-Step-v1-3.5B** - 262 Views

Explore more on our platform!

---

## **Collaborate and Innovate**
Our open-source tools encourage community contributions, enabling the development of:
- **Transformers**: 144,075 implementations across multiple frameworks.
- **Diffusers**: 28,903 cutting-edge models for generating various media.
- **Safetensors**: A secure method for handling neural network weights.

---

## **Join Our Community**
With more than **50,000 organizations** including names like Google, Microsoft, and Amazon leveraging our platform, Hugging Face is at the forefront of AI innovation.

### Prospective Clients
Empower your teams with advanced tools, enterprise-grade security, and dedicated support.

### Investors
Become part of a rapidly growing community reshaping AI with the latest tools and technologies.

### Recruits
Join us and collaborate with top minds in the industry to push the boundaries of what AI can achieve.

---

## **Get Involved**
- **Sign Up**: Create your account today and start collaborating.
- **Explore**: Dive deep into **1 million models** and **400k applications**.
- **Connect**: Follow us on [GitHub](https://github.com), [Twitter](https://twitter.com), [LinkedIn](https://www.linkedin.com), and [Discord](https://discord.com) for updates.

---

### **Visit Us Today**
For more information, explore our full suite of services at [Hugging Face](https://huggingface.co/).

---

Building The Future of AI, Together.

In [58]:
# Create brochure with different LLM models with streaming
def brochure_stream_model(company_name: str, url: str, model: str) -> str:
    messages = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": get_brochure_user_prompt(company_name, url)}]

    if model == "GPT":
        stream_response = openai.chat.completions.create(
            model='gpt-4o-mini',
            messages=messages,
            stream=True)

    elif model == "Google":
        stream_response = gemini_via_openai_client.chat.completions.create(
            model='gemini-2.0-flash',
            messages=messages,
            stream=True
        )
    else:
        raise ValueError("Unknown Model")

    result = ""
    display_handle = display(Markdown(""), display_id=True)
    for chunk in stream_response:
        result += chunk.choices[0].delta.content or ""
        
        # Update the displayed Markdown content with the latest response
        update_display(Markdown(result), display_id=display_handle.display_id)

In [59]:
brochure_stream_model('HuggingFace', 'https://huggingface.co', 'GPT')

# Hugging Face Brochure

Welcome to **Hugging Face** – the AI community building the future!

## Who We Are
Hugging Face is a collaborative platform designed for the machine learning community to create, discover, and share models, datasets, and applications. With us, you're not just a user; you're part of a thriving community of innovators and creators.

## Our Offerings

### **Models**
Explore over **1 million+ models** across various domains including:
- **NLP**
- **Computer Vision**
- **Audio Processing**

### **Datasets**
Access an extensive library of **250,000+ datasets** tailored for a myriad of machine learning tasks, enhancing your research and development processes.

### **Spaces**
Join **400,000+ applications** hosted on our platform, featuring tools like:
- **DeepSite** - Generate applications effortlessly.
- **ICEdit** - Universal image editing tool.

### **Compute Solutions**
We provide paid **Compute Solutions** starting at **$0.60/hour** for GPU deployment, designed to empower enterprises and users to move faster and make an impact.

### **Enterprise Solutions**
For organizations, we offer enterprise-grade tools with:
- Security features
- Dedicated support
- Custom access controls
Starting at **$20/user/month**

## Why Choose Us?
- **Open Source Community:** We are committed to building the foundation of machine learning tools together with our users – including popular tools such as **Transformers**, **Diffusers**, and **Tokenizers**.
- **Robust Support:** Join over **50,000 organizations** including tech giants like Google, Microsoft, and Amazon in leveraging our powerful tools.

## Join Us!
Explore the possibilities of AI with Hugging Face. 
- **Sign Up Today** and start contributing to revolutionary projects.
- Discover our offerings through our [Website](https://huggingface.co).

### Connect With Us
- **GitHub:** [Hugging Face GitHub](https://github.com/HuggingFace)
- **Twitter:** [@huggingface](https://twitter.com/huggingface)
- **LinkedIn:** [Hugging Face on LinkedIn](https://www.linkedin.com/company/huggingface)
- **Join our Discord community** to collaborate and learn from peers.

---

Together, let’s build the AI culture of the future!

In [62]:
def brochure_stream_model(company_name: str, url: str, model: str) -> str:
    messages = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": get_brochure_user_prompt(company_name, url)}]

    if model == "GPT":
        stream_response = openai.chat.completions.create(
            model='gpt-4o-mini',
            messages=messages,
            stream=True)

    elif model == "Google":
        stream_response = gemini_via_openai_client.chat.completions.create(
            model='gemini-2.0-flash',
            messages=messages,
            stream=True
        )
    else:
        raise ValueError("Unknown Model")

    result = ""
    display_handle = display(Markdown(""), display_id=True)
    for chunk in stream_response:
        result += chunk.choices[0].delta.content or ""
        
        # Update the displayed Markdown content with the latest response
        # update_display(Markdown(result), display_id=display_handle.display_id) # uncomment this to run on local notebook
        yield result  # This will stream the result to Gradio

In [67]:
# Gradio Interface to generate company brochure with different LLM models
view = gr.Interface(
    fn=brochure_stream_model,
    inputs=[
        gr.Textbox(label='Company Name:'),
        gr.Textbox(label='URL'),
        gr.Dropdown(["GPT", "Google"], label="Select the model", value="GPT")
    ],
    outputs=gr.Markdown(label="Response:"),
    flagging_mode="never",
)

view.launch()

* Running on local URL:  http://127.0.0.1:7879
* To create a public link, set `share=True` in `launch()`.








