### Making a brochure for a website using **OpenAI's Chat Completions API, Groq (for low latency)**, and the **Gradio Interface** for the UI

In [56]:
import json
import os
from urllib.parse import urljoin

import gradio as gr
import requests
from bs4 import BeautifulSoup
from dotenv import load_dotenv
from IPython.display import Markdown, display, update_display
from openai import OpenAI

In [57]:
# Load variables from a local .env file; override=True lets .env values
# replace anything already present in the process environment.
load_dotenv(override=True)

# Default OpenAI client (takes its key from the environment).
openai = OpenAI()

# Groq serves an OpenAI-compatible API, so the same client class is reused
# with Groq's key and base URL.
groq_api_key = os.getenv('GROQ_API_KEY')
if not groq_api_key:
    # With api_key=None the OpenAI client falls back to OPENAI_API_KEY, which
    # would then be sent to Groq's endpoint. Fail fast with a clear message.
    raise ValueError("GROQ_API_KEY is not set; add it to your .env file or environment.")
groq_url = "https://api.groq.com/openai/v1"
groq = OpenAI(api_key=groq_api_key, base_url=groq_url)

In [58]:
# Request headers sent with every requests.get call in the fetch helpers below.
# NOTE(review): presumably a browser-like User-Agent is used because some sites
# reject the default python-requests agent — confirm.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36"
}

#functions to fetch website contents and links
def fetch_website_contents(url, timeout=10):
    """Return the title and visible body text of the page at `url`.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, requests can block indefinitely on an unresponsive host.

    Returns:
        The page title, a blank line, then the body text (one text node per
        line), truncated to 2,000 characters. The title is "No title found"
        when the page has no usable <title>; the body text is empty when the
        page has no <body>.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    response = requests.get(url, headers=headers, timeout=timeout)
    soup = BeautifulSoup(response.content, "html.parser")
    # soup.title.string is None for an empty <title> or one containing nested
    # tags; treat that like a missing title instead of crashing on None + str.
    title = soup.title.string if soup.title else None
    if title is None:
        title = "No title found"
    if soup.body:
        # Drop elements that contribute no readable text.
        for irrelevant in soup.body(["script", "style", "img", "input"]):
            irrelevant.decompose()
        text = soup.body.get_text(separator="\n", strip=True)
    else:
        text = ""
    return (title + "\n\n" + text)[:2_000]


def fetch_website_links(url, timeout=10):
    """Return every non-empty href found in <a> tags on the page at `url`.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, requests can block indefinitely on an unresponsive host.

    Returns:
        A list of href strings exactly as written in the page, so entries may
        be relative paths, fragments or non-http schemes.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    response = requests.get(url, headers=headers, timeout=timeout)
    soup = BeautifulSoup(response.content, "html.parser")
    # Anchors without an href yield None from .get(); filter those out.
    return [href for a in soup.find_all("a") if (href := a.get("href"))]

In [59]:
# System prompt for the link-selection request made in select_relevant_links:
# it asks the model to pick brochure-relevant links and to answer with a JSON
# object shaped like the example embedded in the prompt.
link_system_prompt = """
You are provided with a list of links found on a webpage.
You are able to decide which of the links would be most relevant to include in a brochure about the company,
such as links to an About page, or a Company page, or Careers/Jobs pages.
You should respond in JSON as in this example:

{
    "links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page", "url": "https://another.full.url/careers"}
    ]
}
"""

In [60]:
def get_links_user_prompt(url):
    """Build the user message that asks the model to pick brochure-worthy links.

    The instructions are followed by every href returned by
    fetch_website_links(url), one per line.
    """
    instructions = f"""
Here is the list of links on the website {url} -
Decide which of these are relevant web links for a brochure about the company, 
respond with the full https URL in JSON format.
Do not include Terms of Service, Privacy, email links.

Links (some might be relative links):

"""
    hrefs = fetch_website_links(url)
    return instructions + "\n".join(hrefs)

In [61]:
def select_relevant_links(url):
    """Ask the Groq-hosted model which links on `url` belong in a brochure.

    Returns the model's JSON reply parsed with json.loads. The system prompt
    requests the shape {"links": [{"type": ..., "url": ...}, ...]}.
    """
    conversation = [
        {"role": "system", "content": link_system_prompt},
        {"role": "user", "content": get_links_user_prompt(url)},
    ]
    completion = groq.chat.completions.create(
        model="openai/gpt-oss-20b",
        messages=conversation,
        response_format={"type": "json_object"},
    )
    return json.loads(completion.choices[0].message.content)




In [62]:
def fetch_page_and_all_relevant_links(url):
    """Collect the landing page text plus the text of each model-selected link.

    Args:
        url: Landing page URL of the company website.

    Returns:
        A markdown-style string: a "## Landing Page:" section with the landing
        page contents, then a "## Relevant Links:" section holding one
        "### Link: <type>" block per selected link that could be fetched.

    Raises:
        requests.RequestException: If the landing page itself cannot be
            fetched. Failures on individual selected links are skipped.
    """
    contents = fetch_website_contents(url)
    relevant_links = select_relevant_links(url)
    sections = [f"## Landing Page:\n\n{contents}\n## Relevant Links:\n"]
    # The model is asked for a "links" list but may omit it; treat that as
    # "no extra pages" instead of raising KeyError.
    for link in relevant_links.get("links", []):
        link_url = link.get("url")
        if not link_url:
            continue
        # The page's hrefs may be relative and the model may echo them as-is;
        # resolve against the landing page (absolute URLs pass through as-is).
        link_url = urljoin(url, link_url)
        try:
            page = fetch_website_contents(link_url)
        except requests.RequestException:
            # One dead or malformed link should not abort the whole brochure.
            continue
        sections.append(f"\n\n### Link: {link.get('type', 'link')}\n")
        sections.append(page)
    return "".join(sections)

# Demo run: performs live HTTP requests and a Groq API call each time this cell executes.
print(fetch_page_and_all_relevant_links("https://huggingface.co"))

## Landing Page:

Hugging Face – The AI community building the future.

Hugging Face
Models
Datasets
Spaces
Community
Docs
Enterprise
Pricing
Log In
Sign Up
NEW
GGML and llama.cpp join Hugging Face 🔥
Try HuggingChat Omni – Chat with AI 💬
Get started with Inference in seconds 🚀
The AI community building the future.
The platform where the machine learning community collaborates on models, datasets, and applications.
Explore AI Apps
or
Browse 2M+ models
Trending on
this week
Models
Qwen/Qwen3.5-397B-A17B
Updated
1 day ago
•
390k
•
986
zai-org/GLM-5
Updated
11 days ago
•
180k
•
1.5k
Nanbeige/Nanbeige4.1-3B
Updated
3 days ago
•
202k
•
773
nvidia/personaplex-7b-v1
Updated
9 days ago
•
539k
•
2.18k
MiniMaxAI/MiniMax-M2.5
Updated
8 days ago
•
224k
•
901
Browse 2M+ models
Spaces
Running
on
Zero
Featured
1.6k
Qwen Image Multiple Angles 3D Camera
🎥
1.6k
Change the camera angle of a photo with AI
Running
on
Zero
MCP
912
Wan2.2 14B Preview
🐌
912
generate a vid

In [63]:
# System prompt for the brochure-writing request made in stream_brochure:
# it sets the tone, the 500-word limit and the markdown output format.
system_prompt = """You are a helpful assistant that creates brochures for websites. You will be given the contents of the website and a list of links on the website. 
You will create a brochure that summarizes the contents of the website but the content will be in a quite funny yet engaging manner.
 The brochure should be concise and easy to read. It should include a title and a summary of the contents. The brochure should be no more than 500 words. The brochure should be written in a friendly and engaging tone. 
 The brochure should be formatted in a way that is easy to read and visually appealing. 
 The brochure should be designed to capture the attention of the reader and encourage them to visit the website.
 The brochure should be unique and not copied from any other source. The brochure should be written in English.
 Respond in markdown without the code block
"""

In [64]:
def get_user_prompt(company_name, url):
    """Build the user message for the brochure-writing request.

    The message names the company, then appends the scraped landing page and
    selected link pages from fetch_page_and_all_relevant_links(url). The
    combined text is capped at 5,000 characters.
    """
    preamble = f"""
You are looking at a company called: {company_name}
Here are the contents of its landing page and other relevant pages;
use this information to build a short brochure of the company in markdown without code blocks.\n\n
"""
    scraped = fetch_page_and_all_relevant_links(url)
    # Keep only the first 5,000 characters of the combined prompt.
    return (preamble + scraped)[:5_000]

In [65]:
# Demo run: performs live HTTP requests and a Groq API call each time this cell executes.
get_user_prompt("HuggingFace", "https://huggingface.co")

'\nYou are looking at a company called: HuggingFace\nHere are the contents of its landing page and other relevant pages;\nuse this information to build a short brochure of the company in markdown without code blocks.\n\n\n## Landing Page:\n\nHugging Face ‚Äì The AI community building the future.\n\nHugging Face\nModels\nDatasets\nSpaces\nCommunity\nDocs\nEnterprise\nPricing\nLog In\nSign Up\nNEW\nGGML and llama.cpp join Hugging Face üî•\nTry HuggingChat Omni ‚Äì Chat with AI üí¨\nGet started with Inference in seconds üöÄ\nThe AI community building the future.\nThe platform where the machine learning community collaborates on models, datasets, and applications.\nExplore AI Apps\nor\nBrowse 2M+ models\nTrending on\nthis week\nModels\nQwen/Qwen3.5-397B-A17B\nUpdated\n1 day ago\n‚Ä¢\n390k\n‚Ä¢\n986\nzai-org/GLM-5\nUpdated\n11 days ago\n‚Ä¢\n180k\n‚Ä¢\n1.5k\nNanbeige/Nanbeige4.1-3B\nUpdated\n3 days ago\n‚Ä¢\n202k\n‚Ä¢\n773\nnvidia/personaplex-7b-v1\nUpdated\n9 days ago\n‚Ä¢\n539k\n‚Ä¢\n2

In [66]:
def stream_brochure(company_name, url):
    """Stream a brochure for `company_name` from the Groq-hosted model.

    This is a generator: after every streamed chunk it yields the full
    brochure text accumulated so far, not just the newest fragment.
    """
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": get_user_prompt(company_name, url)},
    ]
    chunks = groq.chat.completions.create(
        model="openai/gpt-oss-20b",
        messages=conversation,
        stream=True,
    )
    brochure = ""
    for piece in chunks:
        fragment = piece.choices[0].delta.content
        # A chunk's content can be None or empty; it then adds nothing.
        if fragment:
            brochure += fragment
        yield brochure

In [67]:
# stream_brochure("HuggingFace", "https://huggingface.co")

In [68]:
# Gradio components for the UI: two text inputs and one Markdown output.
name_input = gr.Textbox(label="Company name:")
url_input = gr.Textbox(label="Landing page URL including http:// or https://")
message_output = gr.Markdown(label="Generated Brochure")

# Example rows, each ordered as [company name, landing page URL].
message_examples = [
    ["Hugging Face", "https://huggingface.co"],
    ["Google news", "https://news.google.com/home?hl=en-US&gl=US&ceid=US:en"],
]

In [69]:
# Wire the brochure generator into a Gradio UI. fn is a generator
# (stream_brochure yields the text accumulated so far on every chunk).
view = gr.Interface(
    fn=stream_brochure,
    inputs=[name_input, url_input],
    outputs=[message_output],
    examples=message_examples,
    flagging_mode="never",
)
# NOTE(review): share=True also publishes a temporary public gradio.live URL;
# anyone holding that link can trigger scraping and Groq calls billed to this
# notebook's API key. Use share=False for local-only use.
view.launch(share=True)

* Running on local URL:  http://127.0.0.1:7872
* Running on public URL: https://87f19f3b851a5fcf52.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.
