In [38]:
# imports


import os
import json
import gradio as gr
from fpdf import FPDF
from dotenv import load_dotenv
from IPython.display import Markdown, display, update_display
from utilities import fetch_website_links, fetch_website_contents
from openai import OpenAI

In [21]:
# Initialize and constants

load_dotenv(override=True)
api_key = os.getenv('OPENAI_API_KEY')

if api_key and api_key.startswith('sk-proj-') and len(api_key)>10:
    print("API key looks good so far")
else:
    print("There might be a problem with your API key? Please visit the troubleshooting notebook!")
    
MODEL = 'gpt-5-nano'
openai = OpenAI()

API key looks good so far


In [22]:
link_system_prompt = """
You are provided with a list of links found on a webpage.
You are able to decide which of the links would be most relevant to include in a brochure about the company,
such as links to an About page, or a Company page, or Careers/Jobs pages.
You should respond in JSON as in this example:

{
    "links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page", "url": "https://another.full.url/careers"}
    ]
}
"""

In [23]:
def get_links_user_prompt(url):
    user_prompt = f"""
        Here is the list of links on the website {url} -
        Please decide which of these are relevant web links for a brochure about the company, 
        respond with the full https URL in JSON format.
        Do not include Terms of Service, Privacy, email links.

        Links (some might be relative links):

    """
    links = fetch_website_links(url)
    user_prompt += "\n".join(links)
    return user_prompt

In [24]:
def select_relevant_links(url):
    """
    Select the most relevant links from the list of links on the website
    """
    user_prompt = get_links_user_prompt(url)
    response = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": link_system_prompt},
            {"role": "user", "content": user_prompt}
        ],
        response_format={"type": "json_object"}
    )
    result = response.choices[0].message.content
    links = json.loads(result)
    return links

In [25]:
def fetch_page_and_all_relevant_links(url):
    """
    Fetch the contents of a page, and all relevant links on the page
    """
    contents = fetch_website_contents(url)
    relevant_links = select_relevant_links(url)
    result = f"## Landing Page:\n\n{contents}\n## Relevant Links:\n"
    for link in relevant_links['links']:
        result += f"\n\n### Link: {link['type']}\n"
        result += fetch_website_contents(link["url"])
    return result
    

In [26]:
brochure_system_prompt = """
    You are an assistant that analyzes the contents of several relevant pages from a company website
    and creates a short brochure about the company for prospective customers, investors and recruits.
    Respond in markdown without code blocks.
    Include details of company culture, customers and careers/jobs if you have the information.
"""

In [27]:
def get_brochure_user_prompt(company_name, url):
    user_prompt = f"""
        You are looking at a company called: {company_name}
        Here are the contents of its landing page and other relevant pages;
        use this information to build a short brochure of the company in markdown without code blocks.\n\n
    """
    user_prompt += fetch_page_and_all_relevant_links(url)
    user_prompt = user_prompt[:5_000] # Truncate if more than 5,000 characters
    return user_prompt

In [28]:
def generate_brochure(company_name, url):
   response = openai.chat.completions.create(
        model="gpt-4.1-mini",
        messages=[
            {"role": "system", "content": brochure_system_prompt},
            {"role": "user", "content": get_brochure_user_prompt(company_name, url)}
        ]
   )
   result = response.choices[0].message.content
   display(Markdown(result))


In [29]:
generate_brochure("Hugging Face", "https://huggingface.co/")


# Hugging Face Brochure

## About Hugging Face  
Hugging Face is the premier collaboration platform for the global machine learning community, dedicated to building the future of AI. It serves as a central hub where machine learning engineers, scientists, and AI enthusiasts can share, explore, discover, and experiment with open-source models, datasets, and applications. Hugging Face empowers the next generation of AI developers and end users to learn, collaborate, and contribute to building an open, ethical AI future.

## What We Offer  
- **Models:** Access a vast library of over 2 million machine learning models across modalities including text, image, video, audio, and 3D.  
- **Datasets:** Discover and contribute to a repository with more than 500,000 datasets for training and evaluation.  
- **Spaces:** Build and showcase AI-powered applications and demos within the community.  
- **Community:** Collaborate with a fast-growing, global community of creators and experts driven by open-source innovation.  
- **Enterprise Solutions:** Scalable and secure AI deployments tailored for businesses, supported by paid compute options.  
- **Open Source Stack:** Leverage Hugging Face’s powerful open-source tools to accelerate AI development and deployment.

## Key Highlights  
- Host and collaborate on unlimited public machine learning models, datasets, and applications.  
- Explore trending cutting-edge models such as photorealistic image generators, text-to-image models, speech streaming models, and more.  
- Share your projects and build your professional profile inside the Hugging Face ecosystem.  
- Support for multiple data types and modalities, enabling experimentation across a wide variety of AI applications.

## Company Culture  
Hugging Face fosters an inclusive, collaborative, and community-driven culture. Rooted in transparency and openness, the company supports ethical AI development and encourages contributions from diverse backgrounds. The platform serves not just as a tool but as a vibrant community where mutual learning and shared progress are priorities.

## For Customers and Partners  
Hugging Face offers robust enterprise-grade solutions and paid compute resources designed to accelerate AI initiatives. Whether you are a startup, research institution, or an established enterprise, Hugging Face provides the infrastructure and expertise to bring your AI projects to life efficiently and ethically.

## Careers & Opportunities  
Join a fast-growing company at the forefront of AI innovation. Hugging Face seeks talented machine learning engineers, researchers, developers, and community builders who are passionate about open source and collaborative AI development. Being part of Hugging Face means contributing to transformative technologies while engaging with a vibrant global community that values diversity, creativity, and impact.

---

**Discover more:**  
Visit [huggingface.co](https://huggingface.co) to explore models, datasets, applications, and join the AI community shaping the future.  
Sign up to create your portfolio, share your work, and accelerate your machine learning projects with Hugging Face’s open platform and paid compute options.  

Join Hugging Face - Building the future of AI, together.

In [52]:
def generate_brochure_pdf(title, content):
    pdf = FPDF()
    pdf.add_page()
    pdf.set_auto_page_break(auto=True, margin=15)

    # Add Unicode font
    pdf.add_font("DejaVu", "", "fonts/DejaVuSans.ttf", uni=True)
    pdf.set_font("DejaVu", "", 16)

    pdf.multi_cell(0, 10, title)

    pdf.ln(5)
    pdf.set_font("DejaVu", "", 12)
    pdf.multi_cell(0, 8, content)

    file_path = "brochurefile/"+title.replace(" ", "_")+"_brochure.pdf"
    pdf.output(file_path)

    return file_path


Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.
  pdf.add_font("DejaVu", "", "fonts/DejaVuSans.ttf", uni=True)
  pdf.add_font("DejaVu", "", "fonts/DejaVuSans.ttf", uni=True)


In [46]:
def stream_brochure(company_name, url):
   stream = openai.chat.completions.create(
        model="gpt-4.1-mini",
        messages=[
            {"role": "system", "content": brochure_system_prompt},
            {"role": "user", "content": get_brochure_user_prompt(company_name, url)}
        ]
   )
   result = stream.choices[0].message.content
   brochure_pdf_path = generate_brochure_pdf(company_name, result)
   return brochure_pdf_path


In [None]:
name_input = gr.Textbox(label="Company name:")
url_input = gr.Textbox(label="Landing page URL including http:// or https://")
message_output = gr.File(label="Download Brochure PDF")

view = gr.Interface(
    fn=stream_brochure,
    title="Brochure Generator", 
    inputs=[name_input, url_input], 
    outputs=message_output, 
    examples=[
            ["Hugging Face", "https://huggingface.co"],
            ["Gradio UI", "https://www.gradio.app/"]
        ], 
    flagging_mode="never"
    )
view.launch()

* Running on local URL:  http://127.0.0.1:7880
* To create a public link, set `share=True` in `launch()`.




  pdf.add_font("DejaVu", "", "fonts/DejaVuSans.ttf", uni=True)
  pdf.add_font("DejaVu", "", "fonts/DejaVuSans.ttf", uni=True)
