In [34]:
import os
from groq import Groq
from bs4 import BeautifulSoup
import requests
import json
# from IPython.display import display, Markdown

In [35]:
import gradio as gr

In [36]:
def getWebLinks(url, timeout=10):
    """Fetch a page and return the non-empty ``href`` values of its anchors.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        list[str]: The ``href`` attribute of every ``<a>`` tag in document
        order, skipping anchors whose ``href`` is missing or empty. Values
        are returned exactly as written in the page, so they may be relative.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    # requests waits forever unless a timeout is given; without one a stalled
    # server would hang the caller indefinitely.
    response = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(response.text, "html.parser")
    hrefs = (anchor.get("href") for anchor in soup.find_all("a"))
    return [href for href in hrefs if href]

In [37]:
# Browser-like request headers sent with page downloads.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/117.0.0.0 Safari/537.36"
    ),
}

def fetch_website_contents(url, timeout=10):
    """
    Return the title and visible body text of the website at the given url;
    truncate to 2,000 characters as a sensible limit.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        str: ``"<title>\\n\\n<body text>"`` cut to at most 2,000 characters.
        The title is "No title found" when the page has no usable title and
        the body text is empty when the page has no ``<body>``.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    # requests waits forever unless a timeout is given.
    response = requests.get(url, headers=headers, timeout=timeout)
    soup = BeautifulSoup(response.content, "html.parser")
    # ``.string`` is None when <title> is empty or wraps nested markup, which
    # would make the concatenation below raise TypeError; fall back instead.
    title = (soup.title.string if soup.title else None) or "No title found"
    if soup.body:
        # Drop elements that carry no readable text before extracting it.
        for irrelevant in soup.body(["script", "style", "img", "input"]):
            irrelevant.decompose()
        text = soup.body.get_text(separator="\n", strip=True)
    else:
        text = ""
    return (title + "\n\n" + text)[:2_000]

In [38]:
def getLinkSystemPrompt():
    """Return the system prompt that tells the model how to filter links.

    The prompt asks for a JSON object with a "links" array whose items carry
    a "type" label and a full "url". The embedded example is what the model
    imitates, so it uses a well-formed URL and correctly spelled labels.

    Returns:
        str: The system prompt text.
    """
    return """
            You are provided with a list of links.
            You are to decide on filtering the list based on the relevant links to use in a brochure
            for a company. Provide the response strictly in json format as in this example:
            {
                "links": [
                    {"type": "about page", "url": "https://full.url/goes/here/about"},
                    {"type": "careers page", "url": "https://full.url/goes/here/careers"}
                ]
            }


"""
def getLinkUserPrompt(url):
    """Build the user prompt listing every link found on ``url``.

    The instruction text comes first, followed by the links returned by
    ``getWebLinks(url)``, one per line.
    """
    pad = " " * 12
    preamble = "\n".join([
        "",
        f"{pad}Here is the list of links on the website  - {url}",
        pad + "Decide which of these are relevent web links required for a brochure of the company,",
        pad + "respond with full http/https URL in json format. Do not include terms and conditions , privacy or email and social media links.",
        "",
        pad + "Links (some might be relevant links):",
        "",
        pad,
    ])
    return preamble + "\n".join(getWebLinks(url))

def getMessage(sytemPrompt, userPrompt):
    """Wrap a system prompt and a user prompt as a two-message chat list.

    Returns:
        list[dict]: The system message followed by the user message, each
        with "role" and "content" keys.
    """
    systemTurn = dict(role="system", content=sytemPrompt)
    userTurn = dict(role="user", content=userPrompt)
    return [systemTurn, userTurn]



In [39]:

def getReleventLinks(client, model, messages):
    """Request a JSON-object chat completion and return its text.

    Args:
        client: Object exposing ``chat.completions.create``.
        model: Model identifier passed through to the API.
        messages: Chat messages passed through to the API.

    Returns:
        The ``message.content`` of the first choice in the response.
    """
    requestArgs = {
        "model": model,
        "messages": messages,
        "response_format": {"type": "json_object"},
    }
    completion = client.chat.completions.create(**requestArgs)
    firstChoice = completion.choices[0]
    return firstChoice.message.content

def generateBrochure(client, model, messages):
    """Request a chat completion and return its text.

    Args:
        client: Object exposing ``chat.completions.create``.
        model: Model identifier passed through to the API.
        messages: Chat messages passed through to the API.

    Returns:
        The ``message.content`` of the first choice in the response.
    """
    completion = client.chat.completions.create(model=model, messages=messages)
    firstChoice = completion.choices[0]
    return firstChoice.message.content

In [40]:
# def displayBrochure(result):
#     display(Markdown(result))

In [41]:
def brochureSystemPrompt():
    """Return the system prompt that sets the brochure-writing persona and rules.

    Returns:
        str: The system prompt text.
    """
    # The apostrophe in "company's" is plain ASCII so the prompt cannot be
    # corrupted again by an encoding round-trip.
    return """ You are a senior brand strategist and marketing copywriter.
    Your task is to create clear, persuasive, and well-structured brochure content in Markdown format for companies using raw scraped website data.
    You must:
    Synthesize and clean messy, repetitive, or unstructured scraped content
    Preserve factual accuracy and avoid hallucinations
    Write in polished, professional marketing language
    Adapt tone to the company's industry and positioning
    Organize content using proper Markdown headings, bullet lists, and spacing
    Do not invent services, claims, certifications, clients, metrics, or awards not explicitly supported by the provided data.
    If information is missing, omit it gracefully rather than guessing.
    All output must be valid Markdown. """

def brochureUserPrompt(bodyText, audience=None, tone=None, brochureFormat=None):
    """Build the user prompt that asks the model to write the brochure.

    Args:
        bodyText: Scraped website text to base the brochure on.
        audience: Optional target audience (e.g. "enterprise clients").
        tone: Optional tone of voice (e.g. "professional").
        brochureFormat: Optional layout (e.g. "one-page overview").

    Returns:
        str: The prompt text. A "Brochure Requirements" section is included
        only for the requirements that were actually supplied, so no unfilled
        template placeholder is ever sent to the model.
    """
    requirements = [
        f"    {label}: {value}"
        for label, value in (
            ("Target audience", audience),
            ("Tone", tone),
            ("Brochure format", brochureFormat),
        )
        if value
    ]

    lines = [
        "",
        "    Using the scraped website data provided below, generate brochure-ready content in Markdown format for the company.",
        "    Company Website Data",
        "",
        f"    {bodyText}",
        "",
    ]
    if requirements:
        lines.append("    Brochure Requirements")
        lines.extend(requirements)
    lines.extend([
        "    Required Sections",
        "    Structure the brochure using Markdown headings (##, ###) in the following order:",
        "    Company Overview",
        "    Value Proposition / What We Do",
        "    Key Products or Services",
        "    Unique Differentiators",
        "    Industries or Use Cases (if available)",
        "    Brand Promise or Mission (if available)",
        "    Call to Action",
        "    Writing Guidelines",
        "    Rewrite content; do not copy verbatim from the website",
        "    Keep language concise and brochure-friendly",
        "    Use Markdown bullet points where appropriate",
        "    Assume the content will be used directly by a designer",
        "    Do not include placeholders or filler text",
        "    Output only the brochure content in valid Markdown. Do not include explanations or commentary.",
        "",
        "    ",
    ])
    return "\n".join(lines)

In [42]:
def runBrochureGeneration(url):
    """Generate Markdown brochure content for the company website at ``url``.

    Steps: ask the model which of the site's links are relevant, download
    those pages, then ask the model to write the brochure from their text.

    Args:
        url: Address of the company website.

    Returns:
        The brochure text produced by the model.

    Raises:
        ValueError: If ``url`` is empty.
    """
    from urllib.parse import urljoin

    url = (url or "").strip()
    if not url:
        raise ValueError("Please enter a website URL.")

    api_key = os.getenv("GROQ_API_KEY")
    client = Groq(api_key=api_key)
    model = "llama-3.3-70b-versatile"
    messages = getMessage(getLinkSystemPrompt(), getLinkUserPrompt(url))
    result = getReleventLinks(client, model, messages)
    selection = json.loads(result)
    # The model may return a JSON object without the expected "links" key.
    candidates = selection.get("links", []) if isinstance(selection, dict) else []

    pages = []
    for link in candidates:
        target = link.get("url") if isinstance(link, dict) else None
        if not target:
            continue
        try:
            # urljoin leaves absolute URLs untouched and resolves relative
            # ones (e.g. "/about") against the site address.
            pages.append(fetch_website_contents(urljoin(url, target)))
        except (requests.RequestException, ValueError):
            # One unreachable or malformed link should not abort the brochure.
            continue

    if not pages:
        # Nothing usable was selected; fall back to the landing page so the
        # model is not asked to write from empty input.
        pages.append(fetch_website_contents(url))

    # Separate pages so one page's text does not run into the next title.
    bodyText = "\n\n".join(pages)
    brochureMessages = getMessage(brochureSystemPrompt(), brochureUserPrompt(bodyText[:5000]))
    return generateBrochure(client, model, brochureMessages)



In [43]:
# Input: the website address; output: the brochure rendered as Markdown.
message_input = gr.Textbox(
    label="Your Website: ",
    info="Enter your website url",
    placeholder="https://example.com",
)
message_output = gr.Markdown(label="Generated Brochure: ")

# Wire the generator function to the two components and start the app.
demo = gr.Interface(
    fn=runBrochureGeneration,
    inputs=message_input,
    outputs=message_output,
    title="Website Brochure Generator",
    show_progress="full",
)
demo.launch()

* Running on local URL:  http://127.0.0.1:7872
* To create a public link, set `share=True` in `launch()`.


