In [1]:
import re
import os
import requests
import json
from typing import List
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from IPython.display import Markdown, display, update_display
from google import genai
import gradio as gr

In [2]:
load_dotenv()
api_key = os.getenv('GEMINI_API_KEY')
if api_key:
    print("API Key looks good.")
else:
    print("There might be some problem with your API Key. Please check.")

# import google.generativeai as genai
# genai.configure(api_key=api_key)
# MODEL = "gemini-2.0-flash-lite"
# Content = "Explain how AI works in a few words"
# Gemini = genai.GenerativeModel(MODEL)

# response = Gemini.generate_content(
#     contents=Content
# )

Gemini = genai.Client(api_key=api_key)
MODEL = "gemini-2.0-flash-lite"
Content = "Explain how AI works in a few words"
response = Gemini.models.generate_content(
    model=MODEL, contents=Content
)
print(response.text)

API Key looks good.
AI learns from data, making predictions or actions.



In [3]:
def Chrome_Version():
    ChromeVer = !reg query "HKEY_CURRENT_USER\Software\Google\Chrome\BLBeacon" /v version
    text = '\n'.join(ChromeVer)

    # Extract the version number
    version = re.search(r'\d+\.\d+\.\d+\.\d+', text)
    ChromeVer = version.group() if version else "Not found"
    #print("Chrome version:", version.group() if version else "Not found")
    return ChromeVer
latestversion = Chrome_Version()

In [4]:
browser = "Chrome/"+latestversion
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36"# + browser
}

class Website:
    '''Scaping website'''

    def __init__(self, url):
        self.url = url
        response = requests.get(url=url, headers=headers)
        self.body = response.content
        soup = BeautifulSoup(self.body, 'html.parser')
        self.title = soup.title.string if soup.title else "No title found."

        if soup.body:
            for irrelavant in soup.body(["script", "style", "img", "input"]):
                irrelavant.decompose()
            self.text = soup.body.get_text(separator="\n", strip=True)
        else:
            self.text = ""
        #print(self.text)

        links = [link.get('href') for link in soup.find_all('a')]
        self.links = [link for link in links if link]
        #print(self.links)
    
    def get_contents(self):
        return f"Webpage Title: \n{self.title}\nWebpage Contents:\n{self.text}\n\n"

In [5]:
ed = Website("https://edwarddonner.com")
ed.links

['https://edwarddonner.com/',
 'https://edwarddonner.com/connect-four/',
 'https://edwarddonner.com/outsmart/',
 'https://edwarddonner.com/about-me-and-about-nebula/',
 'https://edwarddonner.com/posts/',
 'https://edwarddonner.com/',
 'https://news.ycombinator.com',
 'https://nebula.io/?utm_source=ed&utm_medium=referral',
 'https://www.prnewswire.com/news-releases/wynden-stark-group-acquires-nyc-venture-backed-tech-startup-untapt-301269512.html',
 'https://patents.google.com/patent/US20210049536A1/',
 'https://www.linkedin.com/in/eddonner/',
 'https://edwarddonner.com/2025/04/21/the-complete-agentic-ai-engineering-course/',
 'https://edwarddonner.com/2025/04/21/the-complete-agentic-ai-engineering-course/',
 'https://edwarddonner.com/2025/01/23/llm-workshop-hands-on-with-agents-resources/',
 'https://edwarddonner.com/2025/01/23/llm-workshop-hands-on-with-agents-resources/',
 'https://edwarddonner.com/2024/12/21/llm-resources-superdatascience/',
 'https://edwarddonner.com/2024/12/21/llm-

In [6]:
link_system_prompt = "You are provided with a list of links found on a webpage. \
You are able to decide which of the links would be most relevant to include in a brochure about the company, \
such as links to an About page, or a Company page, or Careers/Jobs pages.\n"
link_system_prompt += "You should respond in JSON as in this example:"
link_system_prompt += """
{
    "links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page": "url": "https://another.full.url/careers"}
    ]
}
"""

In [7]:
print(link_system_prompt)

You are provided with a list of links found on a webpage. You are able to decide which of the links would be most relevant to include in a brochure about the company, such as links to an About page, or a Company page, or Careers/Jobs pages.
You should respond in JSON as in this example:
{
    "links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page": "url": "https://another.full.url/careers"}
    ]
}



In [8]:
def get_links_user_prompt(website):
    user_prompt = f"Here is the list of links on the website of {website.url} - "
    user_prompt += "please decide which of these are relevant web links for a brochure about the company, respond with the full https URL in JSON format. \
Do not include Terms of Service, Privacy, email links.\n"
    user_prompt += "Links (some might be relative links):\n"
    user_prompt += "\n".join(website.links)
    return user_prompt

In [9]:
print(get_links_user_prompt(ed))

Here is the list of links on the website of https://edwarddonner.com - please decide which of these are relevant web links for a brochure about the company, respond with the full https URL in JSON format. Do not include Terms of Service, Privacy, email links.
Links (some might be relative links):
https://edwarddonner.com/
https://edwarddonner.com/connect-four/
https://edwarddonner.com/outsmart/
https://edwarddonner.com/about-me-and-about-nebula/
https://edwarddonner.com/posts/
https://edwarddonner.com/
https://news.ycombinator.com
https://nebula.io/?utm_source=ed&utm_medium=referral
https://www.prnewswire.com/news-releases/wynden-stark-group-acquires-nyc-venture-backed-tech-startup-untapt-301269512.html
https://patents.google.com/patent/US20210049536A1/
https://www.linkedin.com/in/eddonner/
https://edwarddonner.com/2025/04/21/the-complete-agentic-ai-engineering-course/
https://edwarddonner.com/2025/04/21/the-complete-agentic-ai-engineering-course/
https://edwarddonner.com/2025/01/23/ll

In [10]:
def get_links(url):
    website = Website(url)
    messages = []
    # if link_system_prompt:
    #     messages.append({"role": "system", "content": link_system_prompt})
    # messages.append({"role": "user", "content": get_links_user_prompt(website)})
    # system_instruction = {"role": "system", "content": link_system_prompt}
    # prompt = {"role": "user", "content": get_links_user_prompt(website)}
   
    #generation_config = {"response_mime_type": "application/json"}
    # message = [
    #         {"role": "system", "content": link_system_prompt},
    #         {"role": "user", "content": get_links_user_prompt(website)}
    #   ]

    full_prompt = f"""
    System: {link_system_prompt}

    User: {get_links_user_prompt(website)}

    Please respond in valid JSON format.
    """
    try:
        response = Gemini.models.generate_content(
            model=MODEL,
            contents=full_prompt
            # model=MODEL,
            # messages,
            # generation_config = {"response_mime_type": "application/json"}
            # response_format={"type": "json_object"}
        )
        raw_text = response.text.strip()

        # Extract JSON using regex
        json_match = re.search(r'\{.*\}', raw_text, re.DOTALL)
        if json_match:
            json_data = json.loads(json_match.group())
            return json_data
        else:
            print("No valid JSON found in the response.")
            print("Raw response:", raw_text)
            return None

    except json.JSONDecodeError:
        print("Error decoding JSON response")
        print("Raw response:", response.text)
        return None

    # result = response.text
    # return json.loads(result)

In [11]:
huggingface = Website("https://huggingface.co")
huggingface.links

['/',
 '/models',
 '/datasets',
 '/spaces',
 '/posts',
 '/docs',
 '/enterprise',
 '/pricing',
 '/login',
 '/join',
 '/spaces',
 '/models',
 '/nari-labs/Dia-1.6B',
 '/Qwen/Qwen3-235B-A22B',
 '/deepseek-ai/DeepSeek-Prover-V2-671B',
 '/Qwen/Qwen3-30B-A3B',
 '/moonshotai/Kimi-Audio-7B-Instruct',
 '/models',
 '/spaces/nari-labs/Dia-1.6B',
 '/spaces/enzostvs/deepsite',
 '/spaces/Qwen/Qwen3-Demo',
 '/spaces/stepfun-ai/Step1X-Edit',
 '/spaces/nvidia/describe-anything-model-demo',
 '/spaces',
 '/datasets/nvidia/OpenMathReasoning',
 '/datasets/OpenGVLab/InternVL-Data',
 '/datasets/Eureka-Lab/PHYBench',
 '/datasets/nvidia/OpenCodeReasoning',
 '/datasets/Anthropic/values-in-the-wild',
 '/datasets',
 '/join',
 '/pricing#endpoints',
 '/pricing#spaces',
 '/pricing',
 '/enterprise',
 '/enterprise',
 '/enterprise',
 '/enterprise',
 '/enterprise',
 '/enterprise',
 '/enterprise',
 '/allenai',
 '/facebook',
 '/amazon',
 '/google',
 '/Intel',
 '/microsoft',
 '/grammarly',
 '/Writer',
 '/docs/transformers',

In [12]:
get_links("https://huggingface.co")

{'links': [{'type': 'models page', 'url': 'https://huggingface.co/models'},
  {'type': 'datasets page', 'url': 'https://huggingface.co/datasets'},
  {'type': 'spaces page', 'url': 'https://huggingface.co/spaces'},
  {'type': 'enterprise page', 'url': 'https://huggingface.co/enterprise'},
  {'type': 'pricing page', 'url': 'https://huggingface.co/pricing'},
  {'type': 'careers page', 'url': 'https://apply.workable.com/huggingface/'},
  {'type': 'about page', 'url': 'https://huggingface.co/huggingface'},
  {'type': 'docs page', 'url': 'https://huggingface.co/docs'},
  {'type': 'blog page', 'url': 'https://huggingface.co/blog'}]}

In [13]:
# Make the Brochure

def get_all_details(url):
    result = "Landing page:\n"
    result += Website(url).get_contents()
    links = get_links(url)
    print("Found links:", links)
    try:
        for link in links["links"]:
            result += f"\n\n{link['type']}\n"
            result += Website(link["url"]).get_contents()
    except:
        print("None type object")
    return result

In [14]:
print(get_all_details("https://huggingface.co"))

Found links: {'links': [{'type': 'models page', 'url': 'https://huggingface.co/models'}, {'type': 'datasets page', 'url': 'https://huggingface.co/datasets'}, {'type': 'spaces page', 'url': 'https://huggingface.co/spaces'}, {'type': 'enterprise page', 'url': 'https://huggingface.co/enterprise'}, {'type': 'pricing page', 'url': 'https://huggingface.co/pricing'}, {'type': 'about page', 'url': 'https://huggingface.co/huggingface'}, {'type': 'careers page', 'url': 'https://apply.workable.com/huggingface/'}, {'type': 'blog', 'url': 'https://huggingface.co/blog'}, {'type': 'docs page', 'url': 'https://huggingface.co/docs'}, {'type': 'learn page', 'url': 'https://huggingface.co/learn'}, {'type': 'brand page', 'url': 'https://huggingface.co/brand'}, {'type': 'github', 'url': 'https://github.com/huggingface'}, {'type': 'twitter', 'url': 'https://twitter.com/huggingface'}, {'type': 'linkedin', 'url': 'https://www.linkedin.com/company/huggingface/'}]}
Landing page:
Webpage Title: 
Hugging Face – T

In [15]:
system_prompt = "You are an assistant that analyzes the contents of several relevant pages from a company website \
and creates a short brochure about the company for prospective customers, investors and recruits. Respond in markdown.\
Include details of company culture, customers and careers/jobs if you have the information."

# Or uncomment the lines below for a more humorous brochure - this demonstrates how easy it is to incorporate 'tone':

# system_prompt = "You are an assistant that analyzes the contents of several relevant pages from a company website \
# and creates a short humorous, entertaining, jokey brochure about the company for prospective customers, investors and recruits. Respond in markdown.\
# Include details of company culture, customers and careers/jobs if you have the information."


In [16]:
def get_brochure_user_prompt(company_name, url):
    user_prompt = f"You are looking at a company: {company_name}\n"
    user_prompt += f"Here are the contents of its landing page and other relevant pages \
    use this information to build a short brochure of the company in markdown.\n"
    user_prompt += get_all_details(url)
    user_prompt = user_prompt[:5000]
    return user_prompt

In [17]:
get_brochure_user_prompt("HuggingFace", "https://huggingface.co")

Found links: {'links': [{'type': 'about page', 'url': 'https://huggingface.co/huggingface'}, {'type': 'models page', 'url': 'https://huggingface.co/models'}, {'type': 'datasets page', 'url': 'https://huggingface.co/datasets'}, {'type': 'spaces page', 'url': 'https://huggingface.co/spaces'}, {'type': 'enterprise page', 'url': 'https://huggingface.co/enterprise'}, {'type': 'pricing page', 'url': 'https://huggingface.co/pricing'}, {'type': 'careers page', 'url': 'https://apply.workable.com/huggingface/'}, {'type': 'blog page', 'url': 'https://huggingface.co/blog'}, {'type': 'docs page', 'url': 'https://huggingface.co/docs'}, {'type': 'learn page', 'url': 'https://huggingface.co/learn'}]}


'You are looking at a company: HuggingFace\nHere are the contents of its landing page and other relevant pages     use this information to build a short brochure of the company in markdown.\nLanding page:\nWebpage Title: \nHugging Face – The AI community building the future.\nWebpage Contents:\nHugging Face\nModels\nDatasets\nSpaces\nPosts\nDocs\nEnterprise\nPricing\nLog In\nSign Up\nThe AI community building the future.\nThe platform where the machine learning community collaborates on models, datasets, and applications.\nExplore AI Apps\nor\nBrowse 1M+ models\nTrending on\nthis week\nModels\nnari-labs/Dia-1.6B\nUpdated\n4 days ago\n•\n97.3k\n•\n1.62k\nQwen/Qwen3-235B-A22B\nUpdated\nabout 9 hours ago\n•\n15.2k\n•\n562\ndeepseek-ai/DeepSeek-Prover-V2-671B\nUpdated\nabout 17 hours ago\n•\n545\n•\n530\nQwen/Qwen3-30B-A3B\nUpdated\n1 day ago\n•\n22.4k\n•\n363\nmoonshotai/Kimi-Audio-7B-Instruct\nUpdated\n3 days ago\n•\n2.89k\n•\n249\nBrowse 1M+ models\nSpaces\nRunning\non\nZero\n1k\n1k\nDi

In [18]:
def create_brochure(company_name, url):
    full_prompt = f"""
    System: {system_prompt}

    User: {get_brochure_user_prompt(company_name,url)}

    """
    try:
        response = Gemini.models.generate_content(
            model=MODEL,
            contents=full_prompt
            # model=MODEL,
            # messages,
            # generation_config = {"response_mime_type": "application/json"}
            # response_format={"type": "json_object"}
        )
    except:
        print("Error decoding response")
        return None

    result = response.text
    display(Markdown(result))

In [19]:
create_brochure("HuggingFace", "https://huggingface.co")

Found links: {'links': [{'type': 'models page', 'url': 'https://huggingface.co/models'}, {'type': 'datasets page', 'url': 'https://huggingface.co/datasets'}, {'type': 'spaces page', 'url': 'https://huggingface.co/spaces'}, {'type': 'enterprise page', 'url': 'https://huggingface.co/enterprise'}, {'type': 'pricing page', 'url': 'https://huggingface.co/pricing'}, {'type': 'about page', 'url': 'https://huggingface.co/huggingface'}, {'type': 'careers page', 'url': 'https://apply.workable.com/huggingface/'}, {'type': 'blog page', 'url': 'https://huggingface.co/blog'}, {'type': 'docs page', 'url': 'https://huggingface.co/docs'}, {'type': 'learn page', 'url': 'https://huggingface.co/learn'}]}


```markdown
# Hugging Face: Build the Future of AI Together

**Welcome to Hugging Face, the AI community building the future!** We are the leading platform for collaboration in machine learning, offering a comprehensive ecosystem for building, sharing, and deploying AI models, datasets, and applications.

## What We Offer

*   **Models:** Explore over 1 million pre-trained models for a wide range of tasks, from text generation to image classification, and everything in between.
*   **Datasets:** Access and share 250k+ datasets to fuel your AI projects.
*   **Spaces:** Run and showcase your AI applications with our easy-to-use platform.
*   **Open Source Tools:** Leverage our open-source stack, including Transformers, Diffusers, and more, to accelerate your ML journey.

## Why Choose Hugging Face?

*   **Collaboration:** Connect with a vibrant community of AI researchers, developers, and enthusiasts. Share your work, learn from others, and build together.
*   **Speed and Efficiency:** Utilize our pre-trained models, datasets, and open-source tools to move faster and focus on innovation.
*   **Comprehensive Solutions:** Access a full suite of tools and services, from model hosting to compute resources, to support every stage of your AI project.

## For Prospective Customers & Investors

*   **Enterprise Solutions:** Give your team the most advanced platform to build AI with enterprise-grade security, access controls and dedicated support.
    *   Single Sign-On
    *   Regions
    *   Priority Support
    *   Audit Logs
    *   Resource Groups
    *   Private Datasets Viewer
*   **Compute:** Deploy on optimized Inference Endpoints or update your Spaces applications to a GPU in a few clicks. Starting at $0.60/hour for GPU.
*   **Trusted by Industry Leaders:** Join the ranks of 50,000+ organizations that are using Hugging Face including: AI2, Meta, Amazon, Google, Intel, Microsoft, Grammarly, Writer.

## Join Our Community

*   **Open Source:** Our open-source initiatives are at the heart of our mission. Contribute to projects like Transformers, Diffusers, and more to shape the future of AI.
*   **Connect with Us:** Stay up-to-date on the latest developments and join the conversation on GitHub, Twitter, LinkedIn, and Discord.

## Careers at Hugging Face

We are always looking for talented individuals to join our team! Visit our [Jobs](https://huggingface.co/jobs) page to explore opportunities.
```


In [33]:
def system_prompt(tone):
    emotion_prompt = ""
    if tone == "Normal":
        emotion_prompt = "You are an assistant that analyzes the contents of several relevant pages from a company website \
and creates a short brochure about the company for prospective customers, investors and recruits. Respond in markdown.\
Include details of company culture, customers and careers/jobs if you have the information."
    elif tone == "Funny":
        emotion_prompt = "You are an assistant that analyzes the contents of several relevant pages from a company website \
and creates a short humorous, entertaining, jokey brochure about the company for prospective customers, investors and recruits. Respond in markdown.\
Include details of company culture, customers and careers/jobs if you have the information."
    elif tone == "Angry":
        emotion_prompt = "You are an assistant that analyzes the contents of several relevant pages from a company website \
and creates a short angered, mad, furious brochure about the company for prospective customers, investors and recruits. Respond in markdown.\
Include details of company culture, customers and careers/jobs if you have the information."
    elif tone == "Surprise":
        emotion_prompt = "You are an assistant that analyzes the contents of several relevant pages from a company website \
and creates a short amazement, astonishment, curiosity brochure about the company for prospective customers, investors and recruits. Respond in markdown.\
Include details of company culture, customers and careers/jobs if you have the information."
    else:
        emotion_prompt = "You are an assistant that analyzes the contents of several relevant pages from a company website \
and creates a short politeness, respect, professionalism brochure about the company for prospective customers, investors and recruits. Respond in markdown.\
Include details of company culture, customers and careers/jobs if you have the information."

    return emotion_prompt


In [34]:
def stream_brochure(company_name, url, tone):
    full_prompt = f"""
    System: {system_prompt(tone)}

    User: {get_brochure_user_prompt(company_name,url)}

    Respond in brochure format.
    Colors: Use a palette that evokes warmth and comfort, such as earthy tones, pastels, or a mix of bright and soft colors.
    Font:Choose a font that is legible and complements the overall theme (e.g., a modern sans-serif for the headline, a more traditional serif for the body text).
    """
    
    try:
        stream = Gemini.models.generate_content_stream(
            model=MODEL,
            contents=full_prompt
        )
    except:
        print("Error decoding response")
        return None

    response = ""
    #display_handle = display(Markdown(""), display_id=True)
    for chunk in stream:
        response += chunk.text or ''
        yield response
        # response = response.replace("```","").replace("markdown", "")
        # update_display(Markdown(response), display_id=display_handle.display_id)

In [35]:
stream_brochure("HuggingFace", "https://huggingface.co","Funny")

<generator object stream_brochure at 0x0000018B0F51D8B0>

In [None]:
view = gr.Interface(
    fn=stream_brochure,
    inputs=[
        gr.Textbox(label="Company name:"),
        gr.Textbox(label="Landing page URL including http:// or https://"),
        gr.Dropdown(["Normal", "Funny", "Angry","Surprise", "Formal"], label="Select Emotion")
        ],
    outputs=[gr.Markdown(label="Brochure:")],
    flagging_mode="never"
)
view.launch()

* Running on local URL:  http://127.0.0.1:7865

To create a public link, set `share=True` in `launch()`.




Found links: {'links': [{'type': 'about page', 'url': 'https://www.geeksforgeeks.org/about/'}, {'type': 'careers page', 'url': 'https://geeksforgeeks.zohorecruit.in/careers'}, {'type': 'Advertise with Us', 'url': 'https://www.geeksforgeeks.org/advertise-with-us/'}, {'type': 'Campus Training Program', 'url': 'https://www.geeksforgeeks.org/campus-training-program/'}, {'type': 'Gfg Corporate Solution', 'url': 'https://www.geeksforgeeks.org/gfg-corporate-solution/'}, {'type': 'Courses', 'url': 'https://www.geeksforgeeks.org/courses'}, {'type': 'Jobs', 'url': 'https://www.geeksforgeeks.org/jobs'}, {'type': 'Press Release', 'url': 'https://www.geeksforgeeks.org/press-release/'}]}
Found links: {'links': [{'type': 'about page', 'url': 'https://www.geeksforgeeks.org/about/'}, {'type': 'careers page', 'url': 'https://geeksforgeeks.zohorecruit.in/careers'}, {'type': 'courses', 'url': 'https://www.geeksforgeeks.org/courses'}, {'type': 'advertise with us', 'url': 'https://www.geeksforgeeks.org/adve