In [1]:
import os
import gradio as gr
import requests
from openai import OpenAI
from bs4 import BeautifulSoup
from dotenv import load_dotenv

In [2]:
# Read settings from a local .env file; override=True is passed so values from
# that file take precedence over variables already set in the environment.
load_dotenv(override=True)
openai_api_key = os.getenv('OPENAI_API_KEY')

# Report only whether a key was found -- the key itself is never printed.
print("API Key looks good so far" if openai_api_key else "No API Key is found!")

API Key looks good so far


In [3]:
# Module-level OpenAI client, built with the key read from the environment;
# stream_gpt issues its chat-completion requests through this object.
openai = OpenAI(api_key=openai_api_key)

# System message sent as the first entry of every chat request. It sets the
# assistant's role (website analysis/summarisation) and asks for markdown output.
system_prompt = (
    "You are a smart and helpful assistant. "
    "You analyze and summarize content from websites. Respond in markdown."
)

In [4]:
def stream_gpt(prompt, model="gpt-4o-mini"):
    """Stream a chat completion for ``prompt``, yielding the text received so far.

    The request is made of the module-level ``system_prompt`` followed by
    ``prompt`` as the user message, and is sent through the module-level
    ``openai`` client with streaming enabled.

    Args:
        prompt: User message text.
        model: Name of the chat model to query. Defaults to ``"gpt-4o-mini"``.

    Yields:
        str: The cumulative response text after each processed chunk (not just
        the newest fragment), so a consumer can simply replace its display
        with the latest value.
    """
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": prompt},
    ]

    stream = openai.chat.completions.create(
        model=model,
        messages=messages,
        stream=True,
    )

    result = ""

    for chunk in stream:
        # The API allows chunks whose `choices` list is empty (for example
        # usage-only chunks); indexing [0] on those would raise IndexError.
        if not chunk.choices:
            continue
        # delta.content is None on chunks that carry no new text.
        result += chunk.choices[0].delta.content or ""
        yield result

In [5]:
class Website:
    """A fetched web page reduced to its title and visible body text.

    All attributes are populated by ``__init__``. A failed download does not
    raise: ``title`` becomes ``"Error"`` and ``text`` becomes
    ``"Failed to fetch!"`` so callers can detect and display the failure.
    """

    url: str
    title: str
    text: str

    def __init__(self, url):
        """Download ``url`` and extract its title and text.

        Args:
            url: Address passed to ``requests.get`` (10 second timeout).
        """
        self.url = url
        # Raw response bytes; stays None when the download fails so the
        # attribute exists on every instance.
        self.body = None

        try:
            response = requests.get(url, timeout=10)
            response.raise_for_status()
            self.body = response.content
        except requests.exceptions.RequestException:
            # Deliberate best effort: record the failure on the object
            # instead of propagating the exception to the caller.
            self.title = "Error"
            self.text = "Failed to fetch!"
            return

        soup = BeautifulSoup(self.body, 'html.parser')

        # `.string` is None when <title> is empty or wraps nested tags, so it
        # must be checked before calling .strip().
        title_tag = soup.title
        title_string = title_tag.string if title_tag is not None else None
        self.title = title_string.strip() if title_string else "No title found"

        body_tag = soup.body
        if body_tag is not None:
            # Drop elements that contribute no readable text before extracting.
            for irrelevant in body_tag.find_all(["script", "style", "img", "input"]):
                irrelevant.decompose()
            self.text = body_tag.get_text(separator="\n", strip=True)
        else:
            self.text = "No body content found."

    def get_contents(self):
        """Return the title and text formatted as one labelled string."""
        return f"Website Title:\n{self.title}\nWebsite Content:\n{self.text}\n\n"

In [6]:
def scrape_and_analyze(url):
    """Fetch ``url`` and stream a model-written analysis of the page.

    Generator. If the page's text begins with the fetch-failure marker, that
    text is yielded once and the generator ends. Otherwise the page contents
    are embedded in an analysis prompt and every value produced by
    ``stream_gpt`` is yielded in turn.
    """
    page = Website(url)
    fetch_failed = page.text.startswith("Failed to fetch!")

    if not fetch_failed:
        request_text = (
            "Analyze the following website and summarize its key insights:\n\n{}"
        ).format(page.get_contents())
        yield from stream_gpt(request_text)
    else:
        # Surface the failure message to the caller instead of querying the model.
        yield page.text

In [7]:
# Build the web UI and start serving it: one text box for the URL, one Markdown
# area for the result, wired to scrape_and_analyze.
# NOTE(review): scrape_and_analyze is a generator, so the Markdown output is
# presumably refreshed on every yielded value -- confirm against Gradio docs.
gr.Interface(
    fn = scrape_and_analyze,
    inputs=[gr.Textbox(label="Landing page URL including http:// or https://")],
    outputs=[gr.Markdown(label="Website Analysis")],
    # Presumably disables Gradio's flagging feature -- verify for the installed version.
    flagging_mode="never"
).launch()

* Running on local URL:  http://127.0.0.1:7860
* To create a public link, set `share=True` in `launch()`.


