# A Full Business Solution for an Investment Management Company

The following script allows the user to do the following:

1) Retrieve all the links that are useful to the target company
2) Generate an automatic 1 page pitch on why the company should be a "Buy" for an investment management firm

The user can also choose one of three models for each task. The available models are:

- gpt-4o-mini
- claude-3-haiku (model id `claude-3-haiku-20240307`)
- deepseek-chat

This means the user does not have to rely on a single model: a different one can be chosen for each of the two tasks, which adds extra flexibility.

Finally, this script uses the Gradio UI so anyone can have access to the public URL. The UI also opens the final pitch as a web page (HTML format) in a separate browser tab.

In [1]:
#!pip install --upgrade google-generativeai

In [2]:
#!pip install markdown

In [3]:
# imports

import os
import requests
import json
from typing import List
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from IPython.display import Markdown, display, update_display
from openai import OpenAI
import google.generativeai
import anthropic

In [4]:
import gradio as gr

In [5]:
# Read the API keys from the local .env file (its values override variables
# that are already set) and echo a short prefix of each one, so that a missing
# or mistyped key is easy to spot.

load_dotenv(override=True)
openai_api_key = os.getenv('OPENAI_API_KEY')
anthropic_api_key = os.getenv('ANTHROPIC_API_KEY')
google_api_key = os.getenv('GOOGLE_API_KEY')

print(
    f"OpenAI API Key exists and begins {openai_api_key[:8]}"
    if openai_api_key
    else "OpenAI API Key not set"
)

print(
    f"Anthropic API Key exists and begins {anthropic_api_key[:7]}"
    if anthropic_api_key
    else "Anthropic API Key not set"
)

print(
    f"Google API Key exists and begins {google_api_key[:8]}"
    if google_api_key
    else "Google API Key not set"
)

OpenAI API Key exists and begins sk-proj-
Anthropic API Key exists and begins sk-ant-
Google API Key exists and begins AIzaSyA4


In [6]:
# Connect to OpenAI, Anthropic and Google; comment out the Claude or Google lines if you're not using them
# No credentials are passed explicitly here.
# NOTE(review): presumably each SDK reads its key from the environment
# populated by load_dotenv() in the previous cell — confirm.

# Module-level client used by stream_gpt().
openai = OpenAI()

# Module-level client used by stream_claude().
claude = anthropic.Anthropic()

# NOTE(review): no Gemini call is visible in this part of the file.
google.generativeai.configure()

In [7]:
# A class to represent a Webpage

# Some websites need you to use proper headers when fetching them:
# NOTE: the Website class below sends its own, larger header set and does not
# read this module-level dict.
headers = {
 "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36"
}

class Website:
    """
    A scraped web page: raw body, title, visible text and outbound links.

    Constructing an instance performs the HTTP GET. A failed request does not
    raise; the instance is filled with placeholder values and an empty link
    list instead, so callers can always read the same attributes.

    Attributes:
        url: the address passed to the constructor.
        body: raw response bytes (b"" when the request failed).
        title: text of the <title> element, or a placeholder string.
        text: text of <body> with script/style/img/input elements removed.
        links: absolute http(s) URLs taken from the page's <a href> values.
    """

    # Browser-like headers; some sites refuse requests that lack them.
    REQUEST_HEADERS = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
        "Accept-Language": "en-US,en;q=0.5",
        "Connection": "keep-alive",
        "Upgrade-Insecure-Requests": "1",
        "Sec-Fetch-Dest": "document",
        "Sec-Fetch-Mode": "navigate",
        "Sec-Fetch-Site": "none",
        "Sec-Fetch-User": "?1",
        "Cache-Control": "max-age=0",
    }

    # Seconds to wait before giving up, so a slow site cannot hang the app.
    REQUEST_TIMEOUT = 10

    def __init__(self, url):
        """Fetch *url* and populate body, title, text and links."""
        self.url = url
        try:
            response = requests.get(
                url,
                headers=self.REQUEST_HEADERS,
                timeout=self.REQUEST_TIMEOUT,
            )
            response.raise_for_status()  # 4XX/5XX become RequestException
        except requests.exceptions.RequestException as e:
            print(f"Error fetching {url}: {e}")
            # Provide default values on error
            self.body = b""
            self.title = "Error fetching page"
            self.text = f"Could not fetch page content: {str(e)}"
            self.links = []
            return

        self.body = response.content
        soup = BeautifulSoup(self.body, 'html.parser')

        # .string is None when <title> is empty or contains nested tags.
        title_text = soup.title.string if soup.title else None
        self.title = title_text if title_text else "No title found"

        if soup.body:
            for irrelevant in soup.body(["script", "style", "img", "input"]):
                irrelevant.decompose()
            self.text = soup.body.get_text(separator="\n", strip=True)
        else:
            self.text = ""

        # Resolve against the final URL so relative links stay correct after
        # a redirect; fall back to the requested URL if it is unavailable.
        base_url = response.url or url
        self.links = []
        for anchor in soup.find_all('a'):
            resolved = self._resolve_link(base_url, anchor.get('href'))
            if resolved:
                self.links.append(resolved)

        print(f"Successfully processed {url}")
        print(f"Found {len(self.links)} links")

    @staticmethod
    def _resolve_link(base_url, href):
        """
        Return *href* as an absolute http(s) URL, or None if it is not one.

        Handles root-relative ("/about"), document-relative ("reports.html")
        and protocol-relative ("//cdn.example.com/x") references. In-page
        anchors and non-web schemes (mailto:, javascript:, tel:, ...) are
        rejected.
        """
        # Imported locally because the file only imports urllib.parse in a
        # later cell, after this class is defined.
        from urllib.parse import urljoin, urlparse

        if not href:
            return None
        href = href.strip()
        if not href or href.startswith('#'):
            return None
        absolute = urljoin(base_url, href)
        if urlparse(absolute).scheme not in ("http", "https"):
            return None
        return absolute

    def get_contents(self):
        """Return the title and text formatted as a block for an LLM prompt."""
        return f"Webpage Title:\n{self.title}\nWebpage Contents:\n{self.text}\n\n"

## First step: Have the selected model (gpt-4o-mini by default) figure out which links are relevant

### Use a call to the selected model (e.g. gpt-4o-mini) to read the links on a webpage, and respond in structured JSON.  
It should decide which links are relevant, and replace relative links such as "/about" with "https://company.com/about".  
I will use "one shot prompting" in which I provide an example of how it should respond in the prompt.

This is an excellent use case for an LLM, because it requires nuanced understanding. Imagine trying to code this without LLMs by parsing and analyzing the webpage - it would be very hard!

In [8]:
# System prompt for the link-filtering step. It asks the model to reply with a
# bare JSON object of the form {"homepage": ..., "links": [{"type", "url"}]}
# and gives one worked example (one-shot prompting).
link_system_prompt = """
You are a link extraction agent. Your task is to extract only the most relevant pages from a company's website that would support writing a professional 1-page investment pitch for institutional investors.

Return only a valid JSON object. All keys and string values must use double quotes ("). Do not include any commentary or extra text.
Your output must include:
- "homepage": the company homepage URL
- "links": an array of link objects, where each object has:
  - "type": a category (e.g., "about page", "investor relations page", etc.)
  - "url": the full URL

Example:
Input:
[
  "https://acme.com",
  "https://acme.com/about",
  "https://acme.com/investor-relations",
  "https://acme.com/contact"
]

Output:
{
  "homepage": "https://acme.com",
  "links": [
    {"type": "about page", "url": "https://acme.com/about"},
    {"type": "investor relations page", "url": "https://acme.com/investor-relations"}
  ]
}

Now return the JSON output for the following input:
"""

In [9]:
# System prompt for the pitch-writing step; create_brochure and
# gradio_stream_pitch pass it to select_model_and_stream.
# This is a plain (non-f) string, so "{domain}" below reaches the model as
# literal text; the actual domain is given in the user prompt built by
# get_brochure_user_prompt ("Domain for logo: ...").
system_prompt = """
You are a senior investment analyst assistant tasked with creating a vibrant, high-quality, and professional 1-page stock pitch in Markdown format.

This pitch is for institutional investors deciding whether to BUY the stock.

Your response must:
- Be written in **Markdown**
- Include the **company logo** rendered using Clearbit, but only once, directly under the “Company Name and Website” section
- Be structured, scannable, and visually appealing using tasteful emojis and bold headers
- End with a clear, data-supported investment recommendation

### Recommended Sections:
- 📌 **Company Name and Website**
- ![Company Logo](https://logo.clearbit.com/{domain}) ← Place here
- 🏢 **Company Overview**
- ⚙️ **Products & Services**
- 💼 **Business Model**
- 📈 **Growth Strategy**
- 🌱 **ESG & Sustainability** (if relevant)
- 👥 **Leadership**
- 💰 **Financial & Investor Highlights**
- 🧠 **Competitive Edge**
- ✅ **Final Recommendation**

Keep the pitch concise, punchy, and under one page. Use real business reasoning, and end with a firm BUY/HOLD/SELL call.
Output only the final Markdown.
"""

In [10]:
def get_links_user_prompt(website):
    """
    Build the user prompt that asks a model to pick the pitch-relevant links.

    `website` must expose `url` (str) and `links` (list of str); the links are
    listed one per line after the instructions.
    """
    intro = f"Here is the list of links on the website of {website.url} - "
    instructions = (
        "please decide which of these are relevant web links for a pitch 1 pager about the company, "
        "respond with the full https URL in JSON format. "
        "Do not include Terms of Service, Privacy, email links.\n"
    )
    heading = "Links (some might be relative links):\n"
    listing = "\n".join(website.links)
    return "".join((intro, instructions, heading, listing))

In [11]:
def stream_gpt(system_prompt, user_prompt):
    """
    Stream a gpt-4o-mini chat completion through the module-level `openai` client.

    Yields the cumulative text received so far, once per streamed chunk.
    """
    stream = openai.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        stream=True,
        temperature=0.7,
    )

    accumulated = ""
    for event in stream:
        # delta is an object, not a dict; its content may be None or empty
        piece = event.choices[0].delta.content
        if piece:
            accumulated += piece
        yield accumulated


In [12]:
def stream_claude(system_prompt, user_prompt):
    """
    Stream a claude-3-haiku completion through the module-level `claude` client.

    Yields the cumulative text received so far, once per streamed text event.
    """
    stream_manager = claude.messages.stream(
        model="claude-3-haiku-20240307",
        max_tokens=1000,
        temperature=0.7,
        system=system_prompt,
        messages=[{"role": "user", "content": user_prompt}],
    )
    so_far = ""
    with stream_manager as stream:
        for fragment in stream.text_stream:
            if fragment:
                so_far += fragment
            yield so_far


In [13]:
# DeepSeek: read the key from the environment and report whether it is set.

deepseek_api_key = os.getenv('DEEPSEEK_API_KEY')

print(
    f"DeepSeek API Key exists and begins {deepseek_api_key[:3]}"
    if deepseek_api_key
    else "DeepSeek API Key not set - please skip to the next section if you don't wish to try the DeepSeek API"
)

DeepSeek API Key exists and begins sk-


In [14]:
def stream_deepseek(system_prompt, user_prompt):
    """
    Stream a completion from DeepSeek using the OpenAI-compatible client.

    A client pointed at https://api.deepseek.com is created on each call with
    the module-level `deepseek_api_key`.

    Yields the cumulative text received so far, once per streamed chunk.
    """
    deepseek_via_openai_client = OpenAI(
        api_key=deepseek_api_key,
        base_url="https://api.deepseek.com",
    )

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]

    response_stream = deepseek_via_openai_client.chat.completions.create(
        model="deepseek-chat",
        messages=messages,
        stream=True,
        temperature=0.7,
    )

    result = ""
    for chunk in response_stream:
        # The delta can carry content=None (e.g. a chunk with no text), so
        # checking that the attribute exists is not enough: concatenating None
        # would raise TypeError. Treat a missing/None content as empty text,
        # the same way stream_gpt does.
        result += getattr(chunk.choices[0].delta, "content", None) or ""
        yield result


In [15]:
def select_model_and_stream(model_choice, system_prompt, user_prompt):
    """
    Route the prompts to the streaming function for `model_choice`.

    Yields whatever the chosen streamer yields (cumulative partial text). For
    an unknown model name, yields the single string "Model not recognized.".
    """
    # Lambdas keep the lookup of each streamer lazy: only the selected one is
    # ever resolved and called.
    routes = {
        "gpt-4o-mini": lambda: stream_gpt(system_prompt, user_prompt),
        "claude-3-haiku-20240307": lambda: stream_claude(system_prompt, user_prompt),
        "deepseek-chat": lambda: stream_deepseek(system_prompt, user_prompt),
    }
    start_stream = routes.get(model_choice)
    if start_stream is None:
        yield "Model not recognized."
        return
    yield from start_stream()

In [16]:
def get_links(url, model_choice):
    """
    Scrape `url` and ask the chosen model which of its links matter for a pitch.

    Args:
        url: page whose links are collected (via Website).
        model_choice: model name understood by select_model_and_stream.

    Returns:
        A dict {"homepage": str, "links": [{"type": str, "url": str}, ...]}.
        The shape is guaranteed even when the page has no links or the model's
        reply cannot be parsed; in those cases "links" is an empty list.
    """
    website = Website(url)

    # Nothing to filter: skip the model call entirely.
    if not website.links:
        print(f"No links found at {url}")
        return {"homepage": url, "links": []}

    user_prompt = get_links_user_prompt(website)

    print(f"Found {len(website.links)} links. Sending to model for filtering...")
    print(f"Sample links: {website.links[:5]}")

    # Each yielded value is the cumulative text, so the last one is the reply.
    final_text = ""
    for final_text in select_model_and_stream(
        model_choice,
        system_prompt=link_system_prompt,
        user_prompt=user_prompt,
    ):
        pass

    print("DEBUG final_text =", repr(final_text))
    return _parse_link_selection(final_text, url)


def _parse_link_selection(response_text, homepage_url):
    """Parse a model reply into the {"homepage", "links"} dict, or a default."""
    import re

    # Try the whole reply first, then the outermost {...} span, which copes
    # with replies wrapped in commentary or code fences.
    candidates = [response_text]
    match = re.search(r'\{.*\}', response_text, re.DOTALL)
    if match and match.group(0) != response_text:
        candidates.append(match.group(0))

    for candidate in candidates:
        try:
            parsed = json.loads(candidate)
        except json.JSONDecodeError as e:
            print(f"JSON parsing error: {e}")
            continue

        # A bare JSON array is accepted as the list of links.
        if isinstance(parsed, list):
            parsed = {"homepage": homepage_url, "links": parsed}
        if not isinstance(parsed, dict):
            continue

        raw_links = parsed.get("links")
        if not isinstance(raw_links, list):
            raw_links = []

        # Keep only entries callers can use: a dict with a non-empty string
        # "url"; supply a "type" label when the model omitted it.
        links = [
            {**entry, "type": str(entry.get("type") or "relevant page")}
            for entry in raw_links
            if isinstance(entry, dict)
            and isinstance(entry.get("url"), str)
            and entry["url"]
        ]

        homepage = parsed.get("homepage")
        if not isinstance(homepage, str) or not homepage:
            homepage = homepage_url

        print("Successfully parsed JSON response")
        return {**parsed, "homepage": homepage, "links": links}

    print("No usable JSON structure found in the response")
    return {"homepage": homepage_url, "links": []}

## Second Step: Make the Pitch Deck!

Assemble all the details into another prompt for the selected model (e.g. gpt-4o-mini)

In [17]:
def get_all_details(url, model_choice):
    """
    Collect the text of the landing page and of every link get_links selects.

    Returns one string: the landing page contents followed by one section per
    selected link, each headed by the link's "type".
    """
    sections = ["Landing page:\n", Website(url).get_contents()]
    links = get_links(url, model_choice)
    print("Found links:", links)
    for link in links["links"]:
        sections.append(f"\n\n{link['type']}\n")
        sections.append(Website(link["url"]).get_contents())
    return "".join(sections)

In [18]:
def get_brochure_user_prompt(company_name, homepage_url, domain, relevant_links, page_contents=None):
    """
    Build the user prompt for the pitch-writing model.

    The prompt lists the company name, website, logo domain and the relevant
    links (as indented JSON). When `page_contents` is non-empty, its first
    20000 characters are appended under a "Page Contents" heading.
    """
    pieces = [
        "\nHere is the company data:\n\n",
        f"- Company Name: {company_name}\n",
        f"- Website: {homepage_url}\n",
        f"- Domain for logo: {domain}\n",
        "- Relevant pages:\n",
        f"{json.dumps(relevant_links, indent=2)}\n",
    ]

    if page_contents:
        pieces.append(f"\n\n### Page Contents:\n{page_contents[:20000]}")

    pieces.append("\n\nPlease use this to write a 1-page Markdown pitch for institutional investors.")
    return "".join(pieces)


In [19]:
from urllib.parse import urlparse
from IPython.display import display, Markdown

def create_brochure(company_name, homepage_url, relevant_links, model_choice):
    """
    Generate the pitch with the chosen model and display it as Markdown.

    The logo domain is the network location of `homepage_url`. Output is
    rendered once, after the stream has finished.
    """
    user_prompt = get_brochure_user_prompt(
        company_name=company_name,
        homepage_url=homepage_url,
        domain=urlparse(homepage_url).netloc,
        relevant_links=relevant_links,
    )

    # Every yielded value is the cumulative text so far, so after the loop the
    # loop variable holds the complete pitch (or "" if nothing was yielded).
    final_text = ""
    for final_text in select_model_and_stream(
        model_choice=model_choice,
        system_prompt=system_prompt,
        user_prompt=user_prompt,
    ):
        pass

    display(Markdown(final_text))

## Finally - A Minor Improvement

With a small adjustment, I can change this so that the results stream back from OpenAI,
with the familiar typewriter animation

In [20]:
# System prompt for the streamed pitch. The text appears to be the same as
# `system_prompt` defined in an earlier cell.
# NOTE(review): gradio_stream_pitch in the next cell passes `system_prompt`,
# not this name, and no use of `pitch_system_prompt` is visible in this part
# of the file — confirm which one is intended.
pitch_system_prompt = """
You are a senior investment analyst assistant tasked with creating a vibrant, high-quality, and professional 1-page stock pitch in Markdown format.

This pitch is for institutional investors deciding whether to BUY the stock.

Your response must:
- Be written in **Markdown**
- Include the **company logo** rendered using Clearbit, but only once, directly under the “Company Name and Website” section
- Be structured, scannable, and visually appealing using tasteful emojis and bold headers
- End with a clear, data-supported investment recommendation

### Recommended Sections:
- 📌 **Company Name and Website**
- ![Company Logo](https://logo.clearbit.com/{domain}) ← Place here
- 🏢 **Company Overview**
- ⚙️ **Products & Services**
- 💼 **Business Model**
- 📈 **Growth Strategy**
- 🌱 **ESG & Sustainability** (if relevant)
- 👥 **Leadership**
- 💰 **Financial & Investor Highlights**
- 🧠 **Competitive Edge**
- ✅ **Final Recommendation**

Keep the pitch concise, punchy, and under one page. Use real business reasoning, and end with a firm BUY/HOLD/SELL call.
Output only the final Markdown.
"""


In [21]:
def gradio_stream_pitch(company_name: str, homepage_url: str, relevant_links, model_choice: str, save_as_html: bool):
    """
    Generator for Gradio that streams the pitch as cumulative Markdown text.

    Args:
        company_name: used in the prompt and, when non-blank, as the file name.
        homepage_url: company site; its network location is the logo domain
            and the fallback file name.
        relevant_links: link structure embedded in the prompt as JSON.
        model_choice: model name understood by select_model_and_stream.
        save_as_html: when true, the finished pitch is written to
            "<name>_pitch.html" in the working directory and opened in the
            default browser.

    Yields:
        The cumulative pitch text after each streamed chunk, then (only when
        saving was requested and the text is non-blank) one final value with a
        status line appended.
    """
    import re
    import webbrowser
    from pathlib import Path

    # Parse the URL to extract domain for the logo
    domain = urlparse(homepage_url).netloc

    # Prefer the company name for the file name; fall back to the domain.
    stripped_name = company_name.strip() if company_name else ""
    safe_filename = stripped_name or domain

    # Replace characters that are invalid in file names on common platforms.
    safe_filename = re.sub(r'[\\/*?:"<>|]', "_", safe_filename)
    safe_filename = safe_filename.replace(' ', '_')
    if not safe_filename:
        # Blank name and a URL without a network location.
        safe_filename = "company"

    user_prompt = get_brochure_user_prompt(company_name, homepage_url, domain, relevant_links)

    final_text = ""
    for partial_text in select_model_and_stream(model_choice, system_prompt, user_prompt):
        final_text = partial_text
        # Provide the cumulative text to Gradio for streaming display
        yield final_text

    if not (save_as_html and final_text.strip()):
        return

    filename = f"{safe_filename}_pitch.html"
    try:
        html_output = md_to_html(final_text)
        with open(filename, "w", encoding="utf-8") as f:
            f.write(html_output)
        print(f"Pitch saved to: {filename}")

        # as_uri() builds a correctly escaped file:// URL on every platform;
        # plain 'file://' + path breaks on Windows drive paths and on spaces.
        file_url = Path(filename).resolve().as_uri()
        opened = webbrowser.open(file_url, new=2)  # new=2 requests a new tab

        if opened:
            status = f"Pitch saved to file: `{filename}` and opened in browser."
        else:
            status = f"Pitch saved to file: `{filename}` (could not open a browser)."
        yield final_text + f"\n\n---\n\n{status}"
    except Exception as e:
        # Best effort: saving/opening must never break the streamed result.
        error_msg = f"Error saving/opening file: {str(e)}"
        print(error_msg)
        yield final_text + f"\n\n---\n\nError: {error_msg}"

In [22]:
def test_model(model_name, system_prompt, user_prompt):
    """
    Smoke-test one model through select_model_and_stream.

    Prints every partial output as it arrives, then the final one.

    Raises:
        ValueError: if the model yields nothing, or its final output is blank.
    """
    print(f"--- Testing {model_name} ---")
    separator = "-" * 40

    chunk_count = 0
    final_output = ""
    for final_output in select_model_and_stream(model_name, system_prompt, user_prompt):
        chunk_count += 1
        print("Partial output:")
        print(final_output)
        print(separator)

    if chunk_count == 0:
        raise ValueError(f"No output was returned by model: {model_name}")

    # The stream is cumulative, so the last value is the whole response.
    if not final_output.strip():
        raise ValueError(f"Final output was empty for model: {model_name}")

    print(f"\nFinal output from {model_name}:")
    print(final_output)
    print("=" * 60 + "\n")

def test_all_models():
    """
    Run test_model against each of the three supported models.

    A failure for one model is printed and does not stop the remaining ones.
    """
    system_prompt = "You are a helpful AI. Please follow instructions carefully."
    user_prompt = "Hello! Can you introduce yourself briefly?"

    for model_name in ("gpt-4o-mini", "claude-3-haiku-20240307", "deepseek-chat"):
        try:
            test_model(model_name, system_prompt, user_prompt)
        except Exception as e:
            print(f"Error while testing {model_name}: {e}\n")

# Optionally run it immediately if in a .py script:
#if __name__ == "__main__":
    #test_all_models()


In [23]:
def test_get_links_function():
    """
    Manual check of get_links: call it for one URL and describe the result.

    Errors are printed, not raised.
    """
    # Any reachable URL works here.
    test_url = "https://investor.vanguard.com"
    # Must be a name select_model_and_stream recognises, e.g. "gpt-4o-mini"
    # or "claude-3-haiku-20240307" (names are case-sensitive).
    test_model_choice = "gpt-4o-mini"

    print(f"Testing get_links with URL={test_url} and model={test_model_choice}")
    try:
        links_data = get_links(test_url, test_model_choice)
        print("\n--- get_links return value ---")
        print(links_data)
        print("--- End of return value ---\n")

        if isinstance(links_data, dict):
            print("Looks like we got a dict, presumably with homepage and links keys.")
            print("homepage:", links_data.get("homepage"))
            print("links:", links_data.get("links", []))
            return

        if isinstance(links_data, list):
            print("We got a list. Here are the items:")
            for entry in links_data:
                print(entry)
            return

        print("Result is neither dict nor list, here's the raw output:")
        print(links_data)

    except json.JSONDecodeError as e:
        print("JSONDecodeError: The response might not be valid JSON. Error details:", e)
    except Exception as e:
        print("An error occurred while testing get_links:", str(e))

# Now we call the test
#test_get_links_function()

In [24]:
def test_get_all_details():
    """
    Manual check of get_all_details: print the aggregated page text for one URL.

    Errors are printed, not raised.
    """
    test_url = "https://investor.vanguard.com"
    # One of "gpt-4o-mini", "claude-3-haiku-20240307", "deepseek-chat".
    test_model_choice = "gpt-4o-mini"

    print(f"Testing get_all_details with URL: {test_url} and model: {test_model_choice}")

    try:
        details = get_all_details(test_url, test_model_choice)
    except json.JSONDecodeError as json_err:
        print("JSONDecodeError: The output may not be valid JSON. Error details:", json_err)
        return
    except Exception as e:
        print("An error occurred while testing get_all_details:", str(e))
        return

    try:
        print("----- get_all_details Output -----")
        print(details)
        print("----- End of Output -----")
    except json.JSONDecodeError as json_err:
        print("JSONDecodeError: The output may not be valid JSON. Error details:", json_err)
    except Exception as e:
        print("An error occurred while testing get_all_details:", str(e))

# Run the test function
#test_get_all_details()

In [25]:
def test_create_brochure():
    """
    Manual check of create_brochure using fixed Vanguard sample data.

    Prints the inputs, then lets create_brochure stream the pitch and display
    it as Markdown.
    """
    # The company name is sent to the model, so it must be spelled correctly.
    company_name = "Vanguard"
    homepage_url = "https://investor.vanguard.com"

    # Sample relevant links in the structure get_links returns.
    relevant_links = {
        "homepage": "https://investor.vanguard.com",
        "links": [
            {"type": "Investor Relations", "url": "https://corporate.vanguard.com/"},
            {"type": "About Page", "url": "https://corporate.vanguard.com/content/corporatesite/us/en/corp/who-we-are/sets-us-apart/index.html"},
        ],
    }

    # One of "gpt-4o-mini", "claude-3-haiku-20240307", "deepseek-chat".
    test_model_choice = "gpt-4o-mini"

    print("Testing create_brochure with:")
    print("Company Name:", company_name)
    print("Homepage URL:", homepage_url)
    print("Relevant Links:", relevant_links)
    print("Model Choice:", test_model_choice)

    create_brochure(company_name, homepage_url, relevant_links, test_model_choice)

# Run the test function
#test_create_brochure()

In [26]:
def test_gradio_stream_pitch():
    """
    Manual check of gradio_stream_pitch: print each newly streamed portion.

    Saving to HTML is disabled, so no file is written and no browser opens.
    """
    # The company name is sent to the model, so it must be spelled correctly.
    company_name = "Vanguard"
    homepage_url = "https://investor.vanguard.com"
    # Sample links in the structure get_links returns; the homepage entry
    # matches homepage_url so the prompt does not describe two different sites.
    relevant_links = {
        "homepage": homepage_url,
        "links": [
            {"type": "Investor Relations", "url": "https://corporate.vanguard.com/"},
            {"type": "About Page", "url": "https://corporate.vanguard.com/content/corporatesite/us/en/corp/who-we-are/sets-us-apart/index.html"},
        ],
    }
    model_choice = "gpt-4o-mini"
    save_as_html = False

    print("Starting test of gradio_stream_pitch...\n")

    generator = gradio_stream_pitch(company_name, homepage_url, relevant_links, model_choice, save_as_html)

    prev_output = ""
    for cumulative_output in generator:
        # The generator yields cumulative text; show only what was added.
        new_chunk = cumulative_output[len(prev_output):]
        if new_chunk:
            print("New chunk received:")
            print(new_chunk)
            print("-" * 40)
        prev_output = cumulative_output

    print("\nTest of gradio_stream_pitch complete.")
    
# Run the test function
#test_gradio_stream_pitch()

In [27]:
def test_get_links():
    """
    Manual check of get_links: print its result for one URL.

    Every line is flushed immediately; errors are printed, not raised.
    """
    test_url = "https://investor.vanguard.com/"
    # Must be a model name select_model_and_stream recognises.
    test_model_choice = "gpt-4o-mini"

    def say(*parts):
        # Flush every line so output appears in order while cells run.
        print(*parts, flush=True)

    say(f"Testing get_links with URL: {test_url} and model: {test_model_choice}")

    try:
        links_output = get_links(test_url, test_model_choice)
        say("----- Final JSON Output from get_links -----")
        say(links_output)
    except json.decoder.JSONDecodeError as json_err:
        say("JSONDecodeError: The output may not be valid JSON. Debug info:")
        say("Error details:", json_err)
    except Exception as e:
        say("An error occurred while testing get_links:", str(e))

# Run the test function
#test_get_links()

In [28]:
# Styles shared by both rendering paths.
_PITCH_BASE_CSS = """        body {
            font-family: Arial, sans-serif;
            line-height: 1.6;
            max-width: 800px;
            margin: 0 auto;
            padding: 20px;
        }"""

# Extra styles used when the markdown library produced the HTML.
_PITCH_RICH_CSS = _PITCH_BASE_CSS + """
        h1, h2, h3 {
            color: #2c3e50;
        }
        img {
            max-width: 100%;
            height: auto;
        }
        code {
            background-color: #f8f8f8;
            padding: 2px 4px;
            border-radius: 3px;
        }
        pre {
            background-color: #f8f8f8;
            padding: 10px;
            border-radius: 5px;
            overflow-x: auto;
        }
        table {
            border-collapse: collapse;
            width: 100%;
        }
        th, td {
            border: 1px solid #ddd;
            padding: 8px;
        }
        th {
            background-color: #f2f2f2;
        }"""


def _wrap_pitch_html(body_html, css):
    """Embed *body_html* in a standalone HTML document styled by *css*."""
    return f"""<!DOCTYPE html>
<html>
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Investment Pitch</title>
    <style>
{css}
    </style>
</head>
<body>
    {body_html}
</body>
</html>"""


def _basic_md_to_html(markdown_text):
    """Convert a small Markdown subset to HTML with regular expressions."""
    import re

    html = markdown_text
    # Headers
    html = re.sub(r'^# (.*?)$', r'<h1>\1</h1>', html, flags=re.MULTILINE)
    html = re.sub(r'^## (.*?)$', r'<h2>\1</h2>', html, flags=re.MULTILINE)
    html = re.sub(r'^### (.*?)$', r'<h3>\1</h3>', html, flags=re.MULTILINE)

    # Bold before italic, so "**" is not consumed as two italics markers
    html = re.sub(r'\*\*(.*?)\*\*', r'<strong>\1</strong>', html)
    html = re.sub(r'\*(.*?)\*', r'<em>\1</em>', html)

    # Lists
    html = re.sub(r'^- (.*?)$', r'<li>\1</li>', html, flags=re.MULTILINE)
    html = re.sub(r'(<li>.*?</li>\n)+', r'<ul>\g<0></ul>', html, flags=re.DOTALL)

    # Images must be converted before links: the link pattern also matches the
    # "[alt](src)" part of "![alt](src)" and would turn the logo into "!<a>".
    html = re.sub(r'!\[(.*?)\]\((.*?)\)', r'<img src="\2" alt="\1">', html)
    html = re.sub(r'\[(.*?)\]\((.*?)\)', r'<a href="\2">\1</a>', html)

    # Paragraphs
    html = re.sub(r'\n\n(.*?)\n\n', r'<p>\1</p>\n\n', html, flags=re.DOTALL)
    return html


def md_to_html(markdown_text):
    """
    Convert Markdown text to a complete HTML document for saving to a file.

    Code-fence markers around the text (```markdown ... ```) are removed
    first. The `markdown` library is used when it can be imported; otherwise a
    basic regex conversion (headers, bold/italic, "-" lists, images, links,
    paragraphs) is applied.

    Returns:
        The HTML document as a string.
    """
    import re

    # Remove ```markdown and ``` delimiters if present
    markdown_text = re.sub(r'```markdown\s*', '', markdown_text)
    markdown_text = re.sub(r'```\s*$', '', markdown_text, flags=re.MULTILINE)

    try:
        import markdown
    except ImportError:
        return _wrap_pitch_html(_basic_md_to_html(markdown_text), _PITCH_BASE_CSS)

    return _wrap_pitch_html(markdown.markdown(markdown_text), _PITCH_RICH_CSS)

In [29]:
def build_gradio_app():
    """Build the two-tab Gradio Blocks UI for link retrieval and pitch creation.

    Tab 1 takes a company homepage URL and a model name and calls
    ``get_links`` to populate a JSON component. Tab 2 takes a company name,
    streams a pitch from ``select_model_and_stream`` and renders every partial
    result through ``md_to_html``. When the checkbox is ticked, the final HTML
    is also written to ``<company>_pitch.html`` in the current working
    directory and opened with the ``webbrowser`` module.

    Returns:
        The assembled ``gr.Blocks`` object; the caller launches it.
    """
    # Imported locally so the block does not depend on which notebook cells
    # have already been executed.
    import re
    import webbrowser
    from html import escape
    from pathlib import Path
    from urllib.parse import urlparse

    # Compiled once instead of on every streamed chunk.
    scheme_re = re.compile(r'^https?://', re.IGNORECASE)
    fence_open_re = re.compile(r'```markdown\s*')
    fence_close_re = re.compile(r'```\s*$', re.MULTILINE)
    unsafe_filename_re = re.compile(r'[\\/*?:"<>|]')

    def normalize_url(url):
        """Strip *url* and prepend ``https://`` when it has no http(s) scheme."""
        url = (url or "").strip()
        if url and not scheme_re.match(url):
            url = "https://" + url
        return url

    def domain_of(url):
        """Return the network location of *url*, or ``""`` if it cannot be parsed."""
        try:
            return urlparse(url).netloc
        except ValueError:
            # urlparse raises on malformed input such as an unterminated
            # IPv6 literal; treat that as "no domain" instead of crashing.
            return ""

    with gr.Blocks(css="""
    #title {
        text-align: center;
        font-size: 26px;
        font-weight: bold;
        margin-bottom: 0.8rem;
    }
    #subtitle {
        text-align: center;
        font-size: 16px;
        margin-top: 0;
        color: #666;
    }
    .company-logo {
        max-width: 200px;
        display: block;
        margin: 0 auto;
    }
    """) as demo:

        gr.Markdown(
    """
    <div id='title'>AI Trading & Investment Analysis</div>
    <div id='subtitle'>Generate data-driven company pitches and relevant links.</div>
    """
        )

        # Match the model dropdown options to the actual model names used in the code
        model_options = [
            "gpt-4o-mini",           # OpenAI model
            "claude-3-haiku-20240307", # Claude model
            "deepseek-chat"           # DeepSeek model
        ]

        # Tab 1: Retrieve Relevant Links
        with gr.Tab("1) Retrieve Links"):
            link_model_choice = gr.Dropdown(
                label="Model for Link Retrieval",
                choices=model_options,
                value=model_options[0],
                interactive=True
            )

            homepage_url = gr.Textbox(
                label="Company Homepage URL",
                placeholder="e.g. https://www.tesla.com",
                lines=1
            )

            link_output = gr.JSON(label="Relevant Links", value={"homepage": "", "links": []}, visible=True)

            retrieve_button = gr.Button("Retrieve Links")
            link_status = gr.Markdown(label="Status")

        # Tab 2: Create Trading Pitch
        with gr.Tab("2) Create Pitch"):
            pitch_model_choice = gr.Dropdown(
                label="Model for Pitch Creation",
                choices=model_options,
                value=model_options[0],
                interactive=True
            )

            save_as_html_checkbox = gr.Checkbox(
                label="Save final pitch as HTML?",
                value=True
            )

            company_name_input = gr.Textbox(
                label="Company Name",
                lines=1,
                placeholder="e.g. Tesla Inc."
            )

            # Logo preview; both components start hidden and are revealed by
            # update_logo_preview once there is something to show.
            with gr.Row():
                logo_preview = gr.Image(label="Company Logo Preview", visible=False)
                logo_status = gr.Markdown(visible=False)

            # Use HTML instead of Markdown for better rendering
            pitch_output = gr.HTML(label="Investment Pitch")
            generate_button = gr.Button("Generate Trading Pitch")

        def retrieve_links_fn(url, model):
            """Fetch relevant links for *url* with *model*.

            Returns a ``(links_json, status_message)`` pair. Failures are
            reported through the status message rather than raised so the UI
            keeps working.
            """
            url = normalize_url(url)
            if not url:
                return {"homepage": "", "links": []}, "Please enter a URL"

            try:
                result = get_links(url, model)
            except Exception as e:  # UI boundary: surface any failure as text
                error_msg = f"Error retrieving links: {str(e)}"
                print(error_msg)
                return {"homepage": url, "links": []}, error_msg
            return result, f"Successfully retrieved links from {url}"

        def update_logo_preview(url):
            """Return updates for the logo image and its status line for *url*.

            The updates carry ``visible`` explicitly because both components
            are created hidden; setting only the value would leave them
            invisible forever.
            """
            url = normalize_url(url)
            if not url:
                return (
                    gr.update(value=None, visible=False),
                    gr.update(value="Enter a URL to preview the logo", visible=True),
                )

            domain = domain_of(url)
            if not domain:
                return (
                    gr.update(value=None, visible=False),
                    gr.update(value="Invalid URL", visible=True),
                )

            logo_url = f"https://logo.clearbit.com/{domain}"
            return (
                gr.update(value=logo_url, visible=True),
                gr.update(value=f"Logo preview for {domain}", visible=True),
            )

        # Connect URL input to logo preview
        homepage_url.change(
            fn=update_logo_preview,
            inputs=homepage_url,
            outputs=[logo_preview, logo_status]
        )

        retrieve_button.click(
            fn=retrieve_links_fn,
            inputs=[homepage_url, link_model_choice],
            outputs=[link_output, link_status]
        )

        def generate_pitch_fn(company_name, url, link_list, model_choice, save_as_html):
            """Stream the pitch as HTML, then optionally save it to disk.

            Generator used as a Gradio streaming handler: each yielded string
            replaces the content of the HTML output component.

            Args:
                company_name: Name typed by the user.
                url: Homepage URL from tab 1 (a scheme is added if missing).
                link_list: Value of the JSON component from tab 1; either a
                    ``{"homepage": ..., "links": [...]}`` dict or a bare list.
                model_choice: Model name forwarded to ``select_model_and_stream``.
                save_as_html: Whether to write the final HTML to a file.
            """
            company_name = (company_name or "").strip()
            url = normalize_url(url)
            if not company_name or not url:
                yield "Please provide both a Company Name and a Homepage URL."
                return

            # Make sure we're working with the right structure
            if isinstance(link_list, dict) and "links" in link_list:
                links = link_list["links"]
                # The JSON component starts out with homepage == "", so a
                # plain .get(key, default) would return the empty string
                # whenever links were never retrieved.
                homepage = link_list.get("homepage") or url
            elif isinstance(link_list, list):
                links = link_list
                homepage = url
            else:
                links = []
                homepage = url

            # Extract domain for logo
            domain = domain_of(homepage)

            # Print debug info to help with troubleshooting
            print(f"Generating pitch for {company_name} ({homepage})")
            print(f"Using model: {model_choice}")
            print(f"Links: {links}")

            # Prepare the user prompt for the model
            user_prompt = get_brochure_user_prompt(company_name, homepage, domain, links)

            # Stream directly from the model and convert to HTML
            markdown_content = ""
            html_output = ""
            for partial_content in select_model_and_stream(
                model_choice=model_choice,
                system_prompt=system_prompt,
                user_prompt=user_prompt
            ):
                # Clean up markdown code block markers
                markdown_content = fence_close_re.sub('', fence_open_re.sub('', partial_content))

                # Convert to HTML for better rendering; the last conversion is
                # reused below for the saved file.
                html_output = md_to_html(markdown_content)
                yield html_output

            # After streaming is done, optionally save to an HTML file
            if not (save_as_html and markdown_content.strip()):
                return

            # Create a safe filename
            safe_filename = unsafe_filename_re.sub("_", company_name).replace(' ', '_')
            filename = f"{safe_filename}_pitch.html"
            try:
                # Save the HTML file
                with open(filename, "w", encoding="utf-8") as f:
                    f.write(html_output)
                print(f"Pitch saved to: {filename}")

                # as_uri() yields a well-formed file:// URI on every platform
                # (drive letters, spaces), unlike 'file://' + path.
                opened = webbrowser.open(Path(filename).resolve().as_uri(), new=2)  # new=2 opens in a new tab
            except Exception as e:  # UI boundary: report instead of crashing the stream
                error_msg = f"Error saving/opening file: {str(e)}"
                print(error_msg)

                # Add error message to the HTML output
                yield html_output + f"""
                    <div style="margin-top: 30px; padding: 10px; background-color: #f8d7da; border: 1px solid #dc3545; border-radius: 5px;">
                        <p>❌ Error: {escape(error_msg)}</p>
                    </div>
                    """
                return

            # webbrowser.open reports failure through its return value, so
            # only claim the browser was opened when it says so.
            outcome = "and opened in browser." if opened else "(a browser could not be opened automatically)."

            # Add a message to the final HTML output
            yield html_output + f"""
                    <div style="margin-top: 30px; padding: 10px; background-color: #e6f7e6; border: 1px solid #28a745; border-radius: 5px;">
                        <p>✅ Pitch saved to file: <code>{escape(filename)}</code> {outcome}</p>
                    </div>
                    """

        generate_button.click(
            fn=generate_pitch_fn,
            inputs=[company_name_input, homepage_url, link_output, pitch_model_choice, save_as_html_checkbox],
            outputs=pitch_output
        )

    return demo

In [30]:
# Build the UI and start the Gradio server. share=True requests a public
# gradio.live URL in addition to the local one, and that link is temporary
# (see the cell output below), so anyone holding it can reach the app.
demo = build_gradio_app()
demo.launch(share=True)

* Running on local URL:  http://127.0.0.1:7872
* Running on public URL: https://6b59b525826e78dd95.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


