In [16]:
import os

import pandas as pd
from exa_py import Exa
from phi.agent import Agent
from phi.model.openai import OpenAIChat
from phi.tools.duckduckgo import DuckDuckGo
from phi.tools.firecrawl import FirecrawlTools

print("All imports successful.")

All imports successful.


In [None]:
# Credentials are read from the environment so that real keys never end up
# saved inside the notebook. Set OPENAI_API_KEY, EXA_API_KEY and
# FIRECRAWL_API_KEY before starting the kernel. A missing variable falls back
# to the empty string.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")
EXA_API_KEY = os.environ.get("EXA_API_KEY", "")
FIRECRAWL_API_KEY = os.environ.get("FIRECRAWL_API_KEY", "")

# Surface missing keys here rather than as an authentication failure in a
# later cell.
_missing_keys = [
    key_name
    for key_name, key_value in (
        ("OPENAI_API_KEY", OPENAI_API_KEY),
        ("EXA_API_KEY", EXA_API_KEY),
        ("FIRECRAWL_API_KEY", FIRECRAWL_API_KEY),
    )
    if not key_value
]
if _missing_keys:
    print(f"Warning: missing API keys: {', '.join(_missing_keys)}")

# Shared Exa client used by get_competitor_urls.
exa = Exa(api_key=EXA_API_KEY)
print("APIs initialized successfully.")

APIs initialized successfully.


In [18]:
# Firecrawl toolkit with crawling enabled and scraping disabled; limit=5 is
# forwarded to FirecrawlTools as-is.
firecrawl_tools = FirecrawlTools(
    api_key=FIRECRAWL_API_KEY,
    crawl=True,
    scrape=False,
    limit=5,
)

# Agent that is given the Firecrawl toolkit and DuckDuckGo as tools.
# show_tool_calls=True is why tool invocations appear in the response text.
firecrawl_agent = Agent(
    model=OpenAIChat(api_key=OPENAI_API_KEY, id="gpt-3.5-turbo"),
    tools=[firecrawl_tools, DuckDuckGo()],
    markdown=True,
    show_tool_calls=True,
)

In [19]:
# LLM-only agent: no tools are attached. Its use is outside this section;
# presumably it compares the collected competitor data (TODO confirm).
comparison_agent = Agent(
    model=OpenAIChat(id="gpt-3.5-turbo", api_key=OPENAI_API_KEY),
    # Keyword spelled `show_tool_calls`, matching the one passed to
    # firecrawl_agent (the misspelling `show_tools_calls` is not that option).
    show_tool_calls=True,
    markdown=True,
)
print("Agents created successfully")

Agents created successfully


In [20]:
def get_competitor_urls(url=None, description=None, num_results=5):
    """
    Find competitor URLs using Exa's search capabilities.

    Exactly one Exa lookup is performed. When ``url`` is truthy it takes
    precedence and ``description`` is ignored; ``description`` is used as
    the search query only when no ``url`` is given.

    Args:
        url: Company website URL, passed to ``exa.find_similar``.
        description: Company description text, passed to ``exa.search``.
        num_results: Number of results to request from Exa (default 5).

    Returns:
        List of competitor URLs, in the order Exa returned them.

    Raises:
        ValueError: If neither ``url`` nor ``description`` is provided.
    """
    if url:
        result = exa.find_similar(
            url=url,
            num_results=num_results,
            exclude_source_domain=True,
            # Exa documents its category values in lowercase.
            category="company",
        )
    elif description:
        result = exa.search(
            description,
            type="neural",
            category="company",
            use_autoprompt=True,
            num_results=num_results,
        )
    else:
        raise ValueError("Either 'url' or 'description' must be provided.")
    return [item.url for item in result.results]



In [21]:
# Both arguments are supplied, but get_competitor_urls checks `url` first, so
# this call takes the find_similar branch and the description is not used.
get_competitor_urls(
    url="https://www.zoho.com/people/",
    description="Zoho People is a cloud-based HR software that helps businesses manage their human resources functions, including employee records, attendance, leave management, and performance reviews."
)

['https://www.zohocrm.com/',
 'https://www.freshworks.com/hrms/features/',
 'https://www.hrmsworld.com/zoho-people-hr-software-profile.html',
 'https://www.manageengine.com/projects.html',
 'https://getabettercrm.com/applications/zoho-one/hr-human-resources/']

In [22]:
# Look up competitors for a sample company. Because `test_url` is truthy, the
# lookup is URL-based and `test_description` is not used by the function.
test_url = "https://openai.com"
test_description = "OpenAI is an AI research and deployment company that aims to ensure that artificial general intelligence (AGI) benefits all of humanity."   
competitors = get_competitor_urls(url=test_url, description=test_description)
print(f"Found Competitors {competitors}")

Found Competitors ['https://openai.io/', 'https://open.ai/', 'https://www.openai.co/', 'https://platform.openai.com/;']


In [None]:
## TODO: Adjust this function to work with multiple URLs

def extract_competitor_info(competitor_url: str):
    """
    Ask the Firecrawl agent to crawl and summarize one competitor website.

    Failures are not raised: the exception is printed and reported back in
    the returned dictionary instead.

    Args:
        competitor_url: URL of competitor website

    Returns:
        Dictionary with the key "competitor" (the URL) plus either "data"
        (the agent response's content) on success or "error" (the exception
        message) on failure.
    """
    try:
        # The agent decides which tool to call based on this prompt.
        prompt = f"Crawl and summarize {competitor_url}"
        summary = firecrawl_agent.run(prompt).content
    except Exception as exc:
        print(f"Error extracting info for {competitor_url}: {exc}")
        return {"competitor": competitor_url, "error": str(exc)}
    else:
        return {"competitor": competitor_url, "data": summary}

# Smoke-test the extractor on the first competitor URL in `competitors`.
# NOTE: extract_competitor_info returns an error dict instead of raising, so
# the success message below is printed even when the crawl failed.
sample_data = extract_competitor_info(competitors[0])
print("Sample competitor data extracted!")
print(f"Data length: {len(str(sample_data))}")

In [26]:
# Show the whole result dict: the URL plus either the crawled summary or the error text.
print(sample_data)

{'competitor': 'https://openai.io/', 'data': '\nRunning:\n - crawl_website(url=https://openai.io/)\n\nI have crawled the website "https://openai.io/", but it seems that there is no content to summarize. You may want to visit the website directly for information.'}
