In [10]:
from dotenv import load_dotenv
import os
import google.generativeai as genai
# Load variables from .env *before* reading any key with os.getenv; otherwise
# GEMINI_KEY resolves to None on a fresh kernel and Gemini is configured
# without credentials.
load_dotenv()
genai.configure(api_key=os.getenv("GEMINI_KEY"))

from firecrawl import FirecrawlApp
# Firecrawl client; its API key is read from the FIRECRAWL_KEY environment variable.
fcapp = FirecrawlApp(api_key=os.getenv("FIRECRAWL_KEY"))
# Sampling settings passed as `generation_config` to the free-text Gemini models.
gemini_generation_config = dict(
    temperature=1,
    top_p=0.95,
    top_k=40,
    max_output_tokens=8192,
)

In [31]:

# Same sampling settings as the free-text config, plus a JSON response MIME
# type so the model is asked to answer in JSON.
strucutred_gemini_generation_config = dict(
    temperature=1,
    top_p=0.95,
    top_k=40,
    max_output_tokens=8192,
    response_mime_type="application/json",
)


In [11]:
# Scrape the target company's site with Firecrawl; the result is interpolated
# into the analysis prompt further down.
data = fcapp.scrape_url("https://raisegate.com")

In [12]:
from typing import List
from pydantic import BaseModel

# Pydantic schema holding a list of search-query strings.
# NOTE(review): no cell visible here references this model — presumably it was
# meant for structured output; confirm or remove.
class searchQueries(BaseModel):
    queries: List[str]

In [13]:
# System prompt for the website-analyst model. The sections are numbered
# consecutively (1-3); the previous text jumped from 1 to 3, which suggested a
# missing section to the model.
analysis_system_prompt="""Adopt the role of a website analyst. You will be provided the scraped markdown data of a given website, from this site, you are to recognise the following:

1. Product/Service Categories
    - Specific product names and descriptions
    - Service offerings and their descriptions
    - Industry-specific terminology and jargon
    - Target market segments mentioned
    - Core problems they claim to solve

2. Business Model Identifiers
    - Pricing structure hints (B2B, B2C, enterprise)
    - Target customer size (SMB, Enterprise, etc.)
    - Sales model

3. Market Positioning
    - Key value propositions
    - Mission statements
    - Partner ecosystem"""

In [14]:
# Gemini model configured with the website-analyst system prompt and the
# free-text generation settings.
analysis_model = genai.GenerativeModel(
    generation_config=gemini_generation_config,
    system_instruction=analysis_system_prompt,
    model_name="gemini-2.0-flash-exp",
)

In [15]:
# Ask the analyst model to analyse the scrape result.
# NOTE(review): `data` is interpolated via its string form — presumably this
# carries more than the page markdown; confirm what scrape_url returns.
llmanalysis = analysis_model.generate_content(f"Scraped website data: {data}")

In [16]:
# Display the text of the model's analysis.
print(llmanalysis.text)

Okay, here's my analysis of the provided website data for RaiseGate:

**1. Product/Service Categories:**

*   **Core Service:**  RaiseGate provides a platform that connects startups seeking funding with Venture Capital (VC) firms and angel investors. It aims to streamline the fundraising and investment process.
*   **Specific Product/Service Offerings:**
    *   **AI-Powered Startup Discovery:** Using "Scout AI" to match VCs with relevant startups, filtering them based on criteria.
    *   **Streamlined Data Rooms:** Facilitating the sharing of pitch decks and other sensitive information with permission control from startups.
    *   **Comprehensive Startup Profiles:** Providing detailed information on startups, including team, market insights, business models, demo videos, and investment theses.
    *   **Demo Videos:** Offering a platform for startups to showcase their products/ideas directly to potential investors through videos.
    *   **Deck Requests:** Enabling VCs to request pi

In [17]:
# System prompt asking the model for five non-overlapping competitor search
# queries, returned as a Python-parsable list of strings (a later cell parses
# the reply).
searchquery_system_prompt="""You are a helpful assistant that will be provided some information about a website. This website will contain data about a particular company 
that offers some products and/or services. Your job, is to, from this data give, construct 5 search queries that might result in companies that
do/offer similar products/services. focusing on what the given company does. essentially searching for potential competitors that offer similar products/services.
These 5 queries must distill the essence of their product, each query must be a distinct core value add that the company provides, and queries must not overlap.
 Return your data in a List of Strings format that is parsable in python."""

In [18]:
# Gemini model configured with the search-query system prompt and the
# free-text generation settings.
searchquery_model = genai.GenerativeModel(
    system_instruction=searchquery_system_prompt,
    generation_config=gemini_generation_config,
    model_name="gemini-2.0-flash-exp",
)

In [19]:
# Feed the analysis *text* to the query model. Interpolating the response
# object itself would embed its full string representation in the prompt
# instead of just the analysis.
queries = searchquery_model.generate_content(f"Company analysis: {llmanalysis.text}")
print(queries.text)

```
[
"AI powered startup investor matching platform",
"venture capital deal flow software",
"startup fundraising data room platform",
"early stage investment marketplace",
"startup pitch deck sharing platform"
]
```



In [27]:
import ast
import re

# Parse the model's reply into a Python list of five queries.
# The reply is untrusted text, so it is parsed with ast.literal_eval (literals
# only) rather than eval, which would execute arbitrary code.
raw_queries_text = queries.text.strip()

# The model often wraps its answer in a markdown code fence, with or without a
# language tag (```python, ```json, plain ```); keep only the fenced content.
fenced = re.search(r"```[A-Za-z]*\s*(.*?)\s*```", raw_queries_text, flags=re.DOTALL)
if fenced:
    raw_queries_text = fenced.group(1)

try:
    cleaned_queries = ast.literal_eval(raw_queries_text)
except (ValueError, SyntaxError, TypeError) as exc:
    raise ValueError(
        f"Could not parse the model output as a Python list: {raw_queries_text!r}"
    ) from exc

if not isinstance(cleaned_queries, list) or len(cleaned_queries) != 5:
    raise ValueError("Expected exactly 5 queries in a list format")
print(cleaned_queries)

['AI powered startup investor matching platform', 'venture capital deal flow software', 'startup fundraising data room platform', 'early stage investment marketplace', 'startup pitch deck sharing platform']


In [28]:
import requests
import json
import os
# Run every generated query through SerpApi's Google engine and keep the raw
# JSON payloads in `setofresults` for the next cell.
url = "https://serpapi.com/search"
serpapi_key = os.getenv("SERPAPI_KEY")

# One parameter set per query, kept around for inspection/debugging.
setofparams = [
    {
        "api_key": serpapi_key,
        "engine": "google",
        "q": str(q),
        "google_domain": "google.com",
        "gl": "us",
        "hl": "en",
    }
    for q in cleaned_queries
]

setofresults = []
for params in setofparams:
    # A timeout keeps a stalled request from hanging the kernel forever;
    # raise_for_status surfaces HTTP errors (bad key, quota) at the call site.
    response = requests.get(url, params=params, timeout=30)
    response.raise_for_status()
    setofresults.append(response.json())

for result in setofresults:
    # A payload may lack "organic_results" (e.g. no hits); print an empty list
    # instead of raising KeyError.
    print(result.get("organic_results", []))

[{'position': 1, 'title': 'InvestorMatch.ai: Home', 'link': 'https://investormatch.ai/', 'redirect_link': 'https://www.google.com/url?sa=t&source=web&rct=j&opi=89978449&url=https://investormatch.ai/&ved=2ahUKEwjV-4iCjoCLAxVt3ckDHUBxDF4QFnoECBMQAQ', 'displayed_link': 'https://investormatch.ai', 'favicon': 'https://serpapi.com/searches/678c0f0594d64c574ca14d1a/images/773e29d0de900a292e39e3780d3a4e458bfb8a5c52ec7bcda939836ae31ee160.png', 'date': 'Oct 31, 2024', 'snippet': 'InvestorMatch.ai uses smart AI algorithms to optimize and streamline the matchmaking process between fund providers and founders.', 'snippet_highlighted_words': ['ai', 'AI'], 'source': 'InvestorMatch.ai'}, {'position': 2, 'title': 'Investor Matching – Connecting startups with investors', 'link': 'https://www.gilion.com/platform/investor-matching', 'redirect_link': 'https://www.google.com/url?sa=t&source=web&rct=j&opi=89978449&url=https://www.gilion.com/platform/investor-matching&ved=2ahUKEwjV-4iCjoCLAxVt3ckDHUBxDF4QFnoE

In [29]:
# Reduce each SerpApi payload to its top three organic hits, keeping only the
# title, link and snippet of each hit. The result is one list per query.
processed_results = []

for result in setofresults:
    # Tolerate payloads without organic results rather than raising KeyError.
    top_hits = result.get("organic_results", [])[:3]
    processed_results.append([
        {
            "title": entry["title"],
            "link": entry["link"],
            # Fall back to an empty snippet when a hit has none.
            "snippet": entry.get("snippet", ""),
        }
        for entry in top_hits
    ])

print(json.dumps(processed_results, indent=2))



[
  [
    {
      "title": "InvestorMatch.ai: Home",
      "link": "https://investormatch.ai/",
      "snippet": "InvestorMatch.ai uses smart AI algorithms to optimize and streamline the matchmaking process between fund providers and founders."
    },
    {
      "title": "Investor Matching \u2013 Connecting startups with investors",
      "link": "https://www.gilion.com/platform/investor-matching",
      "snippet": "Access exclusive deals and investment opportunities powered by advanced data analytics and AI-driven insights. Due Dilligence. Streamline your growth with ..."
    },
    {
      "title": "SuperWarm.AI",
      "link": "https://superwarm.ai/",
      "snippet": "Use AI to connect with both startups and investors that align with your investment thesis \u00b7 Receive free vetted deal flow and discover promising startups before ..."
    }
  ],
  [
    {
      "title": "Edda - Dealflow & Portfolio Management Software for Venture ...",
      "link": "https://edda.co/",
      "sni

In [32]:
# System prompt for the competitor-selection model: given company details and
# 15 candidate records, return the 5 closest competitors as JSON records.
competitorfinder_system_prompt="""You are a helpful assistant who has json mode enabled. Your responses will strictly be in json format. You will be given 1. the details of a company, 
these details represent the core purpose, features and USPs of a company. You will also be given 2. 15 links that are probably competitors. Here is your job:
Find the 5 top companies from the 15 probably competitors that most closely compete with the core values of the company initially mentioned in 1. Return your output as the records of the json
as seen in 2. (For example if you are given title link and snippet, return the same 3 of those top 5 companies)"""
# Gemini model configured with the competitor-finder system prompt and the
# JSON-output generation settings.
competitorfinder_model = genai.GenerativeModel(
    generation_config=strucutred_gemini_generation_config,
    system_instruction=competitorfinder_system_prompt,
    model_name="gemini-2.0-flash-exp",
)

In [34]:
# Ask the model to pick the closest competitors from the processed search hits.
# NOTE(review): "Company Data" is filled with the generated search queries, not
# the company analysis text — presumably intentional; confirm against the
# system prompt, which expects company details.
competitors = competitorfinder_model.generate_content(f"Company Data: {cleaned_queries}, Probable competitors: {processed_results}")

In [35]:
# Display the model's reply text (the model was configured for JSON output).
print(competitors.text)

[
  {
    "title": "InvestorMatch.ai: Home",
    "link": "https://investormatch.ai/",
    "snippet": "InvestorMatch.ai uses smart AI algorithms to optimize and streamline the matchmaking process between fund providers and founders."
  },
  {
    "title": "Edda - Dealflow & Portfolio Management Software for Venture ...",
    "link": "https://edda.co/",
    "snippet": "The most advanced software for venture capital and private equity. Edda powers your dealflow, portfolio, and network with specialized intelligence."
  },
   {
    "title": "Virtual Data Room for Startup Fundraising",
    "link": "https://digify.com/startup.html",
    "snippet": "Digify provides a secure and easy-to-setup data room that allows you to distribute confidential deal information to investors while keeping control of them."
  },
  {
    "title": "Marketplace Companies with Early Stage Venture Funding",
    "link": "https://www.crunchbase.com/hub/marketplace-companies-early-stage-venture-funding",
    "snippet": "