Import

In [30]:
!pip install pdfplumber requests google-generativeai pytesseract pillow



In [31]:
# Standard library
# Import json for handling JSON data
import json
# Import os for reading the Gemini API key from the environment
import os
# Import re for regular expression text preprocessing
import re

# Third-party
# Import Google Gemini AI for compatibility analysis
import google.generativeai as genai

API Configuration and Initial Setup

In [32]:
# 🔹 Configure Gemini API
# The key is read from the environment so that it is never stored in the notebook
# file or its history. Any key that was previously committed in this cell should be
# treated as compromised and revoked.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY") or os.environ.get("GOOGLE_API_KEY")
if not GEMINI_API_KEY:
    raise RuntimeError(
        "Gemini API key not found. Set the GEMINI_API_KEY (or GOOGLE_API_KEY) "
        "environment variable before running this notebook."
    )
genai.configure(api_key=GEMINI_API_KEY)  # Configures the Gemini client with the key from the environment
GEMINI_MODEL = "models/gemini-1.5-flash-001-tuning"  # Defines the specific Gemini model to use for analysis

# 🔹 Initial Investor Dataset
INVESTOR_DATA = []  # Placeholder; replaced by the sample dataset in the "Load Investor Data" cell

# 🔹 Match Results Storage
match_results = None  # Holds the latest match results dict, or None when there are none

# Define a default value for max_results
MAX_RESULTS_DEFAULT = 10  # Sets the default maximum number of investor matches to return

Compatibility Analysis Function

In [33]:
# 🔹 Compatibility Analysis Functions
def _build_compatibility_prompt(founder_data, investor_data):
    """Render the founder/investor comparison prompt that is sent to Gemini."""
    # .get() with an 'Unknown' default keeps the prompt well-formed when a key is missing
    return (
        f"Evaluate the compatibility between a startup founder and an investor based on the following data:\n"
        f"Founder Data: Industry: {founder_data.get('industry', 'Unknown')}, Stage: {founder_data.get('stage', 'Unknown')}, "
        f"Funding Required: ${founder_data.get('funding_required', 'Unknown')}, Traction: {founder_data.get('traction', 'Unknown')}, "
        f"Business Model: {founder_data.get('business_model', 'Unknown')}\n"
        f"Investor Preferences: Preferred Industry: {investor_data.get('Industry', 'Unknown')}, "
        f"Investment Range: {investor_data.get('Cheque_range', 'Unknown')}, Preferred Stage: {investor_data.get('Stage', 'Unknown')}, "
        f"Countries: {investor_data.get('Countries', 'Unknown')}\n"
        "Return a valid JSON object in this format:\n"
        "```json\n"
        "{\n"
        '  "match_score": <integer between 0-100>,\n'
        '  "reasoning": "<Explanation of compatibility>"\n'
        "}\n"
        "```\n"
    )


def _extract_json_payload(text):
    """Return the JSON object embedded in a model reply (fenced or bare), or None."""
    # Preferred form: a ``` or ```json fenced block, tolerant of spacing around the fence
    fenced = re.search(r'```(?:json)?\s*(\{.*?\})\s*```', text, re.DOTALL)
    if fenced:
        return fenced.group(1)
    # Fallback: the reply is (or contains) a bare JSON object without a code fence
    start, end = text.find("{"), text.rfind("}")
    if start != -1 and end > start:
        return text[start:end + 1]
    return None


def _coerce_score(raw_score):
    """Convert a model-supplied score (int, float or numeric string) to an int in 0..100."""
    return max(0, min(100, int(round(float(raw_score)))))


def analyze_compatibility_gemini(founder_data, investor_data):
    """
    Analyzes compatibility between a founder and an investor using Gemini AI.

    Never raises: every failure is reported through the returned message so a
    single bad response cannot abort a batch of comparisons.

    Args:
        founder_data (dict): Data about the founder's startup
        investor_data (dict): Data about the investor's preferences
    Returns:
        tuple: (match_score, reasoning) where match_score is an int in 0..100,
               or (0, error_message) if analysis fails
    """
    try:
        prompt = _build_compatibility_prompt(founder_data, investor_data)

        # The API call gets its own handler so API failures keep a dedicated message.
        # The previous `except genai.GenerationError` clause referenced a name that does
        # not appear to exist in google.generativeai (confirm against the SDK), so it is
        # not referenced here.
        try:
            model = genai.GenerativeModel(GEMINI_MODEL)
            response = model.generate_content(prompt)
            # Reading .text is kept inside this handler in case the accessor itself
            # raises (e.g. for an empty or blocked response) - TODO confirm against the SDK.
            reply_text = response.text.strip()
        except Exception as ge:
            print(f"❌ Gemini API generation error: {ge}")
            return 0, f"Evaluation failed due to Gemini API error: {str(ge)}"

        payload = _extract_json_payload(reply_text)
        if payload is None:
            return 0, "Evaluation failed: Invalid response format from Gemini AI."

        result = json.loads(payload)
        # Coerce the score so callers can safely compare it against a numeric threshold
        score = _coerce_score(result.get("match_score", 0))
        return score, result.get("reasoning", "No reasoning provided.")
    except json.JSONDecodeError as je:  # The extracted payload was not valid JSON
        print(f"❌ JSON parsing error: {je}")
        return 0, f"Evaluation failed due to invalid JSON format: {str(je)}"
    except Exception as e:  # Anything else, e.g. a non-numeric score or non-dict payload
        print(f"❌ Unexpected error in compatibility analysis: {e}")
        return 0, f"Evaluation failed due to unexpected error: {str(e)}"

Match Calculation Function

In [34]:
def calculate_matches(founder_data, investors_list, min_score, max_results):
    """
    Score every investor against the founder and return the best matches.

    Each investor is evaluated with analyze_compatibility_gemini; investors whose
    score is below min_score are dropped, the rest are ordered from highest to
    lowest score and truncated to max_results entries.

    Args:
        founder_data (dict): Founder's startup data
        investors_list (list): List of investor dictionaries
        min_score (int): Minimum compatibility score to include
        max_results (int): Maximum number of matches to return
    Returns:
        list: Match dicts with keys "investor_name", "match_score" and "reasoning";
              an empty list when no investors are supplied or any error occurs
    """
    try:
        # Guard: nothing to score without investor data
        if not investors_list:
            print("Upload the investor data first")
            return []

        qualifying = []
        for candidate in investors_list:
            rating, rationale = analyze_compatibility_gemini(founder_data, candidate)
            if not rating >= min_score:
                continue  # Below the threshold - leave this investor out
            qualifying.append(
                dict(
                    investor_name=candidate.get("Name", "Unknown Investor"),
                    match_score=rating,
                    reasoning=rationale,
                )
            )

        # Highest score first; sorted() is stable, so ties keep their input order
        ranked = sorted(qualifying, key=lambda entry: entry["match_score"], reverse=True)
        return ranked[:max_results]
    except Exception as e:
        # Any failure is reported and yields an empty result rather than raising
        print(f"❌ Error calculating matches: {e}")
        return []

Load Investor Data

In [35]:
# 🔹 Load Investor Dataset (Sample Data)
# Each record carries the same thirteen fields; the contact fields are empty
# strings throughout this sample.
INVESTOR_DATA = [
    dict(
        Name="Blast.Club",
        Website="https://blast.club/",
        Global_HQ="France",
        Countries="Andorra,Belgium,France,Germany,Italy,Luxembourg,Portugal,Spain",
        Stage="Pre-seed, Seed, Series+",
        Overview="We invest in French and European entrepreneurs (generalist thesis, marketplaces, fintech, insurtech, IoT, web3, SaaS, deeptech, hardware, medtech, AI, consumer, DNVB) from Seed to Series+.",
        Type="VC",
        Industry="IoT, SaaS, Consumer, Edtech, Fintech, Hardware, Marketplace",
        Cheque_range="$500K - $5M",
        Linkedin_Company="",
        Email="",
        Linkedin_Personal="",
        Twitter="",
    ),
    dict(
        Name="Blisce/",
        Website="https://blisce.com/",
        Global_HQ="France",
        Countries="France,USA,UK,Germany,Netherlands,Sweden,Denmark,Norway,Spain,Italy,Finland",
        Stage="Pre-seed, Series A",
        Overview="We invest in the future of B2C.",
        Type="VC",
        Industry="B2C",
        Cheque_range="$10M - $20M",
        Linkedin_Company="",
        Email="",
        Linkedin_Personal="",
        Twitter="",
    ),
    dict(
        Name="Block Dojo",
        Website="https://blockdojo.io",
        Global_HQ="United Kingdom",
        Countries="UK",
        Stage="Pre-seed, Idea, Prototype/MVP",
        Overview="We incubate any founder looking to build in Web3",
        Type="Incubator, Accelerator",
        Industry="We incubate any founder looking to build in Web3",
        Cheque_range="$12K - $12K",
        Linkedin_Company="",
        Email="",
        Linkedin_Personal="",
        Twitter="",
    ),
    dict(
        Name="Blockchain Founders Fund",
        Website="https://blockchainff.com/",
        Global_HQ="Singapore",
        Countries="Central African Republic,Canada,USA,India,Indonesia,Singapore,Vietnam,France,Portugal,Spain,UK,Israel",
        Stage="Pre-seed, Idea, Prototype/MVP, Seed, Series+",
        Overview="We invest in pre-seed to seed stage, blockchain, crypto, and web3 startups. We seek companies early, under $25M in valuation.",
        Type="VC",
        Industry="Sector Agnostic",
        Cheque_range="$200K - $1M",
        Linkedin_Company="",
        Email="",
        Linkedin_Personal="",
        Twitter="",
    ),
    dict(
        Name="Blockrocket",
        Website="https://www.blockrocket.io/",
        Global_HQ="Germany",
        Countries="Germany,Austria,Switzerland,France,Luxembourg,UK,USA,Canada,Spain,Portugal,Singapore,UAE,Denmark,Belgium,Netherlands",
        Stage="Pre-seed, Idea, Prototype/MVP, Seed",
        Overview="We invest in early-stage blockchain startups.",
        Type="VC",
        Industry="Sector Agnostic",
        Cheque_range="$100K - $500K",
        Linkedin_Company="",
        Email="",
        Linkedin_Personal="",
        Twitter="",
    ),
    dict(
        Name="Blue Collective",
        Website="https://bluecollective.com/",
        Global_HQ="United States",
        Countries="USA",
        Stage="Pre-seed, Idea, Prototype/MVP, Seed, Series+",
        Overview="We invest in pre-Series A companies (Pre-Seed and Seed). We are NOT thesis-driven in the way most VCs are.",
        Type="VC",
        Industry="Sector Agnostic",
        Cheque_range="$200K - $2M",
        Linkedin_Company="",
        Email="",
        Linkedin_Personal="",
        Twitter="",
    ),
    dict(
        Name="Blue Future Partners",
        Website="https://www.bfp.vc/",
        Global_HQ="Germany",
        Countries="USA,UK,Israel,Germany,Singapore,Austria",
        Stage="Pre-seed, Series+",
        Overview="We invest in enterprise SaaS and Industry 4.0",
        Type="VC",
        Industry="SaaS",
        Cheque_range="$2M - $5M",
        Linkedin_Company="",
        Email="",
        Linkedin_Personal="",
        Twitter="",
    ),
    dict(
        Name="Blue Heron Capital",
        Website="https://www.blueheroncap.com/",
        Global_HQ="United States",
        Countries="USA",
        Stage="Pre-seed, Seed, Series+",
        Overview="We invest in differentiated platforms in the healthcare and technology industries with $2M+ in ARR.",
        Type="VC",
        Industry="Information Technology & Services",
        Cheque_range="$2M - $8M",
        Linkedin_Company="",
        Email="",
        Linkedin_Personal="",
        Twitter="",
    ),
]

# Report what was loaded so the reader can confirm the dataset size
print("Sample investor dataset loaded successfully!")
print(f"Loaded {len(INVESTOR_DATA)} investors")

Sample investor dataset loaded successfully!
Loaded 8 investors


Select Founder Data (Sample Profiles)

In [36]:
# 🔹 Define Founder Data (Sample Selection)
# List of sample founder profiles
sample_founders = [
    {
        "name": "BlastApp",
        "industry": "Fintech",
        "stage": "Seed",
        "funding_required": 500000,  # $500K
        "traction": "20K users",
        "business_model": "SaaS"
    },
    {
        "name": "HealthTech Innovate",
        "industry": "Healthcare",
        "stage": "Series A",
        "funding_required": 2000000,  # $2M
        "traction": "$500K revenue",
        "business_model": "B2B"
    },
    {
        "name": "Web3 Pioneer",
        "industry": "Web3",
        "stage": "Pre-Seed",
        "funding_required": 12000,  # $12K
        "traction": "1K beta users",
        "business_model": "Subscription"
    },
    {
        "name": "EcoSolutions",
        "industry": "Energy",
        "stage": "Series+",
        "funding_required": 5000000,  # $5M
        "traction": "$3M revenue",
        "business_model": "B2C"
    },
    {
        "name": "IoT Connect",
        "industry": "IoT",
        "stage": "Seed",
        "funding_required": 100000,  # $100K
        "traction": "10K devices sold",
        "business_model": "Hardware"
    },
    {
        "name": "EduPlatform",
        "industry": "Edtech",
        "stage": "Pre-Seed",
        "funding_required": 200000,  # $200K
        "traction": "5K students",
        "business_model": "SaaS"
    },
    {
        "name": "MarketMate",
        "industry": "Marketplace",
        "stage": "Series A",
        "funding_required": 2000000,  # $2M
        "traction": "$1M GMV",
        "business_model": "B2C"
    },
    {
        "name": "MediTech Solutions",
        "industry": "Medtech",
        "stage": "Series+",
        "funding_required": 8000000,  # $8M
        "traction": "$4M revenue",
        "business_model": "B2B"
    }
]

print("Select a sample founder profile by number:")
for i, sample in enumerate(sample_founders):  # Loops through samples to display options
    print(f"{i}: {sample['name']} (Industry: {sample['industry']}, Stage: {sample['stage']}, Funding: ${sample['funding_required']})")

# Get user selection
selection = int(input("Enter the number of the sample profile (0-7): "))  # Prompts user to select a number (updated range)
if 0 <= selection < len(sample_founders):  # Validates the selection
    founder_data = sample_founders[selection]  # Assigns the selected sample to founder_data
else:  # Handles invalid selection
    print("Invalid selection. Using default sample (BlastApp).")
    founder_data = sample_founders[0]  # Defaults to the first sample if input is invalid

print("\nFounder Data Selected:", founder_data)  # Displays the selected founder data

Select a sample founder profile by number:
0: BlastApp (Industry: Fintech, Stage: Seed, Funding: $500000)
1: HealthTech Innovate (Industry: Healthcare, Stage: Series A, Funding: $2000000)
2: Web3 Pioneer (Industry: Web3, Stage: Pre-Seed, Funding: $12000)
3: EcoSolutions (Industry: Energy, Stage: Series+, Funding: $5000000)
4: IoT Connect (Industry: IoT, Stage: Seed, Funding: $100000)
5: EduPlatform (Industry: Edtech, Stage: Pre-Seed, Funding: $200000)
6: MarketMate (Industry: Marketplace, Stage: Series A, Funding: $2000000)
7: MediTech Solutions (Industry: Medtech, Stage: Series+, Funding: $8000000)

Founder Data Selected: {'name': 'EcoSolutions', 'industry': 'Energy', 'stage': 'Series+', 'funding_required': 5000000, 'traction': '$3M revenue', 'business_model': 'B2C'}


Calculate and Display Matches

In [None]:
# 🔹 Calculate Matches
min_score = 70  # Lowest score an investor may have and still be listed (edit by hand to tune)

# Reset so this cell never reports results left over from an earlier run
match_results = None

print("Analyzing investor matches...")  # Progress message shown before the scoring starts
matches = calculate_matches(founder_data, INVESTOR_DATA, min_score, MAX_RESULTS_DEFAULT)

if not matches:
    # Nothing to show: explain whether the dataset was empty or simply had no good fit
    match_results = None
    print(
        "❌ Matching failed: No investor data loaded. Please load investor data first."
        if not INVESTOR_DATA
        else "❌ Matching failed: No compatible investors found. Check your founder inputs or investor data."
    )
else:
    # Keep the results together with the founder name for the save step below
    match_results = {
        "founder_name": founder_data.get("name", "Unknown Founder"),
        "matches": matches,
    }
    print(f"🎉 Matches Found for {match_results['founder_name']}")
    print("Investor Matches:")
    for match in match_results['matches']:
        # One print per investor: header line, score line, reasoning line
        print(
            f"\n💼 {match['investor_name']} - Score: {match['match_score']}/100",
            f"Match Score: {match['match_score']}/100",
            f"Reasoning: {match['reasoning']}",
            sep="\n",
        )

Analyzing investor matches...


Save Results to File

In [38]:
# 🔹 Save Results to File (if matches exist)
if match_results:  # Only write a report when the previous cell produced matches
    # Collect the report pieces and join once instead of repeated string concatenation
    report_parts = [
        f"Founder-Investor Matching Results for {match_results['founder_name']}\n\n",
        "Investor Matches:\n",
    ]
    for match in match_results['matches']:
        report_parts.append(
            f"\n{match['investor_name']}:\n"
            f"Match Score: {match['match_score']}/100\n"
            f"Reasoning: {match['reasoning']}\n"
            + "-" * 50 + "\n"  # Separator line between investors
        )
    download_content = "".join(report_parts)

    # Spaces become underscores (as before); any other character that is not a word
    # character, dot or hyphen is also replaced so the founder name cannot introduce
    # path separators or characters that are invalid in file names.
    safe_founder_name = re.sub(r'[^\w.\-]', '_', str(match_results['founder_name']).replace(' ', '_'))
    download_file_name = f"{safe_founder_name}_investor_matches.txt"

    # Explicit UTF-8: the reasoning text may contain non-ASCII characters, which the
    # platform default encoding is not guaranteed to be able to write.
    with open(download_file_name, 'w', encoding='utf-8') as f:
        f.write(download_content)
    print(f"Results saved to {download_file_name}")  # Confirms file save
else:
    print("No results to save.")  # Prints message if no results

Results saved to EcoSolutions_investor_matches.txt
