In [3]:
import csv
from datetime import datetime, timedelta
import random

# Pick a random point in time from the trailing ten-year window
def random_date():
    """Return a random ``datetime`` between ten years ago and now.

    The window is 3650 days long (ten 365-day years; leap days are not
    accounted for). A whole number of days in ``[0, 3650]`` is drawn, so the
    result always carries the current time of day.

    Returns:
        datetime: ``now - 3650 days + k days`` for a random integer ``k``.
    """
    window_days = 3650
    now = datetime.now()
    # randint is inclusive on both ends, so both "exactly ten years ago"
    # and "right now" are possible outcomes.
    offset_days = random.randint(0, window_days)
    window_start = now - timedelta(days=window_days)
    return window_start + timedelta(days=offset_days)

# Vocabulary pools; the deal generator draws from each with random.choice.
# Company names are formed by joining one prefix and one suffix.
company_names_prefix = [
    "Tech", "AI", "Data", "Cloud", "Cyber",
    "Bio", "Fin", "Health", "Smart", "Green",
]
company_names_suffix = [
    "Systems", "Solutions", "Analytics", "Technologies", "Platform",
    "Networks", "Labs", "Connect", "Logic", "Scale",
]
industries = [
    "SaaS", "FinTech", "HealthTech", "AI/ML", "Cybersecurity",
    "BioTech", "CleanTech", "E-commerce", "EdTech", "IoT",
]
technologies = [
    "Python", "Java", "Cloud Native", "Blockchain", "AI/ML",
    "React", "Mobile", "Big Data", "DevOps", "Kubernetes",
]
funding_rounds = [
    "Seed",
    "Series A",
    "Series B",
    "Series C",
    "Series D",
]
# "Active" appears twice, so a uniform draw returns it twice as often as
# each of the other statuses.
exit_statuses = [
    "Active",
    "IPO",
    "Acquired",
    "Merged",
    "Active",
]

# Analyst pool for random.choice. Names are repeated on purpose: with a
# uniform draw, Sarah Johnson and Michael Chen (3 entries each) come up more
# often than Emma Thompson and David Kim (2 each) and Rachel Martinez (1).
analysts = [
    # every analyst once
    "Sarah Johnson", "Michael Chen", "Emma Thompson", "David Kim", "Rachel Martinez",
    # second entry for four of them
    "Sarah Johnson", "Michael Chen", "Emma Thompson", "David Kim",
    # third entry for the two most frequent
    "Sarah Johnson", "Michael Chen",
]

# Build a list of synthetic VC deal records
def generate_vc_deals(num_deals=50):
    """Generate ``num_deals`` random VC deal records.

    Every record is a flat dict. Values are drawn from the module-level
    vocabulary lists and from uniform random ranges; revenue and valuation
    are expressed in millions.

    Args:
        num_deals: How many records to create (default 50).

    Returns:
        list[dict]: One dict per deal, in the order they were generated.
        ``irr_percentage`` is ``None`` for roughly 30% of the records.
    """

    def _build_record():
        # The order of the random draws below is kept fixed so that a seeded
        # run produces the same sequence of records.
        name = random.choice(company_names_prefix) + random.choice(company_names_suffix)

        # Revenue is rounded first; valuation is that revenue times a 5x-20x multiple.
        revenue = round(random.uniform(1, 100), 2)
        valuation = revenue * random.uniform(5, 20)
        multiple = round(valuation / revenue, 2)

        invested_on = random_date().strftime('%Y-%m-%d')
        industry = random.choice(industries)
        tech = random.choice(technologies)
        round_name = random.choice(funding_rounds)
        amount = round(random.uniform(1, 50), 2)
        stake = round(random.uniform(5, 30), 2)
        status = random.choice(exit_statuses)

        # An IRR figure is only drawn when the coin flip passes; otherwise
        # the field is left empty.
        if random.random() > 0.3:
            irr = round(random.uniform(15, 100), 2)
        else:
            irr = None

        analyst = random.choice(analysts)

        return {
            'company_name': name,
            'investment_date': invested_on,
            'industry': industry,
            'technology_stack': tech,
            'funding_round': round_name,
            'investment_amount_m': amount,
            'revenue_m': revenue,
            'valuation_m': round(valuation, 2),
            'revenue_multiplier': multiple,
            'equity_stake': stake,
            'exit_status': status,
            'irr_percentage': irr,
            'analyst': analyst,
        }

    return [_build_record() for _ in range(num_deals)]

# Generate the deals
vc_deals = generate_vc_deals()

# Write to CSV file
filename = 'vc_deals_database.csv'
# Column order of the CSV. This must list every key present in the deal
# dicts: csv.DictWriter (default extrasaction='raise') rejects rows that
# contain keys missing from fieldnames.
fields = ['company_name', 'investment_date', 'industry', 'technology_stack',
          'funding_round', 'investment_amount_m', 'revenue_m', 'valuation_m',
          'revenue_multiplier', 'equity_stake', 'exit_status', 'irr_percentage', 'analyst']

# newline='' lets the csv module control line endings itself. The explicit
# encoding keeps the output independent of the platform's default encoding.
with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=fields)
    writer.writeheader()
    # A None irr_percentage is written as an empty field.
    writer.writerows(vc_deals)

print(f"CSV file '{filename}' has been created successfully with {len(vc_deals)} deals.")

CSV file 'vc_deals_database.csv' has been created successfully with 50 deals.


In [None]:
# Load the CSV written by the previous cell so the generated deals can be
# inspected. `df` is not defined in any earlier visible cell, so evaluating
# it bare on a fresh kernel raises NameError.
# NOTE(review): assumes the intent was to view the generated data - confirm.
import pandas as pd

df = pd.read_csv(filename, parse_dates=['investment_date'])
df.head()