Missing data for the following VC fims:
- onmes capital
- cathay capital
- elaia
- headline
- mmc

In [1]:
import pandas as pd
from pathlib import Path
import plotly.graph_objects as go
import plotly.express as px

In [2]:
# Function to filter investors
def investors_per_fund(x):
    if isinstance(x, str):
        return [investor for investor in map(str.strip, x.lower().split(';')) if investor in vc_firms]
    return []

def is_datch(x):
    return 'DATCH' if x in {'Germany', 'Austria', 'Switzerland'} else x

def is_nordic(x):
    return 'Nordic' if x in {'Denmark', 'Sweden', 'Finland', 'Iceland', 'Norway'} else x

def is_other(x):
    return 'Others' if x not in {'DATCH', 'Nordic', 'United Kingdom', 'France', 'Spain'} else x

def in_industry(x):
    return 'Others' if x not in {'education', 'energy', 'enterprise sotfware', 'fintech',
                                 'food', 'gaming', 'health', 'jobs recruiment', 'marketing',
                                 'real estate', 'robotics', 'security', 'transportation',
                                 'travel'} else x

In [3]:
src = Path(r"C:\Users\juann\Notebook")
file = src / 'dealroom_batch.csv'
df = pd.read_csv(file)

In [4]:
# List of VC firms to keep
vc_firms = [
    "Eurazeo", "Partech", "Iris", "Omnes Capital", "Cathay Innovation", "Elaia Partners", "Ventech", "Karista",
    "Headline", "Northzone", "MMC Ventures", "83North", "Octopus Ventures", "Felix Capital", "Notion Capital",
    "DN Capital", "Dawn Capital", "Atlantic Bridge", "Creandum", "EQT Ventures", "Axa Venture Partners"
]
# vc_firms = [
#     "HV Capital", "Earlybird Venture Capital", "Lakestar", "Target Global", "Speedinvest", "Cherry Ventures", "Picus Capital"
# ]
vc_firms = [comp.lower() for comp in vc_firms]

In [5]:
# Remove duplicates efficiently
df = df.drop_duplicates(subset='Name').reset_index(drop=True)

# Filter investors and explode in one step
df['Filtered Investors'] = df['Investors'].apply(investors_per_fund)
df_exploded = (
    df.explode('Filtered Investors')
    .drop(columns=['Investors'])
    .rename(columns={'Filtered Investors': 'Investor'})
)

# Efficiently update HQ country data
df_exploded['HQ country'] = df_exploded['HQ country'].map(is_datch)
df_exploded['HQ country'] = df_exploded['HQ country'].map(is_nordic)
df_exploded['HQ country'] = df_exploded['HQ country'].map(is_other)

# Improved splitting logic using `.str.split()` for faster vectorized operations
df_exploded[['Client focus', 'Industries']] = df_exploded[['Client focus', 'Industries']].fillna('').applymap(lambda x: x.split(';'))

# Explode multiple columns efficiently using `.pipe()`
df_exploded = df_exploded.pipe(lambda d: d.explode('Client focus').explode('Industries'))

# df_exploded['Industries'] = df_exploded['Industries'].map(in_industry)

  df_exploded[['Client focus', 'Industries']] = df_exploded[['Client focus', 'Industries']].fillna('').applymap(lambda x: x.split(';'))


In [6]:
for values in vc_firms:
    if values not in df_exploded['Investor'].unique():
        print(values)

In [7]:
# Geographical Scope
grouped_df = df_exploded.groupby(['Investor', 'HQ country'], as_index=False).size()
grouped_df.rename(columns={'size': 'Count'}, inplace=True)

# Calculate the percentage relative to the total for each investor
grouped_df['Percentage'] = grouped_df['Count'] / grouped_df.groupby('Investor')['Count'].transform('sum') * 100

geo_graphic = px.bar(grouped_df, x="Investor", y="Percentage", color="HQ country", title="Long-Form Input").to_html(full_html=False, include_plotlyjs="cdn")

  sf: grouped.get_group(s if len(s) > 1 else s[0])


In [8]:
# Client focus Scope
grouped_df = df_exploded.groupby(['Investor', 'Client focus'], as_index=False).size()
grouped_df.rename(columns={'size': 'Count'}, inplace=True)

# Calculate the percentage relative to the total for each investor
grouped_df['Percentage'] = grouped_df['Count'] / grouped_df.groupby('Investor')['Count'].transform('sum') * 100

client_foucs = px.bar(grouped_df, x="Investor", y="Percentage", color="Client focus", title="Long-Form Input").to_html(full_html=False, include_plotlyjs="cdn")





In [9]:
# Group by Investor and Industries
grouped_df = df_exploded.groupby(['Investor', 'Industries'], as_index=False).size()
grouped_df.rename(columns={'size': 'Count'}, inplace=True)

# Calculate the percentage relative to the total for each investor
grouped_df['Percentage'] = grouped_df['Count'] / grouped_df.groupby('Investor')['Count'].transform('sum') * 100

industry = px.bar(grouped_df, x="Investor", y="Percentage", color="Industries", title="Long-Form Input").to_html(full_html=False, include_plotlyjs="cdn")





In [10]:
# Group by Investor and Round
grouped_df = df_exploded.groupby(['Investor', 'Round'], as_index=False).size()
grouped_df.rename(columns={'size': 'Count'}, inplace=True)

# Calculate the percentage relative to the total for each investor
grouped_df['Percentage'] = grouped_df['Count'] / grouped_df.groupby('Investor')['Count'].transform('sum') * 100

rounds = px.bar(grouped_df, x="Investor", y="Percentage", color="Round", title="Long-Form Input").to_html(full_html=False, include_plotlyjs="cdn")





In [13]:
html_content = f"""
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>BBVA Navbar and Case Study</title>
    <script src="https://cdn.plot.ly/plotly-latest.min.js"></script>
    <style>
        body {{
            margin: 0;
            font-family: Arial, sans-serif;
        }}
        .navbar {{
            background-color: #001F54;
            color: white;
            padding: 10px 20px;
            height: 60px;
            width: 100%;
            box-sizing: border-box;
        }}
        .navbar .content {{
            display: flex;
            justify-content: space-between;
            align-items: center;
        }}
        .navbar .content .logo {{
            font-size: 24px;
            font-weight: bold;
        }}
        .navbar .content .menu {{
            display: flex;
            gap: 20px;
        }}
        .navbar .content .menu a {{
            color: white;
            text-decoration: none;
            font-size: 16px;
            font-weight: 600;
        }}
        .navbar .content .menu a:hover {{
            text-decoration: underline;
        }}
        .main-content {{
            max-width: 1200px;
            margin: 20px auto;
            padding: 20px;
            font-size: 16px;
            line-height: 1.6;
        }}
        .main-content h1 {{
            font-size: 24px;
            margin-bottom: 20px;
            color: #0033A0;
        }}
        .results-container {{
            display: flex;
            flex-wrap: nowrap; /* Prevent wrapping */
            gap: 10px; /* Reduced gap between chart and text */
            align-items: flex-start;
            margin-bottom: 80px;
        }}
        .chart-container {{
            flex: 1 1 auto;
            display: flex;
            justify-content: center;
            align-items: center;
        }}
        .text-container {{
            flex: 0 1 auto; /* Allow text to shrink if needed */
            max-width: 40%; /* Keep the text from being too wide */
            margin-right: 10px; /* Consistent margin on the right */
            font-size: 16px;
            line-height: 1.6;
        }}
    </style>
</head>
<body>
    <div class="navbar">
        <div class="content">
            <div class="logo">Bilbao Vizcaya Investments (a BBVA Group company)</div>
        </div>
    </div>
    <div class="results-container">
        <div class="chart-container">
            {geo_graphic}
        </div>
    </div>
    <div class="results-container">
        <div class="chart-container">
            {client_foucs}
        </div>
    </div>
    <div class="results-container">
        <div class="chart-container">
            {industry}
        </div>
    </div>
    <div class="results-container">
        <div class="chart-container">
            {rounds}
        </div>
    </div>
</body>
</html>
"""
import pathlib

# Save the HTML content to a file
html_file = pathlib.Path(r"C:\Users\juann\OneDrive\Documentos\GitHub\Data-analysis\analysis.html")
html_file.write_text(html_content, encoding="utf-8")


65963