# 📈 Startup Data Visualization

Creating compelling visualizations from MongoDB aggregation results.

In [None]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import seaborn as sns
import matplotlib.pyplot as plt
import sys
sys.path.append('..')
import config
from mongodb_setup import MongoDBHandler

# Notebook-wide plotting defaults: seaborn's "whitegrid" theme and a wide
# 12x6 inch default size for matplotlib figures.
sns.set_style('whitegrid')
plt.rcParams.update({'figure.figsize': (12, 6)})

In [None]:
# Connect to MongoDB
# MongoDBHandler comes from the project's mongodb_setup module; connect()
# presumably opens the client that backs handler.db -- confirm in mongodb_setup.
handler = MongoDBHandler()
handler.connect()
# Collection queried by every aggregation cell below; its name is read from
# config.COLLECTION_NAME.
collection = handler.db[config.COLLECTION_NAME]

## 1. Top Industries by Funding

In [None]:
# Total funding and startup count per industry, keeping the 10 best-funded.
# If `industry` is an array, $unwind emits one document per element, so a
# startup tagged with several industries counts fully towards each of them.
pipeline = [
    {"$unwind": "$industry"},
    {
        "$group": {
            "_id": "$industry",
            "total_funding": {"$sum": "$total_funding_usd"},
            "count": {"$sum": 1},
        }
    },
    {"$sort": {"total_funding": -1}},
    {"$limit": 10},
]
data = list(collection.aggregate(pipeline))

# Pick the fields by name and rename `_id` explicitly. Assigning df.columns
# positionally raises ValueError when the aggregation returns no documents
# (the frame then has zero columns) and silently mislabels data if the field
# order ever changes.
df = pd.DataFrame(data, columns=['_id', 'total_funding', 'count']).rename(
    columns={'_id': 'industry'}
)

fig = px.bar(
    df,
    x='industry',
    y='total_funding',
    title='Top 10 Industries by Total Funding',
    labels={'total_funding': 'Total Funding (USD)'},
)
# Tilt the category labels so long industry names do not overlap.
fig.update_layout(xaxis_tickangle=-45)
fig.show()

## 2. Funding by Country

In [None]:
# Total funding and number of startups per country, largest totals first.
pipeline = [
    {
        "$group": {
            "_id": "$country",
            "total_funding": {"$sum": "$total_funding_usd"},
            "startup_count": {"$sum": 1},
        }
    },
    {"$sort": {"total_funding": -1}},
]
data = list(collection.aggregate(pipeline))

# Pick the fields by name and rename `_id` explicitly. Assigning df.columns
# positionally raises ValueError when the aggregation returns no documents
# (the frame then has zero columns) and silently mislabels data if the field
# order ever changes.
df = pd.DataFrame(data, columns=['_id', 'total_funding', 'startup_count']).rename(
    columns={'_id': 'country'}
)

# locationmode='country names' makes plotly match the `country` values against
# country names; values it cannot match are simply not drawn on the map.
fig = px.choropleth(
    df,
    locations='country',
    locationmode='country names',
    color='total_funding',
    hover_data=['startup_count'],
    title='Global Startup Funding Distribution',
)
fig.show()

## 3. Startup Formation Timeline

In [None]:
# Number of startups founded and their combined funding per founding year,
# in chronological order.
pipeline = [
    {
        "$group": {
            "_id": "$founded_year",
            "count": {"$sum": 1},
            "total_funding": {"$sum": "$total_funding_usd"},
        }
    },
    {"$sort": {"_id": 1}},
]
data = list(collection.aggregate(pipeline))

# Pick the fields by name and rename `_id` explicitly. Assigning df.columns
# positionally raises ValueError when the aggregation returns no documents
# (the frame then has zero columns) and silently mislabels data if the field
# order ever changes.
df = pd.DataFrame(data, columns=['_id', 'count', 'total_funding']).rename(
    columns={'_id': 'year'}
)

# Counts and funding differ by orders of magnitude, so each series gets its
# own y axis: bars on the primary axis, the funding line on the secondary one.
fig = make_subplots(specs=[[{"secondary_y": True}]])
fig.add_trace(
    go.Bar(x=df['year'], y=df['count'], name='Startup Count'),
    secondary_y=False,
)
fig.add_trace(
    go.Scatter(
        x=df['year'],
        y=df['total_funding'],
        name='Total Funding',
        mode='lines+markers',
    ),
    secondary_y=True,
)
fig.update_layout(title='Startup Formation and Funding Over Time')
# Label every axis; without titles the two y scales are indistinguishable.
fig.update_xaxes(title_text='Founded Year')
fig.update_yaxes(title_text='Startup Count', secondary_y=False)
fig.update_yaxes(title_text='Total Funding (USD)', secondary_y=True)
fig.show()

## 4. Top Funded Startups

In [None]:
# The 10 startups with the highest total funding, with their country.
pipeline = [
    {"$sort": {"total_funding_usd": -1}},
    {"$limit": 10},
    # `_id` is not used by the chart, so exclude it from the result.
    {"$project": {"_id": 0, "name": 1, "total_funding_usd": 1, "country": 1}},
]
data = list(collection.aggregate(pipeline))

# Naming the columns keeps them present even when the query returns nothing;
# a column-less frame would make px.bar fail on the unknown x/y/color names.
df = pd.DataFrame(data, columns=['name', 'total_funding_usd', 'country'])

fig = px.bar(
    df,
    x='total_funding_usd',
    y='name',
    orientation='h',
    color='country',
    title='Top 10 Most Funded Startups',
)
# color='country' splits the bars into one trace per country, which would
# otherwise order the y categories by trace rather than by funding. Sorting the
# categories by value puts the largest bar at the top of the chart.
fig.update_layout(yaxis={'categoryorder': 'total ascending'})
fig.show()

In [None]:
# Close the handler created in the connection cell once all charts are rendered.
handler.close()