In [None]:
import pandas as pd
import numpy as np
import warnings

import seaborn as sns
import matplotlib.pyplot as plt

# Plotly libraries
import plotly.express as px
import plotly.graph_objects as go
import plotly.figure_factory as ff
from plotly.colors import n_colors
from plotly.subplots import make_subplots
import plotly.io as pio



# Notebook-wide configuration.
# Silence every warning for the rest of the session.
warnings.filterwarnings(action='ignore')

# Default Plotly template for all figures created below.
pio.templates.default = "ggplot2"

# Raise pandas' display limits so wide/long frames are not truncated.
pd.options.display.max_columns = 5000000
pd.options.display.max_rows = 50000000

In [None]:
# Load every CSV used by the notebook (paths are relative to the working directory).

# Dataset 1: one DataFrame per table.
acquisitions = pd.read_csv('data/acquisitions.csv')
funding_rounds = pd.read_csv('data/funding_rounds.csv')
funds = pd.read_csv('data/funds.csv')
investments = pd.read_csv('data/investments.csv')
ipos = pd.read_csv('data/ipos.csv')
milestones = pd.read_csv('data/milestones.csv')
objects = pd.read_csv('data/objects.csv')
offices = pd.read_csv('data/offices.csv')
people = pd.read_csv('data/people.csv')
relationships = pd.read_csv('data/relationships.csv')

# Dataset 2.
founders = pd.read_csv('data/dataset2.csv')

# Dataset 3: give the two unnamed columns explicit names while loading.
industry_trends = pd.read_csv('data/dataset3.csv').rename(
    columns={"Unnamed: 0": "StartupID", "Unnamed: 6": "City_ZIP"}
)

# Dataset 4.
startup_info = pd.read_csv('data/dataset4.csv')


In [None]:
# Start with dataset 4 (startup_info), described here as the most simplified
# of the loaded datasets, and preview its first five rows.
startup_info.head(5)

In [None]:
# Print the column names, non-null counts and dtypes of startup_info.
startup_info.info()

In [None]:
# Derive the founding year; 'founded_at' values that cannot be parsed become
# NaT (errors='coerce') and therefore NaN in 'founded_year'.
startup_info['founded_year'] = pd.to_datetime(startup_info['founded_at'], errors='coerce').dt.year

# Map the '-' placeholder to 0 and convert the column to float.
# The result is assigned back to the frame: calling replace(..., inplace=True)
# on a column selection only updates a temporary object under pandas
# Copy-on-Write, which would leave '-' in place and make astype(float) fail.
startup_info['funding_total_usd'] = (
    startup_info['funding_total_usd'].replace('-', 0).astype(float)
)
startup_info.describe()

In [None]:
# Remove fully duplicated rows, then show the remaining (rows, columns) shape.
startup_info = startup_info.drop_duplicates()
startup_info.shape

In [None]:
# Count the missing values in each column of startup_info.
startup_info.isna().sum()

In [None]:
# Build the cleaned working frame for dataset 4: drop the 'permalink' and
# 'homepage_url' columns and discard rows that have no company name.
clean_startup_info = (
    startup_info
    .drop(columns=['permalink', 'homepage_url'])
    .dropna(subset=['name'])
)

# Keep only the year part (text before the first "-") of both funding dates.
# The year is stored as float, so missing dates remain NaN.
for funding_col in ('first_funding_at', 'last_funding_at'):
    year_text = clean_startup_info[funding_col].str.split("-").str[0]
    clean_startup_info[funding_col] = year_text.astype(float)

# Label missing categories as 'other'.
clean_startup_info['category_list'] = clean_startup_info['category_list'].replace(np.nan, 'other')
clean_startup_info.head()

In [None]:
# Sanity checks on the funding years.
#   filter_0: rows whose first funding year is later than the last one
#   filter_1: rows whose first funding year is earlier than 1749
first_after_last = clean_startup_info['first_funding_at'] > clean_startup_info['last_funding_at']
funded_before_1749 = clean_startup_info['first_funding_at'] < 1749

filter_0 = int(first_after_last.sum())
filter_1 = int(funded_before_1749.sum())
total = filter_0 + filter_1
print(f'There are {total} entries that are wrong by common sense')

# Show the pre-1749 rows and remove them from the frame in place
# (rows counted by filter_0 are reported only, not removed).
print(clean_startup_info[funded_before_1749][['name', 'founded_at', 'first_funding_at']])
clean_startup_info.drop(index=clean_startup_info[funded_before_1749].index, inplace=True)



In [None]:
# Number of startups per founding year and per first-funding year, each as a
# two-column frame (year, '#Startups'). Rows with a missing year are left out
# by groupby.
startups_history = clean_startup_info.groupby('founded_year').size().reset_index(name='#Startups')
funding_history = clean_startup_info.groupby('first_funding_at').size().reset_index(name='#Startups')

In [None]:
# Overlay two filled-area curves on one timeline: startups founded per year
# and startups that received their first funding per year.

# The x axis is configured as a date axis (needed by the year-based range
# selector). Plotly interprets bare numbers on a date axis as milliseconds
# since 1970, so the numeric years are converted to ISO date strings
# (January 1st of each year) before plotting.
founded_dates = startups_history['founded_year'].astype(int).astype(str) + '-01-01'
funding_dates = funding_history['first_funding_at'].astype(int).astype(str) + '-01-01'

# Each curve is added exactly once, as a named trace, to an empty figure.
fig = go.Figure()

fig.add_trace(go.Scatter(x=founded_dates,
                         y=startups_history['#Startups'],
                         fill='tozeroy',  # fill the area below the curve
                         mode='none',
                         fillcolor='rgba(255,0,0,0.4)',
                         name='Founded year'))

fig.add_trace(go.Scatter(x=funding_dates,
                         y=funding_history['#Startups'],
                         fill='tozeroy',  # fill the area below the curve
                         mode='none',
                         fillcolor='rgba(0,176,246,0.3)',
                         name='First funding Year'))

fig.update_layout(
    title='Distribution of Startups by Founded Year and First funding year',
    xaxis_title='founded_year',
    yaxis_title='#Startups',
)

# Range selector buttons (10/20/50/100 years, all) plus a range slider.
fig.update_layout(
    xaxis=dict(
        rangeselector=dict(
            buttons=list([
                dict(count=10,
                     label="10y",
                     step="year",
                     stepmode="todate"),
                dict(count=20,
                     label="20y",
                     step="year",
                     stepmode="todate"),
                dict(count=50,
                     label="50y",
                     step="year",
                     stepmode="todate"),
                dict(count=100,
                     label="100y",
                     step="year",
                     stepmode="todate"),
                dict(step="all")
            ])
        ),
        rangeslider=dict(
            visible=True
        ),
        type="date"
    )
)

fig.show()

In [None]:
# Bar chart of the 15 country codes with the most startups.
top_countries = clean_startup_info['country_code'].value_counts()[:15]

# Custom green colorscale with 15 stops. Plotly only accepts a colorscale
# whose first stop is at 0 and whose last stop is at 1 (anything else makes it
# fall back to the default scale), so the stop positions span [0, 1] inclusive.
n_stops = 15
colorscale = [
    [i / (n_stops - 1), f'rgba(0, {int(176 - (i * (176 / 15)))}, 0, {0.5 + (i / 30)})']
    for i in range(n_stops)
]

# Bars are coloured by their own count through the custom colorscale.
fig_country = go.Figure(go.Bar(
    x=top_countries.index,
    y=top_countries,
    hovertemplate='<br><b>Country</b>: %{x}'+'<br><i>Startup count</i>: %{y}',
    marker=dict(color=top_countries, colorscale=colorscale)
))


fig_country.update_layout(title="Number of Startups in each Country",
                             xaxis_title="Country", yaxis_title="Startup Count",title_x=0.5, paper_bgcolor="silver",
                             title_font_size=20)
fig_country.show()


In [None]:
# World choropleth: number of startups per country code.
country_counts = clean_startup_info['country_code'].value_counts()

world_trace = go.Choropleth(
    locations=country_counts.index,
    z=country_counts,
    colorscale='greens',
    colorbar_title="Nos. of Startups founded",
)
fig_country_map = go.Figure(data=[world_trace])
fig_country_map.update_layout(
    title_text="Number of Startups Country wise.",
    title_x=0.5,
    title_font_size=20,
)
fig_country_map.show()

In [None]:
# The most radical approach: treat only Unicorns as successful startups.
# Hand-picked company names, each paired by position with a display colour.
startup_unicorns = ['Uber', 'Google', 'Alibaba', 'Apple',
                    'Amazon', 'Airbnb', 'Facebook', 'PayPal',
                    'Xiaomi', 'Pinterest', 'Coursera', 'Stripe']

color = ['Black','Orange','Blue','Darkblue',
         'lightblue', 'darkorange','teal', 'red',
         'lightgreen','orange','cyan','pink']

# Index the cleaned frame by company name so each unicorn can be looked up.
unicorn_startups = clean_startup_info.set_index('name', drop=True)

# Founding year and total funding of every listed company, as ints.
unicorn_founding_year = [
    int(unicorn_startups.loc[company, 'founded_year']) for company in startup_unicorns
]
total_funding = [
    int(unicorn_startups.loc[company, 'funding_total_usd']) for company in startup_unicorns
]

# One row per unicorn, ordered by ascending total funding.
df_unicorns = pd.DataFrame({
    'Unicorn name': startup_unicorns,
    'Founding year': unicorn_founding_year,
    'Total funding': total_funding,
    'Color': color,
}).sort_values(by='Total funding')
df_unicorns.head()


In [None]:
# Histogram of founding years (after 1970 and before 2025), annotated with one
# vertical line and one text label per company in df_unicorns.
recent_founded_years = clean_startup_info[
    (clean_startup_info['founded_year']>1970) & (clean_startup_info['founded_year']<2025)
]['founded_year']

fig_unicorn_founded = go.Figure()
fig_unicorn_founded.add_trace(go.Histogram(x=recent_founded_years, name="Founded year", marker=dict(opacity=0.9),
                                          hovertemplate = '<br><b>Year</b>: %{x}'+'<br><i>Startup count</i>: %{y}'
                                          ))

# One full-height vertical line per unicorn at its founding year, in its colour.
# The two columns are walked together instead of re-filtering df_unicorns by
# name for every attribute, and the year is passed as a number because the
# histogram's x axis is numeric.
for founding_year, line_color in zip(df_unicorns['Founding year'], df_unicorns['Color']):
    fig_unicorn_founded.add_shape(
            dict(
                type="line",
                xref="x",
                yref="paper",  # y0/y1 are fractions of the plot height
                x0=int(founding_year),
                y0=0,
                x1=int(founding_year),
                y1=1,
                line=dict(
                    color=str(line_color),
                    width=1
                )
    ))

# Company labels: x is the founding year, y is staggered by the row's index
# label so the names do not sit on top of each other. `text` gets the name
# column as a 1-D Series (one label per point), not a one-column DataFrame.
fig_unicorn_founded.add_trace(go.Scatter(x=df_unicorns['Founding year'],
                                        y=df_unicorns.index*700+300, mode="text", text=df_unicorns['Unicorn name'],
                                        textfont=dict(family="sans serif",size=15), showlegend=False,
                                        hovertemplate = '<br><b>Company</b>: %{text}'+'<br><i>Founding Year</i>: %{x}'
                                        ))

fig_unicorn_founded.update_layout(title="Years in which some Startup Unicorns were founded",
                             xaxis_title="Year", yaxis_title="Startup Count",title_x=0.5, paper_bgcolor="mintcream",
                             title_font_size=20,legend=dict(orientation='h',yanchor='top',y=1.1,xanchor='right',x=1))
fig_unicorn_founded.show()

In [None]:
# Split the '|'-separated category strings so every category counts on its own.
categories_list = clean_startup_info['category_list'].str.split('|', expand=True).stack()

# The 30 most frequent categories, most frequent first.
top_markets = categories_list.value_counts().head(30)

market_bars = go.Bar(
    x=top_markets.index,
    y=top_markets,
    hovertemplate='<br><b>Market</b>: %{x}<br><i>Startup count</i>: %{y}',
    marker=dict(color=list(range(30)), colorscale="Sunsetdark"),
)
fig_market = go.Figure(data=[market_bars])
fig_market.update_layout(
    title="Number of Startups in each Market",
    xaxis_title="Market",
    yaxis_title="Startup Count",
    title_x=0.5,
    paper_bgcolor="mintcream",
    title_font_size=20,
)
fig_market.show()

In [None]:
# Categories of closed startups, one entry per category after splitting on '|'.
closed_categories = clean_startup_info[clean_startup_info['status']=='closed']['category_list'].str.split('|', expand=True).stack()
closed_counts = closed_categories.value_counts()

fig_closed = make_subplots(rows=1, cols=2, shared_yaxes=True,
                           subplot_titles=("Markets with Most closed Startups", "Markets with Least closed Startups"))

# Left panel: the ten most frequent markets; right panel: the ten least frequent.
closed_panels = [
    (closed_counts.head(10), "Market with most closed Startups", "reds_r"),
    (closed_counts.tail(10), "Market with least closed Startups", "greens_r"),
]
for panel_col, (panel_counts, panel_name, panel_scale) in enumerate(closed_panels, start=1):
    fig_closed.add_trace(
        go.Bar(x=panel_counts.index,
               y=panel_counts,
               name=panel_name,
               marker=dict(color=list(range(20)), colorscale=panel_scale),
               hovertemplate='<br><b>Market</b>: %{x}<br><i>Startup count</i>: %{y}'),
        row=1, col=panel_col)

fig_closed.update_layout(showlegend=False, paper_bgcolor="mintcream")
fig_closed.show()

In [None]:
# Categories of acquired startups, one entry per category after splitting on '|'.
acquired_categories = clean_startup_info[clean_startup_info['status']=='acquired']['category_list'].str.split('|', expand=True).stack()
acquired_counts = acquired_categories.value_counts()

# The figure is stored under the name fig_closed, as in the other market cells.
fig_closed = make_subplots(rows=1, cols=2, shared_yaxes=True,
                           subplot_titles=("Markets with Most Acquired Startups", "Markets with Least Acquired Startups"))

# Left panel: the ten most frequent markets; right panel: the ten least frequent.
acquired_panels = [
    (acquired_counts.head(10), "Market with most Acquired Startups", "greens_r"),
    (acquired_counts.tail(10), "Market with least Acquired Startups", "reds_r"),
]
for panel_col, (panel_counts, panel_name, panel_scale) in enumerate(acquired_panels, start=1):
    fig_closed.add_trace(
        go.Bar(x=panel_counts.index,
               y=panel_counts,
               name=panel_name,
               marker=dict(color=list(range(20)), colorscale=panel_scale),
               hovertemplate='<br><b>Market</b>: %{x}<br><i>Startup count</i>: %{y}'),
        row=1, col=panel_col)

fig_closed.update_layout(showlegend=False, paper_bgcolor="mintcream")
fig_closed.show()

In [None]:
# Categories of operating startups, one entry per category after splitting on '|'.
operating_categories = clean_startup_info[clean_startup_info['status']=='operating']['category_list'].str.split('|', expand=True).stack()
operating_counts = operating_categories.value_counts()

# The figure is stored under the name fig_closed, as in the other market cells.
fig_closed = make_subplots(rows=1, cols=2, shared_yaxes=True,
                           subplot_titles=("Markets with Most Operating Startups", "Markets with Least Operating Startups"))

# Left panel: the ten most frequent markets; right panel: the ten least frequent.
operating_panels = [
    (operating_counts.head(10), "Market with most Operating Startups", "greens_r"),
    (operating_counts.tail(10), "Market with least Operating Startups", "reds_r"),
]
for panel_col, (panel_counts, panel_name, panel_scale) in enumerate(operating_panels, start=1):
    fig_closed.add_trace(
        go.Bar(x=panel_counts.index,
               y=panel_counts,
               name=panel_name,
               marker=dict(color=list(range(20)), colorscale=panel_scale),
               hovertemplate='<br><b>Market</b>: %{x}<br><i>Startup count</i>: %{y}'),
        row=1, col=panel_col)

fig_closed.update_layout(showlegend=False, paper_bgcolor="mintcream")
fig_closed.show()

In [None]:
# World choropleth restricted to startups whose status is 'operating'.
operating_startups = clean_startup_info[clean_startup_info['status']=='operating']
operating_by_country = operating_startups['country_code'].value_counts()

fig_country_map = go.Figure()
fig_country_map.add_trace(go.Choropleth(locations=operating_by_country.index,
                                       z=operating_by_country,
                                       colorscale='greens',
                                       # Label names the status subset that is actually counted.
                                       colorbar_title="Nos. of Operating Startups",
                                       ))
# The title states the status filter so this map is not confused with the
# unfiltered map or with the maps for the other statuses.
fig_country_map.update_layout(title_text="Number of Operating Startups Country wise.", title_x=0.5, title_font_size=20)
fig_country_map.show()

In [None]:
# World choropleth restricted to startups whose status is 'acquired'.
acquired_startups = clean_startup_info[clean_startup_info['status']=='acquired']
acquired_by_country = acquired_startups['country_code'].value_counts()

fig_country_map = go.Figure()
fig_country_map.add_trace(go.Choropleth(locations=acquired_by_country.index,
                                       z=acquired_by_country,
                                       colorscale='blues',
                                       # Label names the status subset that is actually counted.
                                       colorbar_title="Nos. of Acquired Startups",
                                       ))
# The title states the status filter so this map is not confused with the
# unfiltered map or with the maps for the other statuses.
fig_country_map.update_layout(title_text="Number of Acquired Startups Country wise.", title_x=0.5, title_font_size=20)
fig_country_map.show()

In [None]:
# World choropleth restricted to startups whose status is 'closed'.
closed_startups = clean_startup_info[clean_startup_info['status']=='closed']
closed_by_country = closed_startups['country_code'].value_counts()

fig_country_map = go.Figure()
fig_country_map.add_trace(go.Choropleth(locations=closed_by_country.index,
                                       z=closed_by_country,
                                       colorscale='reds',
                                       # Label names the status subset that is actually counted.
                                       colorbar_title="Nos. of Closed Startups",
                                       ))
# The title states the status filter so this map is not confused with the
# unfiltered map or with the maps for the other statuses.
fig_country_map.update_layout(title_text="Number of Closed Startups Country wise.", title_x=0.5, title_font_size=20)
fig_country_map.show()

In [None]:
# Bubble chart over the first 1000 rows: one bubble per startup, height is the
# number of funding rounds, bubble size is the total funding, colour is status.
first_1000_startups = clean_startup_info[:1000]
fig_funding_amt = px.scatter(
    first_1000_startups,
    x="name",
    y="funding_rounds",
    size='funding_total_usd',
    color='status',
)

# Layout: company names are hidden on the x axis (too many to read); the
# legend sits horizontally above the plot.
funding_layout = dict(
    title='Plot Showing the Funding and Total funding acquired by Startups',
    xaxis_title="Startups",
    yaxis_title="Funding Rounds",
    xaxis_showticklabels=False,
    paper_bgcolor="mintcream",
    title_font_size=20,
    title_x=0.5,
    legend=dict(orientation='h', yanchor='top', y=1.08, xanchor='right', x=1),
    margin=dict(b=100),
    xaxis=dict(title_standoff=45),  # gap between the x-axis title and the axis
    yaxis=dict(title_standoff=20),  # gap between the y-axis title and the axis
)
fig_funding_amt.update_layout(**funding_layout)

# Hover text: company, number of rounds and funding amount.
funding_hover = "".join([
    '<br><b>Company</b>: %{x}',
    '<br><i>Funding Rounds</i>: %{y}',
    '<br><i>Funding(in USD)</i>: %{marker.size}',
])
fig_funding_amt.update_traces(hovertemplate=funding_hover)
fig_funding_amt.show()


In [None]:
# Three pie charts of startup status: all startups (top, spanning both
# columns), founded before 2000 (bottom left), founded in/after 2000 (bottom right).
fig_status = make_subplots(
    rows=2, cols=2,
    specs=[[{"type": "domain", "colspan": 2}, None],
           [{"type": "domain"}, {"type": "domain"}]],
    subplot_titles=("Current status of all Startups",
                    "Status of Startups founded before 2000",
                    "Status of Startups founded after 2000"),
)

status_all = clean_startup_info['status'].value_counts()
status_before_2000 = clean_startup_info[clean_startup_info['founded_year']<2000]['status'].value_counts()
status_from_2000 = clean_startup_info[clean_startup_info['founded_year']>=2000]['status'].value_counts()

# (status counts, subplot row, subplot column)
status_pies = [
    (status_all, 1, 1),
    (status_before_2000, 2, 1),
    (status_from_2000, 2, 2),
]
for pie_counts, pie_row, pie_col in status_pies:
    fig_status.add_trace(go.Pie(labels=pie_counts.index, values=pie_counts),
                         row=pie_row, col=pie_col)

fig_status.update_traces(hoverinfo='label+percent', textinfo='value+percent', textfont_size=11,
                         insidetextorientation='horizontal', rotation=-45)
fig_status.update_layout(height=800, paper_bgcolor="mintcream")
fig_status.show()

In [None]:
# Build corr_startups in this cell so it runs on a fresh kernel (Restart &
# Run All) instead of relying on a variable that is only assigned further down.
# It holds every non-IPO startup, re-indexed from 0, with is_successful = 1
# for 'operating' / 'acquired' and 0 otherwise.
corr_startups = (
    clean_startup_info[clean_startup_info['status'] != 'ipo']
    .reset_index(drop=True)
    .assign(is_successful=lambda frame: frame['status'].isin(['operating', 'acquired']).astype(int))
)

successful_startups = corr_startups[corr_startups['is_successful']==1]
# Factorize every column to integer codes and correlate each with the flag.
# NOTE(review): 'is_successful' is constant (all 1) inside this subset, so the
# coefficients are expected to be NaN — confirm whether the correlation was
# meant to be computed over all of corr_startups.
successful_startups.apply(lambda x: pd.factorize(x)[0]).corrwith(successful_startups['is_successful'])

In [None]:
# Display the current value of successful_startups.
successful_startups

In [None]:
# seaborn (sns) and matplotlib.pyplot (plt) are imported in the notebook's
# first cell, so they are not imported again here.

# Every non-IPO startup, re-indexed from 0, with is_successful = 1 for
# 'operating' / 'acquired' and 0 otherwise. Built as one method chain so the
# new column is written to corr_startups itself rather than to a filtered
# slice edited in place.
corr_startups = (
    clean_startup_info[clean_startup_info['status'] != 'ipo']
    .reset_index(drop=True)
    .assign(is_successful=lambda frame: frame['status'].apply(
        lambda x: 1 if x in ['operating', 'acquired'] else 0))
)

# Row subsets by outcome, kept under their own names because
# successful_startups / fail_startups hold the coefficient Series below.
successful_rows = corr_startups[corr_startups['is_successful']==1]
fail_rows = corr_startups[corr_startups['is_successful']==0]

# Factorize every column to integer codes and correlate each with
# 'is_successful', separately for the two subsets.
# NOTE(review): inside each subset 'is_successful' is constant, so these
# coefficients are expected to be NaN and the heatmaps empty — confirm whether
# the correlation was meant to be computed over all of corr_startups.
successful_startups = successful_rows.apply(lambda x: pd.factorize(x)[0]).corrwith(successful_rows['is_successful'])
fail_startups = fail_rows.apply(lambda x: pd.factorize(x)[0]).corrwith(fail_rows['is_successful'])


# Plot both coefficient columns side by side as single-column heatmaps.
fig, ax = plt.subplots(1, 2, figsize=(20, 8))
sns.heatmap(successful_startups.to_frame(), annot=True, cmap="coolwarm", ax=ax[0], cbar=False, square=True)
ax[0].set_title("Correlation Matrix (is_successful = 1)")
sns.heatmap(fail_startups.to_frame(), annot=True, cmap="coolwarm", ax=ax[1], cbar=False, square=True)
ax[1].set_title("Correlation Matrix (is_successful = 0)")
plt.tight_layout()
plt.show()



In [None]:
# Next, take a closer look at dataset 3 (industry_trends): print its column
# names, non-null counts and dtypes.

industry_trends.info()

In [None]:
# Derive the founding year; 'founded_at' values that cannot be parsed become
# NaT (errors='coerce') and therefore NaN in 'founded_year'.
industry_trends['founded_year'] = pd.to_datetime(industry_trends['founded_at'], errors = 'coerce').dt.year


In [None]:
# Summary statistics of industry_trends.
industry_trends.describe()
# Oldest startup => 1984

In [None]:
# Count the missing values in each column of industry_trends.
industry_trends.isna().sum()

In [None]:
# Build the cleaned working frame for dataset 3: drop the 'StartupID' and
# 'City_ZIP' columns and discard rows that have no company name.
clean_industry_trends = (
    industry_trends
    .drop(columns=['StartupID', 'City_ZIP'])
    .dropna(subset=['name'])
)

# Keep only the year (third "/"-separated field) of both funding dates.
# The year is stored as float, so missing dates remain NaN.
for funding_col in ('first_funding_at', 'last_funding_at'):
    year_text = clean_industry_trends[funding_col].str.split("/").str[2]
    clean_industry_trends[funding_col] = year_text.astype(float)

In [None]:
# Sanity checks on the funding years.
#   filter_0: rows whose first funding year is later than the last one
#   filter_1: rows whose first funding year is earlier than 1984
first_after_last = clean_industry_trends['first_funding_at'] > clean_industry_trends['last_funding_at']
funded_before_1984 = clean_industry_trends['first_funding_at'] < 1984

filter_0 = int(first_after_last.sum())
filter_1 = int(funded_before_1984.sum())
total = filter_0 + filter_1
print(f'There are {total} entries that are wrong by common sense')

# Show the pre-1984 rows and remove them from the frame in place
# (rows counted by filter_0 are reported only, not removed).
print(clean_industry_trends[funded_before_1984][['name', 'founded_at', 'first_funding_at']])
clean_industry_trends.drop(index=clean_industry_trends[funded_before_1984].index, inplace=True)


In [None]:
# Number of startups per founding year and per first-funding year
# (rows with a missing year are left out by groupby).
startups_history = clean_industry_trends.groupby('founded_year').size().reset_index(name='#Startups')
funding_history = clean_industry_trends.groupby('first_funding_at').size().reset_index(name='#Startups')

# The x axis is configured as a date axis (needed by the year-based range
# selector). Plotly interprets bare numbers on a date axis as milliseconds
# since 1970, so the numeric years are converted to ISO date strings
# (January 1st of each year) before plotting.
founded_dates = startups_history['founded_year'].astype(int).astype(str) + '-01-01'
funding_dates = funding_history['first_funding_at'].astype(int).astype(str) + '-01-01'

# Each curve is added exactly once, as a named trace, to an empty figure.
fig = go.Figure()

fig.add_trace(go.Scatter(x=founded_dates,
                         y=startups_history['#Startups'],
                         fill='tozeroy',  # fill the area below the curve
                         mode='none',
                         fillcolor='rgba(255,0,0,0.4)',
                         name='Founded year'))

fig.add_trace(go.Scatter(x=funding_dates,
                         y=funding_history['#Startups'],
                         fill='tozeroy',  # fill the area below the curve
                         mode='none',
                         fillcolor='rgba(0,176,246,0.3)',
                         name='First funding Year'))

fig.update_layout(
    title='Distribution of Startups by Founded Year and First funding year',
    xaxis_title='founded_year',
    yaxis_title='#Startups',
)

# Range selector buttons (10/20/50/100 years, all) plus a range slider.
fig.update_layout(
    xaxis=dict(
        rangeselector=dict(
            buttons=list([
                dict(count=10,
                     label="10y",
                     step="year",
                     stepmode="todate"),
                dict(count=20,
                     label="20y",
                     step="year",
                     stepmode="todate"),
                dict(count=50,
                     label="50y",
                     step="year",
                     stepmode="todate"),
                dict(count=100,
                     label="100y",
                     step="year",
                     stepmode="todate"),
                dict(step="all")
            ])
        ),
        rangeslider=dict(
            visible=True
        ),
        type="date"
    )
)

fig.show()

In [None]:
# Bar chart of the 15 state codes with the most startups.
top_states = clean_industry_trends['state_code'].value_counts()[:15]

# Custom green colorscale with 15 stops. Plotly only accepts a colorscale
# whose first stop is at 0 and whose last stop is at 1 (anything else makes it
# fall back to the default scale), so the stop positions span [0, 1] inclusive.
n_stops = 15
colorscale = [
    [i / (n_stops - 1), f'rgba(0, {int(176 - (i * (176 / 15)))}, 0, {0.5 + (i / 30)})']
    for i in range(n_stops)
]

# Bars are coloured by their own count through the custom colorscale.
# Hover text and x-axis title say "State" because the bars are state codes.
fig_country = go.Figure(go.Bar(
    x=top_states.index,
    y=top_states,
    hovertemplate='<br><b>State</b>: %{x}'+'<br><i>Startup count</i>: %{y}',
    marker=dict(color=top_states, colorscale=colorscale)
))


fig_country.update_layout(title="Number of Startups in each State",
                             xaxis_title="State", yaxis_title="Startup Count",title_x=0.5, paper_bgcolor="silver",
                             title_font_size=20)
fig_country.show()

In [None]:
# USA choropleth: number of startups per state code.
state_counts = clean_industry_trends['state_code'].value_counts()

state_trace = go.Choropleth(
    locations=state_counts.index,
    z=state_counts,
    locationmode="USA-states",
    colorscale='greens',
    colorbar_title="Nos. of Startups founded",
)
fig_country_map = go.Figure(data=[state_trace])

# Restrict the map to the USA (Albers USA projection) and draw lakes in white.
usa_geo = dict(
    scope='usa',
    projection=go.layout.geo.Projection(type = 'albers usa'),
    showlakes=True,
    lakecolor='rgb(255, 255, 255)',
)
fig_country_map.update_layout(
    title_text="Number of Startups by State in USA",
    title_x=0.5,
    title_font_size=20,
    geo=usa_geo,
    paper_bgcolor="mintcream",
)
fig_country_map.show()

In [None]:
# Market of each startup, taken from 'category_code'.
categories_list = clean_industry_trends['category_code']

# The 30 most frequent markets (or all of them if there are fewer).
top_markets = categories_list.value_counts().head(30)

market_bars = go.Bar(
    x=top_markets.index,
    y=top_markets,
    hovertemplate='<br><b>Market</b>: %{x}<br><i>Startup count</i>: %{y}',
    marker=dict(color=list(range(30)), colorscale="Sunsetdark"),
)
fig_market = go.Figure(data=[market_bars])
fig_market.update_layout(
    title="Number of Startups in each Market",
    xaxis_title="Market",
    yaxis_title="Startup Count",
    title_x=0.5,
    paper_bgcolor="mintcream",
    title_font_size=20,
)
fig_market.show()

In [None]:
# Markets ('category_code') of the closed startups in dataset 3.
closed_categories = clean_industry_trends[clean_industry_trends['status']=='closed']['category_code']
closed_counts = closed_categories.value_counts()

fig_closed = make_subplots(rows=1, cols=2, shared_yaxes=True,
                           subplot_titles=("Markets with Most closed Startups", "Markets with Least closed Startups"))

# Left panel: the ten most frequent markets; right panel: the ten least frequent.
closed_panels = [
    (closed_counts.head(10), "Market with most closed Startups", "reds_r"),
    (closed_counts.tail(10), "Market with least closed Startups", "greens_r"),
]
for panel_col, (panel_counts, panel_name, panel_scale) in enumerate(closed_panels, start=1):
    fig_closed.add_trace(
        go.Bar(x=panel_counts.index,
               y=panel_counts,
               name=panel_name,
               marker=dict(color=list(range(20)), colorscale=panel_scale),
               hovertemplate='<br><b>Market</b>: %{x}<br><i>Startup count</i>: %{y}'),
        row=1, col=panel_col)

fig_closed.update_layout(showlegend=False, paper_bgcolor="mintcream")
fig_closed.show()

In [None]:
# Markets ('category_code') of the acquired startups in dataset 3.
# NOTE: the series and the figure are stored under the names closed_categories
# and fig_closed even though this cell is about acquired startups.
closed_categories = clean_industry_trends[clean_industry_trends['status']=='acquired']['category_code']
acquired_counts = closed_categories.value_counts()

fig_closed = make_subplots(rows=1, cols=2, shared_yaxes=True,
                           subplot_titles=("Markets with Most Acquired Startups", "Markets with Least Acquired Startups"))

# Left panel: the ten most frequent markets; right panel: the ten least frequent.
acquired_panels = [
    (acquired_counts.head(10), "Market with most Acquired Startups", "greens_r"),
    (acquired_counts.tail(10), "Market with least Acquired Startups", "reds_r"),
]
for panel_col, (panel_counts, panel_name, panel_scale) in enumerate(acquired_panels, start=1):
    fig_closed.add_trace(
        go.Bar(x=panel_counts.index,
               y=panel_counts,
               name=panel_name,
               marker=dict(color=list(range(20)), colorscale=panel_scale),
               hovertemplate='<br><b>Market</b>: %{x}<br><i>Startup count</i>: %{y}'),
        row=1, col=panel_col)

fig_closed.update_layout(showlegend=False, paper_bgcolor="mintcream")
fig_closed.show()

In [None]:
# Bubble chart over the first 100 rows: one bubble per startup, height is the
# number of funding rounds, bubble size is the total funding, colour is status.
first_100_startups = clean_industry_trends[:100]
fig_funding_amt = px.scatter(
    first_100_startups,
    x="name",
    y="funding_rounds",
    size='funding_total_usd',
    color='status',
)

# Layout: company names are hidden on the x axis; the legend sits
# horizontally above the plot.
funding_layout = dict(
    title='Plot Showing the Funding and Total funding acquired by Startups',
    xaxis_title="Startups",
    yaxis_title="Funding Rounds",
    xaxis_showticklabels=False,
    paper_bgcolor="mintcream",
    title_font_size=20,
    title_x=0.5,
    legend=dict(orientation='h', yanchor='top', y=1.08, xanchor='right', x=1),
    margin=dict(b=100),
    xaxis=dict(title_standoff=45),  # gap between the x-axis title and the axis
    yaxis=dict(title_standoff=20),  # gap between the y-axis title and the axis
)
fig_funding_amt.update_layout(**funding_layout)

# Hover text: company, number of rounds and funding amount.
funding_hover = "".join([
    '<br><b>Company</b>: %{x}',
    '<br><i>Funding Rounds</i>: %{y}',
    '<br><i>Funding(in USD)</i>: %{marker.size}',
])
fig_funding_amt.update_traces(hovertemplate=funding_hover)
fig_funding_amt.show()

In [None]:
# Three pie charts of startup status for dataset 3: all startups (top,
# spanning both columns), founded before 2000 (bottom left), founded in/after
# 2000 (bottom right).
fig_status = make_subplots(
    rows=2, cols=2,
    specs=[[{"type": "domain", "colspan": 2}, None],
           [{"type": "domain"}, {"type": "domain"}]],
    subplot_titles=("Current status of all Startups",
                    "Status of Startups founded before 2000",
                    "Status of Startups founded after 2000"),
)

status_all = clean_industry_trends['status'].value_counts()
status_before_2000 = clean_industry_trends[clean_industry_trends['founded_year']<2000]['status'].value_counts()
status_from_2000 = clean_industry_trends[clean_industry_trends['founded_year']>=2000]['status'].value_counts()

# (status counts, subplot row, subplot column)
status_pies = [
    (status_all, 1, 1),
    (status_before_2000, 2, 1),
    (status_from_2000, 2, 2),
]
for pie_counts, pie_row, pie_col in status_pies:
    fig_status.add_trace(go.Pie(labels=pie_counts.index, values=pie_counts),
                         row=pie_row, col=pie_col)

fig_status.update_traces(hoverinfo='label+percent', textinfo='value+percent', textfont_size=11,
                         insidetextorientation='horizontal', rotation=-45)
fig_status.update_layout(height=800, paper_bgcolor="mintcream")
fig_status.show()