In [584]:
import pandas as pd
import numpy as np
import plotly.express as px

In [585]:
# Load the cleaned startup-funding dataset (path is relative to the working directory).
startup = pd.read_csv('startup_cleaned.csv')

In [586]:
startup.head(2)

Unnamed: 0,date,name,vertical,subvertical,city,investors,type,amount
0,2020-09-01,BYJU’S,E-Tech,E-learning,Bengaluru,Tiger Global Management,Private Equity Round,1656.8
1,2020-01-13,Shuttl,Transportation,App based shuttle service,Gurgaon,Susquehanna Growth Equity,Series C,66.672896


In [587]:
# Total amount per city over the rounds whose investor string mentions IDG Ventures.
# NOTE(review): the pattern begins with a space, so a row whose investor string
# starts with "IDG Ventures" would not match — confirm that this is intended.
(
    startup
    .loc[startup['investors'].str.contains(' IDG Ventures')]
    .groupby('city')['amount']
    .sum()
)

city
Bangalore    453.963200
Chennai       57.988000
Delhi         45.562000
Mumbai       246.034800
New Delhi    560.755351
Pune         186.390000
Singapore     24.852000
Name: amount, dtype: float64

In [588]:
# Amount per round type over the rounds that mention IDG Ventures.
df = (
    startup
    .loc[startup['investors'].str.contains(' IDG Ventures')]
    .groupby('type')['amount']
    .sum()
    .reset_index()
)
df

Unnamed: 0,type,amount
0,Private Equity,1537.5104
1,Seed Funding,8.284
2,Series B,29.75095


In [589]:
# Pie chart: share of the summed amount held by each round type in df.
px.pie(data_frame=df, names='type', values='amount').show()

In [590]:
# Bar chart: summed amount per round type in df.
fig = px.bar(data_frame=df, x='type', y='amount')
fig.show()

In [591]:
# Five startups with the largest summed amount among rounds mentioning IDG Ventures.
(
    startup
    .loc[startup['investors'].str.contains(' IDG Ventures')]
    .groupby('name')['amount']
    .sum()
    .sort_values(ascending=False)
    .head()
    .reset_index()
)

Unnamed: 0,name,amount
0,Lenskart,497.04
1,CureFit,207.1
2,Bizongo,182.248
3,Curefit,124.26
4,Xpressbees,103.55


In [592]:
# Amount per vertical over the rounds that mention IDG Ventures (rebinds df).
df = (
    startup
    .loc[startup['investors'].str.contains(' IDG Ventures')]
    .groupby('vertical')['amount']
    .sum()
    .reset_index()
)

In [593]:
# Pie chart: share of the summed amount held by each vertical in df.
fig = px.pie(data_frame=df, names='vertical', values='amount')
fig.show()

In [594]:
startup.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2821 entries, 0 to 2820
Data columns (total 8 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   date         2821 non-null   object 
 1   name         2821 non-null   object 
 2   vertical     2821 non-null   object 
 3   subvertical  2077 non-null   object 
 4   city         2821 non-null   object 
 5   investors    2821 non-null   object 
 6   type         2821 non-null   object 
 7   amount       2821 non-null   float64
dtypes: float64(1), object(7)
memory usage: 176.4+ KB


In [595]:
# Parse the 'date' column into datetime values so the .dt accessor can be used on it.
startup['date'] = pd.to_datetime(startup['date'])

In [596]:
startup.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2821 entries, 0 to 2820
Data columns (total 8 columns):
 #   Column       Non-Null Count  Dtype         
---  ------       --------------  -----         
 0   date         2821 non-null   datetime64[ns]
 1   name         2821 non-null   object        
 2   vertical     2821 non-null   object        
 3   subvertical  2077 non-null   object        
 4   city         2821 non-null   object        
 5   investors    2821 non-null   object        
 6   type         2821 non-null   object        
 7   amount       2821 non-null   float64       
dtypes: datetime64[ns](1), float64(1), object(6)
memory usage: 176.4+ KB


In [597]:
# Derive the calendar year of each round; used as a grouping key in later cells.
startup['year'] = startup['date'].dt.year

In [598]:
# Amount per year over the rounds that mention IDG Ventures.
(
    startup
    .loc[startup['investors'].str.contains(' IDG Ventures')]
    .groupby('year')['amount']
    .sum()
    .reset_index()
)

Unnamed: 0,year,amount
0,2015,91.124
1,2016,915.382
2,2017,311.4784
3,2018,227.81
4,2019,29.75095


In [599]:
# Amount per sub-sector over the rounds that mention IDG Ventures.
(
    startup
    .loc[startup['investors'].str.contains(' IDG Ventures')]
    .groupby('subvertical')['amount']
    .sum()
    .reset_index()
)

Unnamed: 0,subvertical,amount
0,Aggregator For Packaging Material,182.248
1,Banking & Fintech Platform,24.852
2,Big Data Analytics Platform,22.3668
3,CRM SAAS Platform,49.704
4,Digital Media publication for women,25.6804
5,ECommerce Logistics solutions,103.55
6,End to End health and wellness platform.,207.1
7,Eyewear & Accessories etailer,497.04
8,Local Recommendations and Discoveries Platform,8.284
9,Medical Automation platform,48.0472


In [600]:
# Find similar investors, based on the sectors they invest in or the rounds they co-invest in
startup

Unnamed: 0,date,name,vertical,subvertical,city,investors,type,amount,year
0,2020-09-01,BYJU’S,E-Tech,E-learning,Bengaluru,Tiger Global Management,Private Equity Round,1656.800000,2020
1,2020-01-13,Shuttl,Transportation,App based shuttle service,Gurgaon,Susquehanna Growth Equity,Series C,66.672896,2020
2,2020-09-01,Mamaearth,E-commerce,Retailer of baby and toddler products,Bengaluru,Sequoia Capital India,Series B,152.084796,2020
3,2020-02-01,https://www.wealthbucket.in/,FinTech,Online Investment,New Delhi,Vinod Khatumal,Pre-series A,24.852000,2020
4,2020-02-01,Fashor,Fashion and Apparel,Embroiled Clothes For Women,Mumbai,Sprout Venture Partners,Seed Round,14.911200,2020
...,...,...,...,...,...,...,...,...,...
2816,2015-04-29,Tracxn,Startup Analytics platform,,Bangalore,SAIF Partners,Private Equity,28.994000,2015
2817,2015-04-29,Dazo,Mobile Food Ordering app,,Bangalore,"Sumit Jain, Aprameya Radhakrishna, Alok Goel, ...",Seed Funding,0.000000,2015
2818,2015-04-29,Tradelab,Financial Markets Software,,Bangalore,Rainmatter,Seed Funding,3.313600,2015
2819,2015-04-29,PiQube,Hiring Analytics platform,,Chennai,The HR Fund,Seed Funding,4.142000,2015


In [601]:
# Rounds whose vertical is spelled exactly 'E-commerce' (other spellings are not matched).
startup.loc[startup['vertical'].eq('E-commerce')]

Unnamed: 0,date,name,vertical,subvertical,city,investors,type,amount,year
2,2020-09-01,Mamaearth,E-commerce,Retailer of baby and toddler products,Bengaluru,Sequoia Capital India,Series B,152.084796,2020
85,2019-05-05,StyleDotMe,E-commerce,Fashion and Shopping,Delhi,Indian Angel Network and other angel investors...,Bridge Round,0.0,2019
90,2019-05-30,GlowRoad,E-commerce,Retail,Bengaluru,"Korea Investment Partners, Vertex Ventures",Series B,95.266,2019
93,2019-12-04,Medlife,E-commerce,Online Medicine,Bengaluru,Prasid Uno Family Trust,Private Equity,140.828,2019
94,2019-10-04,Kabadiwala,E-commerce,Waste Management,Bhopal,"Unilever, Beehive Capital Advisor, ABCOM Inves...",Private Equity,3.567629,2019
99,2019-10-04,Craftsvilla,E-commerce,Fashion and Apparel,Mumbai,Supera Pte Ltd,Inhouse Funding,20.241913,2019
102,2019-04-19,My Healthcare,E-commerce,Software Solutions,Bengaluru and Gurugram,Sixth Sense Ventures,Series A,24.852,2019
372,2018-05-01,WOTU,E-commerce,Food and Beverages,Bengaluru,"Claris Capital, Mumbai Angels Network",Angel / Seed Funding,0.0,2018
373,2018-08-01,DAAKI,E-commerce,Sports Nutrition Supplement etailer,Bengaluru,Rohan Agila,Angel / Seed Funding,0.0,2018
376,2018-10-01,Appario Retail Pvt Ltd.,E-commerce,Retail,Delhi,Amazon-Patni JV,Private Equity,156.07056,2018


In [602]:
# All rounds whose investor string contains ' IDG Ventures'.
# NOTE(review): because of the leading space, a row that starts with
# "IDG Ventures" would be excluded — confirm that this is intended.
startup.loc[startup['investors'].str.contains(' IDG Ventures')]

Unnamed: 0,date,name,vertical,subvertical,city,investors,type,amount,year
95,2019-04-13,Tripoto,Social Media,Travel,New Delhi,"Orchid India, Hornbill Orchid India Fund, Chir...",Series B,29.75095,2019
243,2018-02-05,Bizongo,Consumer Internet,Aggregator For Packaging Material,Mumbai,"B Capital, International Finance Corporation (...",Private Equity,182.248,2018
290,2018-04-26,POPxo,Technology,Women Focussed Digital Media Platform,Delhi,"Neoplux, OPPO, IDG Ventures India, Kalaari Cap...",Private Equity,45.562,2018
481,2017-11-21,Heckyl,Technology,Big Data Analytics Platform,Mumbai,"Notion Capital, IDG Ventures",Private Equity,22.3668,2017
504,2017-05-10,Little Black Book,Consumer Internet,Local Recommendations and Discoveries Platform,New Delhi,"Blume Ventures, IDG Ventures & Indian Angel Ne...",Private Equity,8.284,2017
649,2017-08-29,CureFit,Consumer Internet,End to End health and wellness platform.,Bangalore,"Accel Partners, IDG Ventures, Kalaari Capital,...",Private Equity,207.1,2017
938,2017-03-16,POPxo,Consumer Internet,Digital Media publication for women,New Delhi,"GREE Ventures, Kalaari Capital and IDG Venture...",Private Equity,25.6804,2017
986,2017-02-13,Sigtuple,Technology,Medical Automation platform,Bangalore,"Accel Partners, IDG Ventures, Endiya Partners,...",Private Equity,48.0472,2017
1185,2016-11-14,Active.ai,Technology,Banking & Fintech Platform,Singapore,"Kalaari Capital, IDG Ventures India",Private Equity,24.852,2016
1189,2016-11-15,PlaySimple,Technology,Mobile Gaming portal,Bangalore,"SAIF Partners, IDG Ventures India",Private Equity,33.136,2016


In [603]:
# Number of rounds per vertical among the rounds that mention IDG Ventures.
sector = (
    startup
    .loc[startup['investors'].str.contains(' IDG Ventures'), 'vertical']
    .value_counts()
)

In [604]:
def get_similar_investors(investor_name, df):
    """Count the other investor entries found in ``investor_name``'s vertical.

    The reference vertical is read from the first row of ``df`` whose
    ``investors`` value equals ``investor_name`` exactly, so only rows where
    that investor is the sole entry can anchor the lookup.

    Parameters
    ----------
    investor_name : str
        Investor to look up (exact match against the ``investors`` column).
    df : pandas.DataFrame
        Funding rounds with at least ``investors`` and ``vertical`` columns.

    Returns
    -------
    pandas.Series
        Occurrence count of every other ``investors`` value in that vertical,
        most frequent first. Empty when no row matches ``investor_name``.
    """
    # Use the frame that was passed in rather than reaching for the global one.
    investor_df = df[df['investors'] == investor_name]

    # Unknown investor: return an empty result instead of failing on .iloc[0].
    if investor_df.empty:
        return investor_df['investors'].value_counts()

    investor_vertical = investor_df['vertical'].iloc[0]

    # Same vertical, excluding the investor being looked up.
    same_vertical = df[(df['vertical'] == investor_vertical) & (df['investors'] != investor_name)]

    # All counts are returned; no top-N cut is applied here.
    return same_vertical['investors'].value_counts()

# Example usage: pass the full funding table, which has the columns the function reads
investor_name = 'Kalaari Capital'
similar_investors = get_similar_investors(investor_name, startup)

print(similar_investors)


Undisclosed investors                                                                                                   3
Undisclosed Investors                                                                                                   3
Trifecta Capital                                                                                                        2
Sequoia Capital                                                                                                         2
Venture Catalysts                                                                                                       2
                                                                                                                       ..
The Chennai Angels, LetsVenture                                                                                         1
Max Ventures and Industries                                                                                             1
Zodius Technology Opport

In [605]:
def get_similar_investors(investor_name, startup):
    """Return up to five investor entries most frequent in ``investor_name``'s vertical.

    The reference vertical is read from the first row whose ``investors``
    value equals ``investor_name`` exactly. Rows whose investor string contains
    "Undisclosed Investors" (case-insensitive) and rows of the investor itself
    are left out before counting.

    Parameters
    ----------
    investor_name : str
        Investor to look up (exact match against the ``investors`` column).
    startup : pandas.DataFrame
        Funding rounds with at least ``investors`` and ``vertical`` columns.

    Returns
    -------
    pandas.DataFrame
        At most five rows with columns ``index`` (the investor string) and
        ``investors`` (its number of rounds), most frequent first. Empty, with
        the same two columns, when no row matches ``investor_name``.
    """
    investor_df = startup[startup['investors'] == investor_name]

    if investor_df.empty:
        # Unknown investor: nothing to compare against (avoids IndexError on .iloc[0]).
        vertical_df = investor_df
    else:
        investor_vertical = investor_df['vertical'].iloc[0]

        # na=False keeps the mask strictly boolean even if an investor value is missing.
        undisclosed = startup['investors'].str.contains('Undisclosed Investors', case=False, na=False)
        vertical_df = startup[(startup['vertical'] == investor_vertical) & ~undisclosed]

        # Exclude the investor being looked up.
        vertical_df = vertical_df[vertical_df['investors'] != investor_name]

    # Name both output columns explicitly: the labels produced by a bare
    # value_counts().reset_index() differ between pandas versions.
    investor_counts = (
        vertical_df['investors']
        .value_counts()
        .rename_axis('index')
        .reset_index(name='investors')
    )

    # Keep the five most frequent entries.
    top_investors = investor_counts.head(5)

    return top_investors

# Example: look up the entries that share Kalaari Capital's vertical
investor_name = 'Kalaari Capital'
similar_investors = get_similar_investors(investor_name=investor_name, startup=startup)

similar_investors

Unnamed: 0,index,investors
0,Trifecta Capital,2
1,InnoVen Capital,2
2,Sequoia Capital,2
3,Venture Catalysts,2
4,Ratan Tata,2


# Overall Analysis 

In [606]:
startup.head(1)

Unnamed: 0,date,name,vertical,subvertical,city,investors,type,amount,year
0,2020-09-01,BYJU’S,E-Tech,E-learning,Bengaluru,Tiger Global Management,Private Equity Round,1656.8,2020


In [607]:
# Derive the calendar month (1-12) of each round; grouped together with 'year' below.
startup['month'] = startup['date'].dt.month

In [608]:
# Summed amount for every (year, month) pair present in the data.
temp_df = (
    startup
    .groupby(['year', 'month'])['amount']
    .sum()
    .reset_index()
)
temp_df

Unnamed: 0,year,month,amount
0,2015,1,5296.375400
1,2015,2,1573.636924
2,2015,3,1918.532980
3,2015,4,4529.649780
4,2015,5,1407.203080
...,...,...,...
58,2019,12,3682.238000
59,2020,1,141.228896
60,2020,2,39.763200
61,2020,9,1808.884796


In [609]:
# Add a "month-year" text label (e.g. 1-2015) for the x-axis, then give the
# amount column its presentation name.
temp_df['MM-YYYY'] = temp_df['month'].astype(str).str.cat(temp_df['year'].astype(str), sep='-')
temp_df.rename(columns={'amount': 'Total Funding (In Crore Rs.)'}, inplace=True)

temp_df

Unnamed: 0,year,month,Total Funding (In Crore Rs.),MM-YYYY
0,2015,1,5296.375400,1-2015
1,2015,2,1573.636924,2-2015
2,2015,3,1918.532980,3-2015
3,2015,4,4529.649780,4-2015
4,2015,5,1407.203080,5-2015
...,...,...,...,...
58,2019,12,3682.238000,12-2019
59,2020,1,141.228896,1-2020
60,2020,2,39.763200,2-2020
61,2020,9,1808.884796,9-2020


In [610]:
# Line chart of the monthly funding totals held in temp_df.
fig = px.line(
    data_frame=temp_df,
    x='MM-YYYY',
    y='Total Funding (In Crore Rs.)',
    title='Total Amount of Funding in Startups in MM-YYYY',
)
fig.show()

In [611]:
# Number of non-null amount entries for every (year, month) pair (rebinds temp_df).
temp_df = (
    startup
    .groupby(['year', 'month'])['amount']
    .count()
    .reset_index()
)
temp_df

Unnamed: 0,year,month,amount
0,2015,1,35
1,2015,2,14
2,2015,3,36
3,2015,4,65
4,2015,5,53
...,...,...,...
58,2019,12,10
59,2020,1,2
60,2020,2,2
61,2020,9,2


In [612]:
    # Add a "month-year" text label (e.g. 1-2015) for the x-axis, then give the
    # count column its presentation name.
    # NOTE(review): every line of this cell carries a 4-space indent; the recorded
    # output shows the kernel accepted it, but a plain-Python export of the
    # notebook would fail with IndentationError — consider dedenting.
    temp_df['MM-YYYY'] = temp_df['month'].astype(str).str.cat(temp_df['year'].astype(str), sep='-')
    temp_df.rename(columns={'amount': 'Total Number of Funding'}, inplace=True)

    temp_df

Unnamed: 0,year,month,Total Number of Funding,MM-YYYY
0,2015,1,35,1-2015
1,2015,2,14,2-2015
2,2015,3,36,3-2015
3,2015,4,65,4-2015
4,2015,5,53,5-2015
...,...,...,...,...
58,2019,12,10,12-2019
59,2020,1,2,1-2020
60,2020,2,2,2-2020
61,2020,9,2,9-2020


In [613]:
# Line chart of the monthly round counts held in temp_df.
fig = px.line(
    data_frame=temp_df,
    x='MM-YYYY',
    y='Total Number of Funding',
    title='Total Number of Fundings in Startups in MM-YYYY',
)
fig.show()

In [614]:
startup['name'].nunique()

2298

In [615]:
startup

Unnamed: 0,date,name,vertical,subvertical,city,investors,type,amount,year,month
0,2020-09-01,BYJU’S,E-Tech,E-learning,Bengaluru,Tiger Global Management,Private Equity Round,1656.800000,2020,9
1,2020-01-13,Shuttl,Transportation,App based shuttle service,Gurgaon,Susquehanna Growth Equity,Series C,66.672896,2020,1
2,2020-09-01,Mamaearth,E-commerce,Retailer of baby and toddler products,Bengaluru,Sequoia Capital India,Series B,152.084796,2020,9
3,2020-02-01,https://www.wealthbucket.in/,FinTech,Online Investment,New Delhi,Vinod Khatumal,Pre-series A,24.852000,2020,2
4,2020-02-01,Fashor,Fashion and Apparel,Embroiled Clothes For Women,Mumbai,Sprout Venture Partners,Seed Round,14.911200,2020,2
...,...,...,...,...,...,...,...,...,...,...
2816,2015-04-29,Tracxn,Startup Analytics platform,,Bangalore,SAIF Partners,Private Equity,28.994000,2015,4
2817,2015-04-29,Dazo,Mobile Food Ordering app,,Bangalore,"Sumit Jain, Aprameya Radhakrishna, Alok Goel, ...",Seed Funding,0.000000,2015,4
2818,2015-04-29,Tradelab,Financial Markets Software,,Bangalore,Rainmatter,Seed Funding,3.313600,2015,4
2819,2015-04-29,PiQube,Hiring Analytics platform,,Chennai,The HR Fund,Seed Funding,4.142000,2015,4


In [638]:
# Top 10 most funded sectors
# NOTE(review): sector labels are used as-is, so spelling variants such as
# 'eCommerce', 'ECommerce' and 'E-Commerce' are ranked as separate sectors.
temp_df = startup.groupby('vertical')['amount'].sum().reset_index()
most_funded_sectors = (
    temp_df[temp_df['amount'] != 0.0]
    .sort_values(by='amount', ascending=False)
    .head(10)
    # assign() builds a new frame, so the rounding never writes into a slice
    # of another frame (a plain column assignment here can trigger
    # SettingWithCopyWarning).
    .assign(amount=lambda top: top['amount'].round(2))
)

most_funded_sectors

Unnamed: 0,vertical,amount
111,Consumer Internet,51714.05
763,eCommerce,41440.98
715,Transportation,32445.38
711,Technology,18468.83
173,ECommerce,15563.96
235,Finance,11302.59
233,FinTech,10112.37
168,E-Commerce,8288.31
526,Online Marketplace,5799.98
169,E-Commerce & M-Commerce platform,5633.12


In [639]:
import plotly.graph_objects as go


# Horizontal bars: one per sector, bar length = funding amount
fig = go.Figure()
fig.add_trace(
    go.Bar(
        x=most_funded_sectors['amount'],
        y=most_funded_sectors['vertical'],
        orientation='h',
    )
)
fig.update_layout(
    title='Top 10 Most Funded Sectors',
    xaxis={'title': 'Funding Amount (In Crore Rs)'},
    yaxis={'title': 'Sector'},
)

fig.show()


In [618]:
most_funded_sectors

Unnamed: 0,Sector,Amount (In Crore Rs)
111,Consumer Internet,51714.05
763,eCommerce,41440.98
715,Transportation,32445.38
711,Technology,18468.83
173,ECommerce,15563.96
235,Finance,11302.59
233,FinTech,10112.37
168,E-Commerce,8288.31
526,Online Marketplace,5799.98
169,E-Commerce & M-Commerce platform,5633.12


In [628]:
# Ten round types with the largest summed amount (types summing to zero are left out).
# NOTE(review): labels are used as-is, so e.g. 'Private Equity' and
# 'Private Equity Round' are ranked as separate types.
temp_df = (
    startup
    .groupby('type')['amount']
    .sum()
    .reset_index()
)
most_funded_type = (
    temp_df
    .loc[temp_df['amount'] != 0.0]
    .sort_values(by='amount', ascending=False)
    .head(10)
)
most_funded_type

Unnamed: 0,type,amount
19,Private Equity,205419.533979
32,Series B,39631.370371
35,Series D,12275.222916
34,Series C,8654.447176
11,Funding Round,8284.0
26,Seed Funding,4482.909861
20,Private Equity Round,4191.704
38,Series G,1913.604
31,Series A,1683.3088
27,Seed Round,1622.0072


In [629]:
# Horizontal bars: one per round type, bar length = funding amount
fig = go.Figure()
fig.add_trace(
    go.Bar(
        x=most_funded_type['amount'],
        y=most_funded_type['type'],
        orientation='h',
    )
)
fig.update_layout(
    title='Top 10 Most Funded Types of Rounds',
    xaxis={'title': 'Funding Amount (In Crore Rs)'},
    yaxis={'title': 'Type of Investment'},
)

fig.show()

In [636]:
# Top 10 most funded cities

temp_df = startup.groupby('city')['amount'].sum().reset_index()
# .copy() yields an independent frame, so the in-place merge below does not
# write into a slice of temp_df (which can trigger SettingWithCopyWarning).
most_funded_city = temp_df[temp_df['amount'] != 0].copy()

# Add the amount of Bengaluru to Bangalore. Summing the matching rows gives 0.0
# when no Bengaluru row is present, where .values[0] would raise IndexError.
bengaluru_total = most_funded_city.loc[most_funded_city['city'] == 'Bengaluru', 'amount'].sum()
most_funded_city.loc[most_funded_city['city'] == 'Bangalore', 'amount'] += bengaluru_total

# NOTE(review): other spelling pairs (e.g. Gurgaon / Gurugram) are still ranked separately.

# Drop the Bengaluru row, keep the ten largest totals and round for display.
# assign() returns a new frame instead of assigning into the head() slice.
most_funded_city = (
    most_funded_city[most_funded_city['city'] != 'Bengaluru']
    .sort_values(by='amount', ascending=False)
    .head(10)
    .assign(amount=lambda top: top['amount'].round(2))
)

most_funded_city

Unnamed: 0,city,amount
6,Bangalore,152745.84
60,Mumbai,35635.37
66,New Delhi,24999.59
32,Gurgaon,24855.36
73,Noida,10461.57
34,Gurugram,7173.39
23,Chennai,5954.27
78,Pune,5244.45
58,Menlo Park,3727.8
38,Hyderabad,3322.52


In [637]:
# Horizontal bars: one per city, bar length = funding amount
fig = go.Figure()
fig.add_trace(
    go.Bar(
        x=most_funded_city['amount'],
        y=most_funded_city['city'],
        orientation='h',
    )
)
fig.update_layout(
    title='Most Funded Cities',
    xaxis={'title': 'Funding Amount (In Crore Rs)'},
    yaxis={'title': 'City'},
)

fig.show()

In [None]:
startup.head(1)

In [None]:
# Startup with the largest summed amount in each year.
most_funded_startup_yoy = (
    startup
    .groupby(['year', 'name'])['amount']
    .sum()
    .sort_values(ascending=False)
    .reset_index()
    # rows are ordered by amount, so the first row seen per year is that year's maximum
    .drop_duplicates('year', keep='first')
    .sort_values(by='year')
    .rename(columns={
        'year': 'Year',
        'name': 'StartUp Name',
        'amount': 'Amount (In Crore Rs)',
    })
)

most_funded_startup_yoy

In [634]:
# One bar per yearly top startup, coloured by year.
fig = px.bar(
    data_frame=most_funded_startup_yoy,
    x='StartUp Name',
    y='Amount (In Crore Rs)',
    color='Year',
)
fig.show()

In [632]:
# Top investors by summed amount of the rounds they appear in.
# NOTE: every investor listed on a round is credited with that round's full amount.

# One row per (round, investor): split the ', '-separated investor string and
# explode it, rather than building the rows one at a time with iterrows().
round_columns = ['date', 'name', 'vertical', 'subvertical', 'city',
                 'investors', 'type', 'amount', 'year', 'month']
new_df = (
    startup[round_columns]
    .assign(investors=startup['investors'].str.split(', '))
    .explode('investors')
    .reset_index(drop=True)
)

top_investors = new_df.groupby('investors')['amount'].sum().reset_index()

# Add the Softbank total to SoftBank Group. Summing the matching rows gives 0.0
# when the 'Softbank' spelling is absent, where .values[0] would raise IndexError;
# if there is no 'SoftBank Group' row the update is simply a no-op.
softbank_amount = top_investors.loc[top_investors['investors'] == 'Softbank', 'amount'].sum()
top_investors.loc[top_investors['investors'] == 'SoftBank Group', 'amount'] += softbank_amount

# Drop the Softbank row
top_investors = top_investors[top_investors['investors'] != 'Softbank']

top_investors = top_investors.sort_values(by='amount', ascending=False).head(10)
top_investors

Unnamed: 0,investors,amount
2536,SoftBank Group,49969.088
3005,Westbridge Capital,32307.6
2740,Tiger Global,15923.919
2719,Tencent Holdings,13337.24
657,DST Global,12359.728
1613,Microsoft,11597.6
3103,eBay,11597.6
2426,Sequoia Capital,11049.86192
2917,Vijay Shekhar Sharma,8328.7336
149,Alibaba,8284.0


In [633]:
# Horizontal bars: one per investor, bar length = funding amount
fig = go.Figure()
fig.add_trace(
    go.Bar(
        x=top_investors['amount'],
        y=top_investors['investors'],
        orientation='h',
    )
)
fig.update_layout(
    title='Top Most Investors',
    xaxis={'title': 'Funding Amount (In Crore Rs)'},
    yaxis={'title': 'Investor'},
)

fig.show()

In [630]:
# Funding Amount by Year and Month

import plotly.graph_objects as go

# Summed amount per (year, month)
df_agg = (
    startup
    .groupby(['year', 'month'])['amount']
    .sum()
    .reset_index()
)

# Reshape to a year x month grid; pairs with no rounds become NaN cells
pivot_table = df_agg.pivot(values='amount', index='year', columns='month')

# Heatmap trace: months across, years down, colour = summed amount
heatmap = go.Heatmap(
    z=pivot_table.values,
    x=pivot_table.columns,
    y=pivot_table.index,
    colorscale='Viridis',
)

layout = go.Layout(
    title='Funding Amount by Year and Month',
    xaxis={'title': 'Month'},
    yaxis={'title': 'Year'},
)

figure = go.Figure(data=[heatmap], layout=layout)
figure.show()