In [1]:
import numpy as np
import pandas as pd
import scipy.stats as stats

#for data visualisation
import seaborn as sns
import matplotlib.pyplot as plt
import plotly
import plotly.express as px
from matplotlib.pyplot import figure
from plotly.offline import init_notebook_mode, iplot
import plotly.graph_objects as go

%matplotlib inline

In [2]:
#import the dataset
df = pd.read_csv('/content/Startups.csv', encoding='latin-1')
df.head()

Unnamed: 0.1,Unnamed: 0,Company,Valuation ($B),Date Joined,Country,City,Industry,Select Investors
0,0,Bytedance,$140,4/7/2017,China,Beijing,Artificial intelligence,"Sequoia Capital China, SIG Asia Investments, S..."
1,1,SpaceX,$100.3,12/1/2012,United States,Hawthorne,Other,"Founders Fund, Draper Fisher Jurvetson, Rothen..."
2,2,Stripe,$95,1/23/2014,United States,San Francisco,Fintech,"Khosla Ventures, LowercaseCapital, capitalG"
3,3,Klarna,$45.6,12/12/2011,Sweden,Stockholm,Fintech,"Institutional Venture Partners, Sequoia Capita..."
4,4,Canva,$40,1/8/2018,Australia,Surry Hills,Internet software & services,"Sequoia Capital China, Blackbird Ventures, Mat..."


In [3]:
#lets clean the data
df = pd.DataFrame(df)
df.rename(columns = {'Date Joined' : 'Date'}, inplace = True)
df.rename(columns = {'Valuation ($B)' : 'Valuation'}, inplace = True)
df.rename(columns = {'Select Investors' : 'Investors'}, inplace = True)
df['Investors'] = df['Investors'].to_string()

date = df.Date.str.split('/', expand=True)
df['year'] = date[2]
df['month'] = date[1]
df['day'] = date[0]
df.year = pd.to_numeric(df.year)
df.month = pd.to_numeric(df.month)
df.day = pd.to_numeric(df.day)


df['Valuation'] = df['Valuation'].str.replace('$', '')
df.Valuation = pd.to_numeric(df.Valuation)
df.year = pd.to_numeric(df.year)


The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.



In [4]:
df.head(10)

Unnamed: 0.1,Unnamed: 0,Company,Valuation,Date,Country,City,Industry,Investors,year,month,day
0,0,Bytedance,140.0,4/7/2017,China,Beijing,Artificial intelligence,"0 Sequoia Capital China, SIG Asia Investm...",2017,7,4
1,1,SpaceX,100.3,12/1/2012,United States,Hawthorne,Other,"0 Sequoia Capital China, SIG Asia Investm...",2012,1,12
2,2,Stripe,95.0,1/23/2014,United States,San Francisco,Fintech,"0 Sequoia Capital China, SIG Asia Investm...",2014,23,1
3,3,Klarna,45.6,12/12/2011,Sweden,Stockholm,Fintech,"0 Sequoia Capital China, SIG Asia Investm...",2011,12,12
4,4,Canva,40.0,1/8/2018,Australia,Surry Hills,Internet software & services,"0 Sequoia Capital China, SIG Asia Investm...",2018,8,1
5,5,Instacart,39.0,12/30/2014,United States,San Francisco,"Supply chain, logistics, & delivery","0 Sequoia Capital China, SIG Asia Investm...",2014,30,12
6,6,Databricks,38.0,2/5/2019,United States,San Francisco,Data management & analytics,"0 Sequoia Capital China, SIG Asia Investm...",2019,5,2
7,7,Revolut,33.0,4/26/2018,United Kingdom,London,Fintech,"0 Sequoia Capital China, SIG Asia Investm...",2018,26,4
8,8,Nubank,30.0,3/1/2018,Brazil,Sao Paulo,Fintech,"0 Sequoia Capital China, SIG Asia Investm...",2018,1,3
9,9,Epic Games,28.7,10/26/2018,United States,Cary,Other,"0 Sequoia Capital China, SIG Asia Investm...",2018,26,10


In [6]:
#unicorn startups in different industry sectors
fig = px.pie(df, names='Industry')
fig.show()

In [7]:
#Year wise company joined the unicorn club
fig2 = px.line(df, x="Company", y="year", title="Year wise Company Joined")
fig2.show()

In [8]:
#countrywise unicorn startups
#lets talk about India

India = df[df['Country'] == "India"]
India

Unnamed: 0.1,Unnamed: 0,Company,Valuation,Date,Country,City,Industry,Investors,year,month,day
12,12,BYJU's,21.0,7/25/2017,India,Bengaluru,Edtech,"0 Sequoia Capital China, SIG Asia Investm...",2017,25,7
53,53,OYO Rooms,9.6,9/25/2018,India,Gurugram,Travel,"0 Sequoia Capital China, SIG Asia Investm...",2018,25,9
62,62,Dream11,8.0,4/9/2019,India,Mumbai,Internet software & services,"0 Sequoia Capital China, SIG Asia Investm...",2019,9,4
80,80,National Stock Exchange of India,6.5,7/1/2020,India,Mumbai,Fintech,"0 Sequoia Capital China, SIG Asia Investm...",2020,1,7
83,83,Ola Cabs,7.5,10/27/2014,India,Bengaluru,Auto & transportation,"0 Sequoia Capital China, SIG Asia Investm...",2014,27,10
115,115,Swiggy,5.5,6/21/2018,India,Bengaluru,"Supply chain, logistics, & delivery","0 Sequoia Capital China, SIG Asia Investm...",2018,21,6
126,126,Meesho,4.9,4/5/2021,India,Bengaluru,Internet software & services,"0 Sequoia Capital China, SIG Asia Investm...",2021,5,4
153,153,CRED,4.01,4/6/2021,India,Bengaluru,Fintech,"0 Sequoia Capital China, SIG Asia Investm...",2021,6,4
193,193,Digit Insurance,3.5,1/15/2021,India,Bengaluru,Fintech,"0 Sequoia Capital China, SIG Asia Investm...",2021,15,1
199,199,Unacademy,3.44,9/2/2020,India,Bengaluru,Edtech,"0 Sequoia Capital China, SIG Asia Investm...",2020,2,9


In [9]:
#Based on Valuation

company_wise_valuation = df.sort_values(by="Valuation", ascending=False)
company_wise_valuation.head(10)

Unnamed: 0.1,Unnamed: 0,Company,Valuation,Date,Country,City,Industry,Investors,year,month,day
0,0,Bytedance,140.0,4/7/2017,China,Beijing,Artificial intelligence,"0 Sequoia Capital China, SIG Asia Investm...",2017,7,4
1,1,SpaceX,100.3,12/1/2012,United States,Hawthorne,Other,"0 Sequoia Capital China, SIG Asia Investm...",2012,1,12
2,2,Stripe,95.0,1/23/2014,United States,San Francisco,Fintech,"0 Sequoia Capital China, SIG Asia Investm...",2014,23,1
3,3,Klarna,45.6,12/12/2011,Sweden,Stockholm,Fintech,"0 Sequoia Capital China, SIG Asia Investm...",2011,12,12
4,4,Canva,40.0,1/8/2018,Australia,Surry Hills,Internet software & services,"0 Sequoia Capital China, SIG Asia Investm...",2018,8,1
5,5,Instacart,39.0,12/30/2014,United States,San Francisco,"Supply chain, logistics, & delivery","0 Sequoia Capital China, SIG Asia Investm...",2014,30,12
6,6,Databricks,38.0,2/5/2019,United States,San Francisco,Data management & analytics,"0 Sequoia Capital China, SIG Asia Investm...",2019,5,2
7,7,Revolut,33.0,4/26/2018,United Kingdom,London,Fintech,"0 Sequoia Capital China, SIG Asia Investm...",2018,26,4
8,8,Nubank,30.0,3/1/2018,Brazil,Sao Paulo,Fintech,"0 Sequoia Capital China, SIG Asia Investm...",2018,1,3
9,9,Epic Games,28.7,10/26/2018,United States,Cary,Other,"0 Sequoia Capital China, SIG Asia Investm...",2018,26,10


In [10]:
#lets talk about spaceX
df[df['Company'] == 'SpaceX']

Unnamed: 0.1,Unnamed: 0,Company,Valuation,Date,Country,City,Industry,Investors,year,month,day
1,1,SpaceX,100.3,12/1/2012,United States,Hawthorne,Other,"0 Sequoia Capital China, SIG Asia Investm...",2012,1,12


In [11]:
#Most Valued Startup city in the world
x = df.groupby('City').sum().sort_values(by="Valuation", ascending=False).head(20)
x = x.reset_index()
px.bar(x="City", y="Valuation", data_frame=x)


The default value of numeric_only in DataFrameGroupBy.sum is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.



In [12]:
#Number of unicorn Startups in a city
df.City.value_counts().head(15)

San Francisco    134
New York          81
Beijing           62
Shanghai          45
London            28
Bengaluru         25
Shenzhen          18
Berlin            16
Paris             15
Chicago           14
Hangzhou          14
Boston            14
Tel Aviv          13
Mountain View     13
Palo Alto         11
Name: City, dtype: int64

In [13]:
#Unicorn startups in Bengaluru City
df[df.City == 'Bengaluru']['Company']

12                     BYJU's
83                   Ola Cabs
115                    Swiggy
126                    Meesho
153                      CRED
193           Digit Insurance
199                 Unacademy
221                     Udaan
236                  Razorpay
245                 ShareChat
252     Ola Electric Mobility
257                     Groww
299     Mobile Premier League
523                   CureFit
683                      apna
686    Acko General Insurance
709                    InMobi
794                 DailyHunt
871                 BlackBuck
880                   Zetwerk
898                   Vedantu
901                   Licious
920              Mensa Brands
926                  NoBroker
927                     Slice
Name: Company, dtype: object

In [17]:
#Country Based unicorn startups

df[df.Country == 'India'][['Company', 'Valuation', 'Industry', 'year']]

Unnamed: 0,Company,Valuation,Industry,year
12,BYJU's,21.0,Edtech,2017
53,OYO Rooms,9.6,Travel,2018
62,Dream11,8.0,Internet software & services,2019
80,National Stock Exchange of India,6.5,Fintech,2020
83,Ola Cabs,7.5,Auto & transportation,2014
115,Swiggy,5.5,"Supply chain, logistics, & delivery",2018
126,Meesho,4.9,Internet software & services,2021
153,CRED,4.01,Fintech,2021
193,Digit Insurance,3.5,Fintech,2021
199,Unacademy,3.44,Edtech,2020
