In [1]:
import pandas as pd
import seaborn as sns
import numpy as np 
from datetime import datetime, timedelta
from matplotlib import pyplot as plt

# General Info

In [2]:
# Load the unicorn-company table from a CSV next to the notebook and preview the first three rows.
uni = pd.read_csv("./Unicorn_companies.csv",sep=",")
uni.head(3)

Unnamed: 0,Company,Valuation ($B),Date Joined,Country,City,Industry,Select Investors
0,Bytedance,$140,4/7/2017,China,Beijing,Artificial intelligence,"Sequoia Capital China, SIG Asia Investments, S..."
1,SpaceX,$125,12/1/2012,United States,Hawthorne,Other,"Founders Fund, Draper Fisher Jurvetson, Rothen..."
2,SHEIN,$100,7/3/2018,China,Shenzhen,E-commerce & direct-to-consumer,"Tiger Global Management, Sequoia Capital China..."


In [3]:
# Column dtypes right after loading; the output below lists every column as object.
uni.dtypes

Company             object
Valuation ($B)      object
Date Joined         object
Country             object
City                object
Industry            object
Select Investors    object
dtype: object

In [4]:
# (rows, columns) of the raw table.
uni.shape

(1157, 7)

In [5]:
# Original column labels, before they are lower-cased further down.
uni.columns

Index(['Company', 'Valuation ($B)', 'Date Joined', 'Country', 'City',
       'Industry', 'Select Investors'],
      dtype='object')

In [6]:
# Missing values per column; the output below reports gaps only in City and Select Investors.
uni.isnull().sum()

Company              0
Valuation ($B)       0
Date Joined          0
Country              0
City                17
Industry             0
Select Investors     1
dtype: int64

# Finding company name homonyms

In [7]:
# Rows per company name; any count above 1 marks a name that appears on several rows.
uni['Company'].value_counts()

Fabric            2
Bolt              2
Qumulo            1
Cava Group        1
Aibee             1
                 ..
Jobandtalent      1
Bought By Many    1
Freenome          1
Project44         1
JupiterOne        1
Name: Company, Length: 1155, dtype: int64

In [8]:
# Pull out the rows named "Fabric" to see whether they describe the same company.
Test1 = uni.loc[uni["Company"] == "Fabric"]
Test1

Unnamed: 0,Company,Valuation ($B),Date Joined,Country,City,Industry,Select Investors
642,Fabric,$1.5,2/24/2022,United States,Bellevue,E-commerce & direct-to-consumer,"Redpoint Ventures, Norwest Venture Partners, S..."
1070,Fabric,$1,10/26/2021,United States,New York,"Supply chain, logistics, & delivery","Innovation Endeavors, Aleph, Temasek"


In [9]:
# Pull out the rows named "Bolt" to see whether they describe the same company.
Test2 = uni.loc[uni["Company"] == "Bolt"]
Test2

Unnamed: 0,Company,Valuation ($B),Date Joined,Country,City,Industry,Select Investors
43,Bolt,$11,10/8/2021,United States,San Francisco,Fintech,"Activant Capital, Tribe Capital, General Atlantic"
72,Bolt,$8.4,5/29/2018,Estonia,Tallinn,Auto & transportation,"Didi Chuxing, Diamler, TMT Investments"


In [10]:
# Give every row a 1-based rank that follows the file's row order.
# NOTE(review): this is only a valuation ranking if the CSV is already sorted by
# valuation (the preview above starts with the largest values) — confirm.
ranking = range(1, len(uni) + 1)
# DataFrame.insert raises "cannot insert ..., already exists" on a second run,
# so only add the column when it is missing; this keeps the cell re-runnable.
if 'ranking_companies' not in uni.columns:
    uni.insert(0, 'ranking_companies', ranking)
uni

Unnamed: 0,ranking_companies,Company,Valuation ($B),Date Joined,Country,City,Industry,Select Investors
0,1,Bytedance,$140,4/7/2017,China,Beijing,Artificial intelligence,"Sequoia Capital China, SIG Asia Investments, S..."
1,2,SpaceX,$125,12/1/2012,United States,Hawthorne,Other,"Founders Fund, Draper Fisher Jurvetson, Rothen..."
2,3,SHEIN,$100,7/3/2018,China,Shenzhen,E-commerce & direct-to-consumer,"Tiger Global Management, Sequoia Capital China..."
3,4,Stripe,$95,1/23/2014,United States,San Francisco,Fintech,"Khosla Ventures, LowercaseCapital, capitalG"
4,5,Klarna,$45.6,12/12/2011,Sweden,Stockholm,Fintech,"Institutional Venture Partners, Sequoia Capita..."
...,...,...,...,...,...,...,...,...
1152,1153,Habi,$1,5/11/2022,Colombia,Bogota,Fintech,"Homebrew, Inspired Capital, Tiger Global Manag..."
1153,1154,Glean,$1,5/18/2022,United States,Palo Alto,Internet software & services,"General Catalyst, Kleiner Perkins Caufield & B..."
1154,1155,CareBridge,$1,6/8/2022,United States,Nashville,Health,"Bain Capital Ventures, Splunk Ventures, Cisco ..."
1155,1156,Immuta,$1,6/8/2022,United States,Boston,Data management & analytics,"DFJ Growth Fund, Dell Technologies Capital, Te..."


In [11]:
# Number of distinct rank values; each row received its own rank, so this equals the row count.
Unique_company = uni['ranking_companies'].nunique()
Unique_company

1157

# Change all column names to lowercase

In [12]:
def lowercase(x):
    """Return the string form of ``x`` in lower case."""
    return str(x).lower()

# Apply the helper to every column label, modifying `uni` in place.
uni.rename(columns=lowercase, inplace=True)

# Classification

In [13]:
# Number of companies per country.
uni["country"].value_counts()

United States           618
China                   174
India                    67
United Kingdom           44
Germany                  29
France                   24
Israel                   22
Canada                   19
Brazil                   17
Singapore                13
South Korea              13
Australia                 8
Sweden                    8
Hong Kong                 7
Mexico                    7
Netherlands               7
Japan                     6
Ireland                   6
Switzerland               6
Indonesia                 6
Norway                    5
Spain                     4
Finland                   4
Belgium                   3
Turkey                    3
United Arab Emirates      3
Colombia                  3
Denmark                   2
South Africa              2
Croatia                   2
Philippines               2
Chile                     2
Thailand                  2
Estonia                   2
Austria                   2
Lithuania           

In [14]:
# Number of companies per industry.
# NOTE(review): the output below contains label variants ("Artificial intelligence" vs
# "Artificial Intelligence", "Internet software & services" vs "Internet Software Services");
# none of the cells visible here normalise them — confirm whether they should be merged.
uni["industry"].value_counts()

Fintech                                244
Internet software & services           215
E-commerce & direct-to-consumer        116
Health                                  88
Artificial intelligence                 73
Other                                   66
Supply chain, logistics, & delivery     58
Cybersecurity                           58
Data management & analytics             44
Mobile & telecommunications             39
Hardware                                37
Auto & transportation                   35
Edtech                                  30
Consumer & retail                       27
Travel                                  14
Artificial Intelligence                 12
Internet Software Services               1
Name: industry, dtype: int64

In [15]:
# Number of companies per city.
uni["city"].value_counts()

San Francisco    163
New York         109
Beijing           63
Shanghai          45
London            35
                ... 
Goleta             1
Lagos              1
Pennsauken         1
Plantation         1
Morrisville        1
Name: city, Length: 277, dtype: int64

# Replacing null values & fixing mistakes

In [16]:
# Keep only the rows whose investor field is missing.
selected_rows = uni.loc[uni['select investors'].isna()]

In [17]:
# Show the rows that have no investor information.
selected_rows

Unnamed: 0,ranking_companies,company,valuation ($b),date joined,country,city,industry,select investors
889,890,LinkSure Network,$1,1/1/2015,China,Shanghai,Mobile & telecommunications,


In [18]:
# Replace the missing investor entry of LinkSure Network with an explicit placeholder string.
uni.loc[uni["company"] == "LinkSure Network", "select investors"] = "No Available info"

In [19]:
# Fill every remaining missing value with the integer 0, in place. Going by the null
# summary earlier and the investor fix above, this should only affect `city`.
# NOTE(review): this puts an int into an otherwise string column; the following cells
# rely on it by filtering on `city == 0`, so change both together if a string sentinel is preferred.
uni.fillna(0, inplace=True)

In [20]:
# Rows whose city was missing and now carries the 0 placeholder.
City_empty = uni.loc[uni["city"] == 0]
City_empty

Unnamed: 0,ranking_companies,company,valuation ($b),date joined,country,city,industry,select investors
11,12,FTX,$32,7/20/2021,Bahamas,0,Fintech,"Sequoia Capital, Thoma Bravo, Softbank"
236,237,HyalRoute,$3.5,5/26/2020,Singapore,0,Mobile & telecommunications,Kuang-Chi
309,310,Amber Group,$3,6/21/2021,Hong Kong,0,Fintech,"Tiger Global Management, Tiger Brokers, DCM Ve..."
340,341,Moglix,$2.6,5/17/2021,Singapore,0,E-commerce & direct-to-consumer,"Jungle Ventures, Accel, Venture Highway"
367,368,Coda Payments,$2.5,4/15/2022,Singapore,0,Fintech,"GIC. Apis Partners, Insight Partners"
477,478,Advance Intelligence Group,$2,9/23/2021,Singapore,0,Artificial intelligence,"Vision Plus Capital, GSR Ventures, ZhenFund"
490,491,Trax,$2,7/22/2019,Singapore,0,Artificial intelligence,"Hopu Investment Management, Boyu Capital, DC T..."
842,843,Carousell,$1.1,9/15/2021,Singapore,0,E-commerce & direct-to-consumer,"500 Global, Rakuten Ventures, Golden Gate Vent..."
913,914,WeLab,$1,11/8/2017,Hong Kong,0,Fintech,"Sequoia Capital China, ING, Alibaba Entreprene..."
978,979,PatSnap,$1,3/16/2021,Singapore,0,Internet software & services,"Sequoia Capital China, Shunwei Capital Partner..."


In [21]:
# All Hong Kong companies, to see which of them still lack a city.
CHK = uni.loc[uni["country"] == "Hong Kong"]
CHK

Unnamed: 0,ranking_companies,company,valuation ($b),date joined,country,city,industry,select investors
47,48,Lalamove,$10,2/21/2019,Hong Kong,Cheung Sha Wan,"Supply chain, logistics, & delivery","MindWorks Ventures, Shunwei Capital Partners, ..."
309,310,Amber Group,$3,6/21/2021,Hong Kong,0,Fintech,"Tiger Global Management, Tiger Brokers, DCM Ve..."
428,429,Babel Finance,$2,5/25/2022,Hong Kong,Hong Kong,Fintech,"Dragonfly Capital, BAI Capital, 10T Fund"
429,430,Trendy Group International,$2,2/13/2012,Hong Kong,Kowloon,Consumer & retail,L Capital Partners
699,700,Klook,$1.35,8/7/2018,Hong Kong,Central,Travel,"Sequoia Capital China, Goldman Sachs, Matrix P..."
913,914,WeLab,$1,11/8/2017,Hong Kong,0,Fintech,"Sequoia Capital China, ING, Alibaba Entreprene..."
1043,1044,Cider,$1,9/2/2021,Hong Kong,0,E-commerce & direct-to-consumer,"Andreessen Horowitz, DST Global, IDG Capital"


In [22]:
# Hand-entered city values for the Hong Kong companies that had none.
hk_city_by_company = {
    "Amber Group": "Central",
    "WeLab": "Quarry Bay",
    "Cider": "Hong Kong",
}
for company_name, hk_city in hk_city_by_company.items():
    uni.loc[uni["company"] == company_name, "city"] = hk_city

In [23]:
# Companies registered in the Bahamas.
BHS = uni.loc[uni["country"] == "Bahamas"]
BHS

Unnamed: 0,ranking_companies,company,valuation ($b),date joined,country,city,industry,select investors
11,12,FTX,$32,7/20/2021,Bahamas,0,Fintech,"Sequoia Capital, Thoma Bravo, Softbank"


In [24]:
# Companies registered in Singapore.
SGP = uni.loc[uni["country"] == "Singapore"]
SGP

Unnamed: 0,ranking_companies,company,valuation ($b),date joined,country,city,industry,select investors
236,237,HyalRoute,$3.5,5/26/2020,Singapore,0,Mobile & telecommunications,Kuang-Chi
340,341,Moglix,$2.6,5/17/2021,Singapore,0,E-commerce & direct-to-consumer,"Jungle Ventures, Accel, Venture Highway"
367,368,Coda Payments,$2.5,4/15/2022,Singapore,0,Fintech,"GIC. Apis Partners, Insight Partners"
477,478,Advance Intelligence Group,$2,9/23/2021,Singapore,0,Artificial intelligence,"Vision Plus Capital, GSR Ventures, ZhenFund"
490,491,Trax,$2,7/22/2019,Singapore,0,Artificial intelligence,"Hopu Investment Management, Boyu Capital, DC T..."
842,843,Carousell,$1.1,9/15/2021,Singapore,0,E-commerce & direct-to-consumer,"500 Global, Rakuten Ventures, Golden Gate Vent..."
978,979,PatSnap,$1,3/16/2021,Singapore,0,Internet software & services,"Sequoia Capital China, Shunwei Capital Partner..."
1005,1006,Matrixport,$1.05,6/1/2021,Singapore,0,Fintech,"Dragonfly Captial, Qiming Venture Partners, DS..."
1010,1011,Carro,$1,6/14/2021,Singapore,0,E-commerce & direct-to-consumer,"SingTel Innov8, Alpha JWC Ventures, Golden Gat..."
1022,1023,bolttech,$1,7/1/2021,Singapore,0,Fintech,"Mundi Ventures, Doqling Capital Partners, Acti..."


In [25]:
# Set one city for every row of these countries (all rows of the country are
# overwritten, not only the ones that were missing a city).
for nation, default_city in (("Singapore", "Singapore"), ("Bahamas", "Nassau")):
    uni.loc[uni["country"] == nation, "city"] = default_city

In [26]:
# Strip the leading '$' so the valuation can be parsed as a number later.
# regex=False makes the literal replacement explicit: '$' is a regex metacharacter,
# and relying on the default `regex` argument is what triggers the FutureWarning
# recorded in this cell's output.
uni['value'] = uni['valuation ($b)'].str.replace('$', '', regex=False)
# Keep a copy of the city as it is now; later cells overwrite `city` to match
# the worldcities spellings while `cities` retains this value.
uni['cities'] = uni['city']

  uni['value'] = uni['valuation ($b)'].str.replace('$', '')


In [27]:
# Confirm that the helper columns 'value' and 'cities' were added.
uni.columns

Index(['ranking_companies', 'company', 'valuation ($b)', 'date joined',
       'country', 'city', 'industry', 'select investors', 'value', 'cities'],
      dtype='object')

# Changing value & date types

In [28]:
# Convert the '$'-stripped valuation strings to a numeric dtype.
uni['value'] = pd.to_numeric(uni['value'])

In [29]:
# Parse the join date with an explicit month/day/year format instead of relying on
# format inference (the raw strings look like "1/23/2014", shown later as 2014-01-23).
# An unexpected format now fails loudly rather than being guessed.
uni['date joined'] = pd.to_datetime(uni['date joined'], format='%m/%d/%Y')

In [30]:
# Re-check the dtypes after the numeric and datetime conversions.
uni.dtypes

ranking_companies             int64
company                      object
valuation ($b)               object
date joined          datetime64[ns]
country                      object
city                         object
industry                     object
select investors             object
value                       float64
cities                       object
dtype: object

In [31]:
# Number of distinct city values at this point in the clean-up.
Unique_city = uni['city'].nunique()
Unique_city

280

In [32]:
# Number of distinct country values.
Unique_country = uni['country'].nunique()
Unique_country

48

# Preparing the city column for the location lookup

In [33]:
# Set the city of every Hong Kong row to "Hong Kong"; the district-level value
# stays available in the `cities` column.
# NOTE(review): presumably done so these rows match a single worldcities entry — confirm.
uni.loc[uni["country"] == "Hong Kong", "city"] = "Hong Kong"

In [34]:
# Preview the frame after the type conversions and city edits.
uni.head()

Unnamed: 0,ranking_companies,company,valuation ($b),date joined,country,city,industry,select investors,value,cities
0,1,Bytedance,$140,2017-04-07,China,Beijing,Artificial intelligence,"Sequoia Capital China, SIG Asia Investments, S...",140.0,Beijing
1,2,SpaceX,$125,2012-12-01,United States,Hawthorne,Other,"Founders Fund, Draper Fisher Jurvetson, Rothen...",125.0,Hawthorne
2,3,SHEIN,$100,2018-07-03,China,Shenzhen,E-commerce & direct-to-consumer,"Tiger Global Management, Sequoia Capital China...",100.0,Shenzhen
3,4,Stripe,$95,2014-01-23,United States,San Francisco,Fintech,"Khosla Ventures, LowercaseCapital, capitalG",95.0,San Francisco
4,5,Klarna,$45.6,2011-12-12,Sweden,Stockholm,Fintech,"Institutional Venture Partners, Sequoia Capita...",45.6,Stockholm


In [35]:
# Verify the Hong Kong rows: `city` is unified while `cities` keeps the earlier value.
Test3 = uni.loc[uni["country"] == "Hong Kong"]
Test3

Unnamed: 0,ranking_companies,company,valuation ($b),date joined,country,city,industry,select investors,value,cities
47,48,Lalamove,$10,2019-02-21,Hong Kong,Hong Kong,"Supply chain, logistics, & delivery","MindWorks Ventures, Shunwei Capital Partners, ...",10.0,Cheung Sha Wan
309,310,Amber Group,$3,2021-06-21,Hong Kong,Hong Kong,Fintech,"Tiger Global Management, Tiger Brokers, DCM Ve...",3.0,Central
428,429,Babel Finance,$2,2022-05-25,Hong Kong,Hong Kong,Fintech,"Dragonfly Capital, BAI Capital, 10T Fund",2.0,Hong Kong
429,430,Trendy Group International,$2,2012-02-13,Hong Kong,Hong Kong,Consumer & retail,L Capital Partners,2.0,Kowloon
699,700,Klook,$1.35,2018-08-07,Hong Kong,Hong Kong,Travel,"Sequoia Capital China, Goldman Sachs, Matrix P...",1.35,Central
913,914,WeLab,$1,2017-11-08,Hong Kong,Hong Kong,Fintech,"Sequoia Capital China, ING, Alibaba Entreprene...",1.0,Quarry Bay
1043,1044,Cider,$1,2021-09-02,Hong Kong,Hong Kong,E-commerce & direct-to-consumer,"Andreessen Horowitz, DST Global, IDG Capital",1.0,Hong Kong


# Second file: city locations (worldcities.csv)

In [36]:
# Load the world-cities reference table (city names, coordinates, country codes, ...)
# from a CSV next to the notebook and preview the first three rows.
WC = pd.read_csv("./worldcities.csv",sep=",")
WC.head(3)

Unnamed: 0,city,city_ascii,lat,lng,country,iso2,iso3,admin_name,capital,population,id
0,Tokyo,Tokyo,35.6897,139.6922,Japan,JP,JPN,Tōkyō,primary,37977000.0,1392685764
1,Jakarta,Jakarta,-6.2146,106.8451,Indonesia,ID,IDN,Jakarta,primary,34540000.0,1360771077
2,Delhi,Delhi,28.66,77.23,India,IN,IND,Delhi,admin,29617000.0,1356872604


In [37]:
# (rows, columns) of the world-cities table.
WC.shape

(41001, 11)

In [38]:
# Column labels of the world-cities table; `city_ascii` and `country` are the join keys used later.
WC.columns

Index(['city', 'city_ascii', 'lat', 'lng', 'country', 'iso2', 'iso3',
       'admin_name', 'capital', 'population', 'id'],
      dtype='object')

# Fixing country and city names that do not match worldcities.csv

In [39]:
# List the Israeli companies to inspect how their cities are spelled.
uni.query('country== "Israel"')

Unnamed: 0,ranking_companies,company,valuation ($b),date joined,country,city,industry,select investors,value,cities
112,113,Wiz,$6,2021-03-17,Israel,Tel Aviv,Cybersecurity,"Insight Partners, Sequoia Capital, Index Ventures",6.0,Tel Aviv
142,143,Moon Active,$5,2020-01-26,Israel,Tel Aviv,Mobile & telecommunications,"Insight Partners, Andalusian Capital Partners",5.0,Tel Aviv
329,330,Transmit Security,$2.74,2021-06-22,Israel,Tel Aviv,Cybersecurity,"General Atlantic, Insight Partners, Vintage In...",2.74,Tel Aviv
355,356,Cato Networks,$2.5,2020-11-17,Israel,Tel Aviv,Cybersecurity,"Aspect Ventures, SingTel Innov8, Greylock Part...",2.5,Tel Aviv
451,452,Earnix,$2,2021-02-21,Israel,Giv'atayim,Fintech,"Jerusalem Venture Partners, Israel Growth Part...",2.0,Giv'atayim
466,467,Pagaya,$2,2020-06-17,Israel,Tel Aviv,Fintech,"Oak HC/FT Partners, GF Investments, Harvey Gol...",2.0,Tel Aviv
486,487,StarkWare,$8,2021-11-17,Israel,Netanya,Cybersecurity,"Sequoia Capital, Paradigm, Pantera Capital",8.0,Netanya
523,524,Lightricks,$1.8,2019-07-31,Israel,Jerusalem,Artificial intelligence,"Viola Ventures, Insight Partners, ClalTech, Go...",1.8,Jerusalem
551,552,Hibob,$1.65,2021-10-12,Israel,Tel Aviv,Internet software & services,"Bessemer Venture Partners, Eight Roads Venture...",1.65,Tel Aviv
611,612,Next Silicon,$1.5,2021-06-14,Israel,Tel Aviv,Hardware,"Amiti Ventures, Playground Global, Aleph",1.5,Tel Aviv


# Changing Israeli city names

In [40]:
# Rename Israeli cities to the spellings used for the later join on worldcities' `city_ascii`.
# NOTE(review): the target spellings (e.g. "Ra`ananna") are kept exactly as the
# original cell had them — confirm each against worldcities.csv.
# The result is assigned back instead of calling replace(..., inplace=True) on
# `uni["city"]`: that chained inplace form is deprecated and leaves `uni`
# unchanged under pandas Copy-on-Write.
uni["city"] = uni["city"].replace({
    'Tel Aviv': 'Tel Aviv-Yafo', "Giv'atayim": 'Givatayim', 'Herzliya': 'Herzliyya',
    'Tirat Carmel': 'Tirat Karmel', "Ra'anana": "Ra`ananna", 'Petah Tikva': 'Petah Tiqwa',
})

In [41]:
# Re-list the Israeli companies to confirm the renamed `city` values (`cities` keeps the originals).
uni.query('country== "Israel"')

Unnamed: 0,ranking_companies,company,valuation ($b),date joined,country,city,industry,select investors,value,cities
112,113,Wiz,$6,2021-03-17,Israel,Tel Aviv-Yafo,Cybersecurity,"Insight Partners, Sequoia Capital, Index Ventures",6.0,Tel Aviv
142,143,Moon Active,$5,2020-01-26,Israel,Tel Aviv-Yafo,Mobile & telecommunications,"Insight Partners, Andalusian Capital Partners",5.0,Tel Aviv
329,330,Transmit Security,$2.74,2021-06-22,Israel,Tel Aviv-Yafo,Cybersecurity,"General Atlantic, Insight Partners, Vintage In...",2.74,Tel Aviv
355,356,Cato Networks,$2.5,2020-11-17,Israel,Tel Aviv-Yafo,Cybersecurity,"Aspect Ventures, SingTel Innov8, Greylock Part...",2.5,Tel Aviv
451,452,Earnix,$2,2021-02-21,Israel,Givatayim,Fintech,"Jerusalem Venture Partners, Israel Growth Part...",2.0,Giv'atayim
466,467,Pagaya,$2,2020-06-17,Israel,Tel Aviv-Yafo,Fintech,"Oak HC/FT Partners, GF Investments, Harvey Gol...",2.0,Tel Aviv
486,487,StarkWare,$8,2021-11-17,Israel,Netanya,Cybersecurity,"Sequoia Capital, Paradigm, Pantera Capital",8.0,Netanya
523,524,Lightricks,$1.8,2019-07-31,Israel,Jerusalem,Artificial intelligence,"Viola Ventures, Insight Partners, ClalTech, Go...",1.8,Jerusalem
551,552,Hibob,$1.65,2021-10-12,Israel,Tel Aviv-Yafo,Internet software & services,"Bessemer Venture Partners, Eight Roads Venture...",1.65,Tel Aviv
611,612,Next Silicon,$1.5,2021-06-14,Israel,Tel Aviv-Yafo,Hardware,"Amiti Ventures, Playground Global, Aleph",1.5,Tel Aviv


# Changing South Korea, Czech Republic & Bahamas country names

In [42]:
# Look up Seoul in worldcities to see how that table labels the country.
WC.query('city== "Seoul"')

Unnamed: 0,city,city_ascii,lat,lng,country,iso2,iso3,admin_name,capital,population,id
7,Seoul,Seoul,37.56,126.99,"Korea, South",KR,KOR,Seoul,primary,21794000.0,1410836482


In [43]:
# List the companies labelled "South Korea" in the unicorn table.
uni.query('country== "South Korea"')

Unnamed: 0,ranking_companies,company,valuation ($b),date joined,country,city,industry,select investors,value,cities
86,87,Toss,$7.4,2018-12-09,South Korea,Seoul,Fintech,"Bessemer Venture Partners, Qualcomm Ventures, ...",7.4,Seoul
188,189,Yello Mobile,$4,2014-11-11,South Korea,Seoul,Mobile & telecommunications,Formation 8,4.0,Seoul
260,261,Kurly,$3.3,2021-07-09,South Korea,Seoul,"Supply chain, logistics, & delivery","Sequoia Capital China, DST Global, DST Global",3.3,Seoul
387,388,WEMAKEPRICE,$2.34,2015-09-09,South Korea,Seoul,E-commerce & direct-to-consumer,"IMM Investment, NXC",2.34,Seoul
402,403,MUSINSA,$2.2,2019-11-11,South Korea,Seoul,E-commerce & direct-to-consumer,Sequoia Capital,2.2,Seoul
684,685,Bucketplace,$1.4,2022-05-09,South Korea,Seoul,E-commerce & direct-to-consumer,"IMM Investment, Mirae Asset Capital, BOND",1.4,Seoul
705,706,RIDI,$1.33,2022-01-24,South Korea,Seoul,E-commerce & direct-to-consumer,"Atinum Investment, Company K Partners, GIC",1.33,Seoul
708,709,GPclub,$1.32,2018-10-22,South Korea,Seoul,Other,Goldman Sachs,1.32,Seoul
806,807,L&P Cosmetic,$1.19,2016-01-01,South Korea,Seoul,Consumer & retail,CDIB Capital,1.19,Seoul
878,879,Aprogen,$1.04,2019-05-31,South Korea,Seongnam-Si,Health,"Lindeman Asia Investment, Nichi-Iko Pharmaceut...",1.04,Seongnam-Si


In [44]:
# Use the worldcities country label ("Korea, South", as shown by the Seoul lookup)
# so that the country key matches in the later join.
# Assigned back rather than replace(..., inplace=True) on `uni["country"]`: the
# chained inplace form is deprecated and leaves `uni` unchanged under pandas Copy-on-Write.
uni["country"] = uni["country"].replace({'South Korea': 'Korea, South'})
uni.query('country== "Korea, South"')

Unnamed: 0,ranking_companies,company,valuation ($b),date joined,country,city,industry,select investors,value,cities
86,87,Toss,$7.4,2018-12-09,"Korea, South",Seoul,Fintech,"Bessemer Venture Partners, Qualcomm Ventures, ...",7.4,Seoul
188,189,Yello Mobile,$4,2014-11-11,"Korea, South",Seoul,Mobile & telecommunications,Formation 8,4.0,Seoul
260,261,Kurly,$3.3,2021-07-09,"Korea, South",Seoul,"Supply chain, logistics, & delivery","Sequoia Capital China, DST Global, DST Global",3.3,Seoul
387,388,WEMAKEPRICE,$2.34,2015-09-09,"Korea, South",Seoul,E-commerce & direct-to-consumer,"IMM Investment, NXC",2.34,Seoul
402,403,MUSINSA,$2.2,2019-11-11,"Korea, South",Seoul,E-commerce & direct-to-consumer,Sequoia Capital,2.2,Seoul
684,685,Bucketplace,$1.4,2022-05-09,"Korea, South",Seoul,E-commerce & direct-to-consumer,"IMM Investment, Mirae Asset Capital, BOND",1.4,Seoul
705,706,RIDI,$1.33,2022-01-24,"Korea, South",Seoul,E-commerce & direct-to-consumer,"Atinum Investment, Company K Partners, GIC",1.33,Seoul
708,709,GPclub,$1.32,2018-10-22,"Korea, South",Seoul,Other,Goldman Sachs,1.32,Seoul
806,807,L&P Cosmetic,$1.19,2016-01-01,"Korea, South",Seoul,Consumer & retail,CDIB Capital,1.19,Seoul
878,879,Aprogen,$1.04,2019-05-31,"Korea, South",Seongnam-Si,Health,"Lindeman Asia Investment, Nichi-Iko Pharmaceut...",1.04,Seongnam-Si


In [45]:
# Rename "Czech Republic" to "Czechia".
# NOTE(review): presumably the label worldcities uses — confirm against worldcities.csv.
# Assigned back rather than replace(..., inplace=True) on `uni["country"]`: the
# chained inplace form is deprecated and leaves `uni` unchanged under pandas Copy-on-Write.
uni["country"] = uni["country"].replace({'Czech Republic': 'Czechia'})
uni.query('country== "Czechia"')

Unnamed: 0,ranking_companies,company,valuation ($b),date joined,country,city,industry,select investors,value,cities
781,782,Rohlik,$1.2,2021-07-01,Czechia,Prague,"Supply chain, logistics, & delivery","Partech Partners, Index Ventures, Quadrille Ca...",1.2,Prague


In [46]:
# Rename "Bahamas" to "Bahamas, The".
# NOTE(review): presumably the label worldcities uses — confirm against worldcities.csv.
# Assigned back rather than replace(..., inplace=True) on `uni["country"]`: the
# chained inplace form is deprecated and leaves `uni` unchanged under pandas Copy-on-Write.
uni["country"] = uni["country"].replace({'Bahamas': 'Bahamas, The'})
uni.query('city== "Nassau"')

Unnamed: 0,ranking_companies,company,valuation ($b),date joined,country,city,industry,select investors,value,cities
11,12,FTX,$32,2021-07-20,"Bahamas, The",Nassau,Fintech,"Sequoia Capital, Thoma Bravo, Softbank",32.0,Nassau


# Changing Indian city names

In [47]:
# List the Indian entries in worldcities to see which city spellings are available.
WC.query('country =="India"')

Unnamed: 0,city,city_ascii,lat,lng,country,iso2,iso3,admin_name,capital,population,id
2,Delhi,Delhi,28.6600,77.2300,India,IN,IND,Delhi,admin,29617000.0,1356872604
3,Mumbai,Mumbai,18.9667,72.8333,India,IN,IND,Mahārāshtra,admin,23355000.0,1356226629
13,Kolkāta,Kolkata,22.5411,88.3378,India,IN,IND,West Bengal,admin,17560000.0,1356060520
23,Bangalore,Bangalore,12.9699,77.5980,India,IN,IND,Karnātaka,admin,13707000.0,1356410365
31,Chennai,Chennai,13.0825,80.2750,India,IN,IND,Tamil Nādu,admin,11324000.0,1356374944
...,...,...,...,...,...,...,...,...,...,...,...
32169,Manāli,Manali,32.2044,77.1700,India,IN,IND,Himāchal Pradesh,,8096.0,1356961623
34213,Amarkantak,Amarkantak,22.6751,81.7596,India,IN,IND,Chhattīsgarh,,7074.0,1356062517
35964,Pedda Nandipādu,Pedda Nandipadu,16.0728,80.3294,India,IN,IND,Andhra Pradesh,,6090.0,1356679236
36154,Loutolim,Loutolim,15.3300,73.9800,India,IN,IND,Goa,,6121.0,1356023315


In [48]:
# Map Indian city, district and state values onto a city name for the worldcities join
# (e.g. "Bengaluru" becomes "Bangalore", the spelling listed in worldcities above).
# NOTE(review): mapping whole states to one city ('Uttar Pradesh' -> 'Lucknow',
# 'Maharashtra' -> 'Mumbai') is an approximation carried over from the original cell.
# Assigned back rather than replace(..., inplace=True) on `uni["city"]`: the
# chained inplace form is deprecated and leaves `uni` unchanged under pandas Copy-on-Write.
uni["city"] = uni["city"].replace({
    'Bengaluru': 'Bangalore', "Andheri": 'Mumbai', 'Uttar Pradesh': 'Lucknow',
    'Maharashtra': 'Mumbai', "Gurugram": "Gurgaon",
})

In [49]:
# Re-list the Indian companies to confirm the renamed `city` values (`cities` keeps the originals).
uni.query('country== "India"')

Unnamed: 0,ranking_companies,company,valuation ($b),date joined,country,city,industry,select investors,value,cities
14,15,BYJU's,$22,2017-07-25,India,Bangalore,Edtech,"Tencent Holdings, Lightspeed India Partners, S...",22.0,Bengaluru
46,47,Swiggy,$10.7,2018-06-21,India,Bangalore,"Supply chain, logistics, & delivery","Accel India, SAIF Partners, Norwest Venture Pa...",10.7,Bengaluru
65,66,OYO Rooms,$9,2018-09-25,India,Gurgaon,Travel,"SoftBank Group, Sequoia Capital India,Lightspe...",9.0,Gurugram
77,78,Dream11,$8,2019-04-09,India,Mumbai,Internet software & services,"Kaalari Capital, Tencent Holdings, Steadview C...",8.0,Mumbai
84,85,Razorpay,$7.5,2020-10-11,India,Bangalore,Fintech,"Sequoia Capital India, Tiger Global Management...",7.5,Bengaluru
...,...,...,...,...,...,...,...,...,...,...
1107,1108,Fractal Analytics,$1,2022-01-05,India,Mumbai,Data management & analytics,"TPG Capital, Apax Partners, TA Associates",1.0,Mumbai
1112,1113,Darwinbox,$1,2022-01-25,India,Hyderabad,Internet software & services,"Lightspeed India Partners, Sequoia Capital Ind...",1.0,Hyderabad
1123,1124,Hasura,$1,2022-02-22,India,Bangalore,Internet software & services,"Nexus Venture Partners, Vertex Ventures, STRIVE",1.0,Bengaluru
1131,1132,Amagi,$1,2022-03-16,India,Bangalore,Internet software & services,"Mayfield, Accel, Norwest Venture Partners",1.0,Bengaluru


# Other cities

In [50]:
# List the Croatian cities that exist in worldcities.
WC.query('country== "Croatia"')

Unnamed: 0,city,city_ascii,lat,lng,country,iso2,iso3,admin_name,capital,population,id
736,Zagreb,Zagreb,45.8,15.95,Croatia,HR,HRV,"Zagreb, Grad",primary,790017.0,1191233290
3064,Split,Split,43.51,16.45,Croatia,HR,HRV,Splitsko-Dalmatinska Županija,admin,178102.0,1191440208
3934,Rijeka,Rijeka,45.3272,14.4411,Croatia,HR,HRV,Primorsko-Goranska Županija,admin,128624.0,1191949063
5171,Osijek,Osijek,45.5603,18.6703,Croatia,HR,HRV,Osječko-Baranjska Županija,admin,84104.0,1191917921
5323,Zadar,Zadar,44.1167,15.2167,Croatia,HR,HRV,Zadarska Županija,admin,75082.0,1191160875
5775,Slavonski Brod,Slavonski Brod,45.1553,18.0144,Croatia,HR,HRV,Brodsko-Posavska Županija,admin,56769.0,1191097030
6138,Šibenik,Sibenik,43.7339,15.8956,Croatia,HR,HRV,Šibensko-Kninska Županija,admin,46332.0,1191444936
6261,Dubrovnik,Dubrovnik,42.6403,18.1083,Croatia,HR,HRV,Dubrovačko-Neretvanska Županija,admin,42615.0,1191004286
6356,Bjelovar,Bjelovar,45.8989,16.8422,Croatia,HR,HRV,Bjelovarsko-Bilogorska Županija,admin,40276.0,1191340640
6718,Sisak,Sisak,45.4872,16.3761,Croatia,HR,HRV,Sisačko-Moslavačka Županija,admin,33322.0,1191526652


In [51]:
# Check how worldcities lists Lerma; the output below shows two Mexican entries with that name.
WC.query('city== "Lerma"')

Unnamed: 0,city,city_ascii,lat,lng,country,iso2,iso3,admin_name,capital,population,id
4752,Lerma,Lerma,19.2847,-99.5119,Mexico,MX,MEX,México,minor,105578.0,1484402392
8304,Lerma,Lerma,19.8,-90.6,Mexico,MX,MEX,Campeche,,8281.0,1484035396


In [52]:
# List the South African companies and their city values.
uni.query('country== "South Africa"')

Unnamed: 0,ranking_companies,company,valuation ($b),date joined,country,city,industry,select investors,value,cities
582,583,Promasidor Holdings,$1.59,2016-11-08,South Africa,Bryanston,Consumer & retail,"IFC, Ajinomoto",1.59,Bryanston
910,911,Cell C,$1,2017-08-07,South Africa,Midrand,Mobile & telecommunications,"Blue Label Telecoms, Net1 UEPS Technologies",1.0,Midrand


In [53]:
# List the Mexican companies and their city values.
uni.query('country== "Mexico"')

Unnamed: 0,ranking_companies,company,valuation ($b),date joined,country,city,industry,select investors,value,cities
67,68,Kavak,$8.7,2020-10-01,Mexico,Lerma de Villada,E-commerce & direct-to-consumer,"DST Global, SoftBank Group, Mountain Nazca",8.7,Lerma de Villada
404,405,Bitso,$2.2,2021-05-05,Mexico,Mexico City,Fintech,"Pantera Capital, QED Investors, Coinbase Ventures",2.2,Mexico City
463,464,Clip,$2,2021-06-10,Mexico,Mexico City,Fintech,"Alta Ventures Mexico, General Atlantic, SoftBa...",2.0,Mexico City
720,721,Konfio,$1.3,2021-09-29,Mexico,Mexico City,Fintech,"Kaszek Ventures, QED Investors, International ...",1.3,Mexico City
795,796,Merama,$1.2,2021-12-09,Mexico,Mexico City,E-commerce & direct-to-consumer,"SoftBank Latin America Fund, Advent Internatio...",1.2,Mexico City
840,841,Nowports,$1.1,2022-05-24,Mexico,Monterrey,"Supply chain, logistics, & delivery","Monashees+, Foundation Capital, Base10 Partners",1.1,Monterrey
1089,1090,Clara,$1,2021-12-06,Mexico,Mexico City,Fintech,"DST Global, General Catalyst, Monashees+",1.0,Mexico City


In [54]:
# Look up Unterföhring in worldcities to get its ASCII spelling ("Unterfohring" in the output below).
WC.query('city== "Unterföhring"')

Unnamed: 0,city,city_ascii,lat,lng,country,iso2,iso3,admin_name,capital,population,id
26165,Unterföhring,Unterfohring,48.1917,11.6528,Germany,DE,DEU,Bavaria,,11296.0,1276741944


In [55]:
# Map suburbs, districts and alternative spellings onto the city name used for the worldcities join.
# Fixes relative to the original cell:
#   * "DC" mapped to "Washington " with a trailing space, which can never equal the
#     worldcities `city_ascii` value; it now maps to "Washington".
#   * The result is assigned back instead of calling replace(..., inplace=True) on
#     `uni["city"]`: the chained inplace form is deprecated and leaves `uni`
#     unchanged under pandas Copy-on-Write.
# NOTE(review): the mapping is keyed on city name only, regardless of country, so e.g.
# "Alexandria" -> "Sydney" applies to every row with that city — confirm this is intended.
uni["city"] = uni["city"].replace({
    'Lerma de Villada': 'Lerma', "Sveta Nedelja": 'Zagreb', 'Vodnjan': 'Pula',
    'Surry Hills': 'Sydney', "Alexandria": "Sydney", "Ghent": "Gent", "DC": "Washington",
    "Washington DC": "Washington", "Islandia": "Suffolk", "Sherman Oaks": "Los Angeles",
    "Portola Valley": "San Francisco", "Midrand": "Johannesburg", "Bryanston": "Johannesburg",
    "Selangor": "Petaling Jaya", "Lysaker": "Oslo", "Unterfoehring": "Unterfohring",
    "La Plaine Saint-Denis": "Saint-Denis", "Kista": "Stockholm",
})

In [56]:
# Rows whose city is "Brisbane"; the output below shows one Australian and one United States company.
uni.query('city== "Brisbane"')

Unnamed: 0,ranking_companies,company,valuation ($b),date joined,country,city,industry,select investors,value,cities
1029,1030,GO1,$1,2021-07-19,Australia,Brisbane,Internet software & services,"Y Combinator, M12, SEEK",1.0,Brisbane
1047,1048,Mammoth Biosciences,$1,2021-09-09,United States,Brisbane,Health,"NFX, Plum Alley, Mayfield",1.0,Brisbane


In [57]:
# Rank 1048 is the United States "Brisbane" row (Mammoth Biosciences) from the query above;
# its city is reassigned to San Francisco.
# NOTE(review): presumably because that Brisbane has no usable worldcities match — confirm.
# Selecting by rank depends on the file's row order; selecting by company name would be sturdier.
uni.loc[uni["ranking_companies"] == 1048, "city"] = "San Francisco"

In [58]:
# List the worldcities entries whose admin_name is "Jiangsu".
WC.query('admin_name=="Jiangsu"')

Unnamed: 0,city,city_ascii,lat,lng,country,iso2,iso3,admin_name,capital,population,id
49,Tongshan,Tongshan,34.261,117.1859,China,CN,CHN,Jiangsu,minor,8669000.0,1156241678
66,Nanjing,Nanjing,32.05,118.7667,China,CN,CHN,Jiangsu,admin,7496000.0,1156644065
74,Nantong,Nantong,31.9829,120.8873,China,CN,CHN,Jiangsu,minor,7282835.0,1156045365
75,Yancheng,Yancheng,33.3936,120.1339,China,CN,CHN,Jiangsu,minor,7260240.0,1156995410
135,Suqian,Suqian,33.9331,118.2831,China,CN,CHN,Jiangsu,,5290000.0,1156212349
137,Suzhou,Suzhou,31.304,120.6164,China,CN,CHN,Jiangsu,minor,5250000.0,1156029196
147,Taizhou,Taizhou,32.4831,119.9,China,CN,CHN,Jiangsu,minor,5031000.0,1156119229
159,Huai’an,Huai'an,33.5,119.1331,China,CN,CHN,Jiangsu,,4799889.0,1156634228
160,Xinpu,Xinpu,34.5906,119.1801,China,CN,CHN,Jiangsu,minor,4790000.0,1156035381
168,Changzhou,Changzhou,31.8122,119.9692,China,CN,CHN,Jiangsu,minor,4592431.0,1156185511


In [59]:
# Second batch of city renames for the worldcities join: towns, provinces and
# alternative spellings are mapped onto a single city name.
# NOTE(review): province-level values ("Hunan", "Jiangsu") are mapped to one city each;
# this is an approximation carried over from the original cell.
# Assigned back rather than replace(..., inplace=True) on `uni["city"]`: the
# chained inplace form is deprecated and leaves `uni` unchanged under pandas Copy-on-Write.
uni["city"] = uni["city"].replace({
    'Leudelange': 'Esch-sur-Alzette', "Banyeres de Mariola": "Alicante",
    "Zephyr Cove": "Carson City", "Noida": "New Delhi", "Hunan": "Changsha",
    "Jiangsu": "Nanjing", "Seongnam-Si": "Songnam", "Jeju-do": "Jeju",
})

In [77]:
# Force the city of the row ranked 646 to "Washington".
# NOTE(review): the row with rank 646 is not displayed in any cell visible here — confirm
# which company this is and why it needs a manual override. The execution counter also
# jumps (In [77] directly after In [59]), so this cell was run out of sequence; verify
# the notebook still reproduces with Restart & Run All.
uni.loc[uni["ranking_companies"] == 646, "city"] = "Washington"

# Merge

In [78]:
# Left-join the coordinates onto the companies: company (city, country) against
# worldcities (city_ascii, country). worldcities can hold several places with the same
# name in one country, so a company may fan out into multiple rows here; the shape
# below shows the merged frame is longer than `uni`.
union = uni.merge(
    WC,
    how="left",
    left_on=["city", "country"],
    right_on=["city_ascii", "country"],
)
union.shape

(1507, 20)

In [79]:
# Inspect the end of the merged frame; the output below shows companies repeated once
# per same-named worldcities entry (e.g. Nashville, Boston).
union.tail(10)

Unnamed: 0,ranking_companies,company,valuation ($b),date joined,country,city_x,industry,select investors,value,cities,city_y,city_ascii,lat,lng,iso2,iso3,admin_name,capital,population,id
1497,1151,Opn,$1,2022-05-09,Japan,Tokyo,Fintech,"Sinar Mas Digital Ventures, JIC Venture Growth...",1.0,Tokyo,Tokyo,Tokyo,35.6897,139.6922,JP,JPN,Tōkyō,primary,37977000.0,1392685764
1498,1152,Remarkable,$1,2022-05-10,Norway,Oslo,Internet software & services,Spark Capital,1.0,Oslo,Oslo,Oslo,59.9111,10.7528,NO,NOR,Oslo,primary,693494.0,1578324706
1499,1153,Habi,$1,2022-05-11,Colombia,Bogota,Fintech,"Homebrew, Inspired Capital, Tiger Global Manag...",1.0,Bogota,Bogotá,Bogota,4.6126,-74.0705,CO,COL,Bogotá,primary,9464000.0,1170483426
1500,1154,Glean,$1,2022-05-18,United States,Palo Alto,Internet software & services,"General Catalyst, Kleiner Perkins Caufield & B...",1.0,Palo Alto,Palo Alto,Palo Alto,37.3913,-122.1468,US,USA,California,,65364.0,1840020333
1501,1155,CareBridge,$1,2022-06-08,United States,Nashville,Health,"Bain Capital Ventures, Splunk Ventures, Cisco ...",1.0,Nashville,Nashville,Nashville,36.1715,-86.7843,US,USA,Tennessee,admin,1081903.0,1840036155
1502,1155,CareBridge,$1,2022-06-08,United States,Nashville,Health,"Bain Capital Ventures, Splunk Ventures, Cisco ...",1.0,Nashville,Nashville,Nashville,35.9692,-77.9555,US,USA,North Carolina,,5554.0,1840016167
1503,1156,Immuta,$1,2022-06-08,United States,Boston,Data management & analytics,"DFJ Growth Fund, Dell Technologies Capital, Te...",1.0,Boston,Boston,Boston,42.3188,-71.0846,US,USA,Massachusetts,admin,4688346.0,1840000455
1504,1156,Immuta,$1,2022-06-08,United States,Boston,Data management & analytics,"DFJ Growth Fund, Dell Technologies Capital, Te...",1.0,Boston,Boston,Boston,42.6528,-78.7555,US,USA,New York,,8042.0,1840023297
1505,1157,JupiterOne,$1,2022-06-02,United States,Morrisville,Cybersecurity,"Bain Capital Ventures, Splunk Ventures, Cisco ...",1.0,Morrisville,Morrisville,Morrisville,35.8367,-78.8348,US,USA,North Carolina,,28846.0,1840016195
1506,1157,JupiterOne,$1,2022-06-02,United States,Morrisville,Cybersecurity,"Bain Capital Ventures, Splunk Ventures, Cisco ...",1.0,Morrisville,Morrisville,Morrisville,40.2074,-74.78,US,USA,Pennsylvania,,8521.0,1840001326


In [80]:
# Keep only the first merged row of each company (by rank) and drop the later repeats.
# NOTE(review): "first" relies on the row order of worldcities.csv, which looks
# population-sorted in the previews — confirm it picks the intended city.
without_duplicates = union.loc[~union.duplicated(subset=["ranking_companies"], keep="first")]
without_duplicates

Unnamed: 0,ranking_companies,company,valuation ($b),date joined,country,city_x,industry,select investors,value,cities,city_y,city_ascii,lat,lng,iso2,iso3,admin_name,capital,population,id
0,1,Bytedance,$140,2017-04-07,China,Beijing,Artificial intelligence,"Sequoia Capital China, SIG Asia Investments, S...",140.0,Beijing,Beijing,Beijing,39.9050,116.3914,CN,CHN,Beijing,primary,19433000.0,1156228865
1,2,SpaceX,$125,2012-12-01,United States,Hawthorne,Other,"Founders Fund, Draper Fisher Jurvetson, Rothen...",125.0,Hawthorne,Hawthorne,Hawthorne,33.9146,-118.3476,US,USA,California,,86068.0,1840020486
3,3,SHEIN,$100,2018-07-03,China,Shenzhen,E-commerce & direct-to-consumer,"Tiger Global Management, Sequoia Capital China...",100.0,Shenzhen,Shenzhen,Shenzhen,22.5350,114.0540,CN,CHN,Guangdong,minor,15929000.0,1156158707
4,4,Stripe,$95,2014-01-23,United States,San Francisco,Fintech,"Khosla Ventures, LowercaseCapital, capitalG",95.0,San Francisco,San Francisco,San Francisco,37.7562,-122.4430,US,USA,California,,3592294.0,1840021543
5,5,Klarna,$45.6,2011-12-12,Sweden,Stockholm,Fintech,"Institutional Venture Partners, Sequoia Capita...",45.6,Stockholm,Stockholm,Stockholm,59.3294,18.0686,SE,SWE,Stockholm,primary,972647.0,1752425602
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1499,1153,Habi,$1,2022-05-11,Colombia,Bogota,Fintech,"Homebrew, Inspired Capital, Tiger Global Manag...",1.0,Bogota,Bogotá,Bogota,4.6126,-74.0705,CO,COL,Bogotá,primary,9464000.0,1170483426
1500,1154,Glean,$1,2022-05-18,United States,Palo Alto,Internet software & services,"General Catalyst, Kleiner Perkins Caufield & B...",1.0,Palo Alto,Palo Alto,Palo Alto,37.3913,-122.1468,US,USA,California,,65364.0,1840020333
1501,1155,CareBridge,$1,2022-06-08,United States,Nashville,Health,"Bain Capital Ventures, Splunk Ventures, Cisco ...",1.0,Nashville,Nashville,Nashville,36.1715,-86.7843,US,USA,Tennessee,admin,1081903.0,1840036155
1503,1156,Immuta,$1,2022-06-08,United States,Boston,Data management & analytics,"DFJ Growth Fund, Dell Technologies Capital, Te...",1.0,Boston,Boston,Boston,42.3188,-71.0846,US,USA,Massachusetts,admin,4688346.0,1840000455


In [81]:
# Ten rankings with the most rows in the merged table, most frequent first.
union['ranking_companies'].value_counts().iloc[:10]

818     16
1020    16
776     16
646     16
896     16
357      9
374      8
367      7
788      7
532      7
Name: ranking_companies, dtype: int64

In [82]:
# (rows, columns) of the de-duplicated table.
without_duplicates.shape

(1157, 20)

In [83]:
# Inspect the last ten rows of the de-duplicated table.
without_duplicates.iloc[-10:]

Unnamed: 0,ranking_companies,company,valuation ($b),date joined,country,city_x,industry,select investors,value,cities,city_y,city_ascii,lat,lng,iso2,iso3,admin_name,capital,population,id
1494,1148,Taxfix,$1,2022-04-28,Germany,Berlin,Fintech,"Valar Ventures, Index Ventures, Creandum",1.0,Berlin,Berlin,Berlin,52.5167,13.3833,DE,DEU,Berlin,primary,3644826.0,1276451290
1495,1149,Open,$1,2022-05-02,India,Bangalore,Fintech,"3one4 Capital Partners, Tiger Global Managemen...",1.0,Bengaluru,Bangalore,Bangalore,12.9699,77.598,IN,IND,Karnātaka,admin,13707000.0,1356410365
1496,1150,Polarium,$1,2022-05-03,Sweden,Stockholm,Other,AMF Pensionsforsakring,1.0,Kista,Stockholm,Stockholm,59.3294,18.0686,SE,SWE,Stockholm,primary,972647.0,1752425602
1497,1151,Opn,$1,2022-05-09,Japan,Tokyo,Fintech,"Sinar Mas Digital Ventures, JIC Venture Growth...",1.0,Tokyo,Tokyo,Tokyo,35.6897,139.6922,JP,JPN,Tōkyō,primary,37977000.0,1392685764
1498,1152,Remarkable,$1,2022-05-10,Norway,Oslo,Internet software & services,Spark Capital,1.0,Oslo,Oslo,Oslo,59.9111,10.7528,NO,NOR,Oslo,primary,693494.0,1578324706
1499,1153,Habi,$1,2022-05-11,Colombia,Bogota,Fintech,"Homebrew, Inspired Capital, Tiger Global Manag...",1.0,Bogota,Bogotá,Bogota,4.6126,-74.0705,CO,COL,Bogotá,primary,9464000.0,1170483426
1500,1154,Glean,$1,2022-05-18,United States,Palo Alto,Internet software & services,"General Catalyst, Kleiner Perkins Caufield & B...",1.0,Palo Alto,Palo Alto,Palo Alto,37.3913,-122.1468,US,USA,California,,65364.0,1840020333
1501,1155,CareBridge,$1,2022-06-08,United States,Nashville,Health,"Bain Capital Ventures, Splunk Ventures, Cisco ...",1.0,Nashville,Nashville,Nashville,36.1715,-86.7843,US,USA,Tennessee,admin,1081903.0,1840036155
1503,1156,Immuta,$1,2022-06-08,United States,Boston,Data management & analytics,"DFJ Growth Fund, Dell Technologies Capital, Te...",1.0,Boston,Boston,Boston,42.3188,-71.0846,US,USA,Massachusetts,admin,4688346.0,1840000455
1505,1157,JupiterOne,$1,2022-06-02,United States,Morrisville,Cybersecurity,"Bain Capital Ventures, Splunk Ventures, Cisco ...",1.0,Morrisville,Morrisville,Morrisville,35.8367,-78.8348,US,USA,North Carolina,,28846.0,1840016195


In [84]:
# Spot-check the merged rows whose country is Colombia.
union.query('country== "Colombia"')

Unnamed: 0,ranking_companies,company,valuation ($b),date joined,country,city_x,industry,select investors,value,cities,city_y,city_ascii,lat,lng,iso2,iso3,admin_name,capital,population,id
158,128,Rappi,$5.25,2018-08-31,Colombia,Bogota,"Supply chain, logistics, & delivery","DST Global, Andreessen Horowitz, Sequoia Capit...",5.25,Bogota,Bogotá,Bogota,4.6126,-74.0705,CO,COL,Bogotá,primary,9464000.0,1170483426
1063,815,LifeMiles,$1.15,2015-07-13,Colombia,Bogota,Other,Advent International,1.15,Bogota,Bogotá,Bogota,4.6126,-74.0705,CO,COL,Bogotá,primary,9464000.0,1170483426
1499,1153,Habi,$1,2022-05-11,Colombia,Bogota,Fintech,"Homebrew, Inspired Capital, Tiger Global Manag...",1.0,Bogota,Bogotá,Bogota,4.6126,-74.0705,CO,COL,Bogotá,primary,9464000.0,1170483426


In [85]:
# Persist the de-duplicated table; index=False keeps the row index out of the CSV.
without_duplicates.to_csv('unicorns_co.csv', index=False)

In [86]:
import pandas as pd
import seaborn as sns
import numpy as np 
from datetime import datetime, timedelta
from matplotlib import pyplot as plt

# Select Columns

In [87]:
# Reload the de-duplicated table written earlier in the notebook
# (a comma is already read_csv's default separator).
comp = pd.read_csv('./unicorns_co.csv')
comp.iloc[:3]

Unnamed: 0,ranking_companies,company,valuation ($b),date joined,country,city_x,industry,select investors,value,cities,city_y,city_ascii,lat,lng,iso2,iso3,admin_name,capital,population,id
0,1,Bytedance,$140,2017-04-07,China,Beijing,Artificial intelligence,"Sequoia Capital China, SIG Asia Investments, S...",140.0,Beijing,Beijing,Beijing,39.905,116.3914,CN,CHN,Beijing,primary,19433000.0,1156228865
1,2,SpaceX,$125,2012-12-01,United States,Hawthorne,Other,"Founders Fund, Draper Fisher Jurvetson, Rothen...",125.0,Hawthorne,Hawthorne,Hawthorne,33.9146,-118.3476,US,USA,California,,86068.0,1840020486
2,3,SHEIN,$100,2018-07-03,China,Shenzhen,E-commerce & direct-to-consumer,"Tiger Global Management, Sequoia Capital China...",100.0,Shenzhen,Shenzhen,Shenzhen,22.535,114.054,CN,CHN,Guangdong,minor,15929000.0,1156158707


In [88]:
# List the available columns before choosing which ones to keep.
comp.columns

Index(['ranking_companies', 'company', 'valuation ($b)', 'date joined',
       'country', 'city_x', 'industry', 'select investors', 'value', 'cities',
       'city_y', 'city_ascii', 'lat', 'lng', 'iso2', 'iso3', 'admin_name',
       'capital', 'population', 'id'],
      dtype='object')

In [89]:
# Remove the columns that are not carried into the final dataset.
#
# `DataFrame.drop(..., inplace=True)` returns None, so the previous
# `selection = comp.drop(..., inplace=True)` bound `selection` to None and the
# cell displayed nothing. Drop without `inplace`, keep the result in
# `selection`, and rebind `comp` so the following cells keep operating on the
# reduced frame exactly as before.
unused_columns = [
    'valuation ($b)',
    'cities',
    'city_y',
    'city_ascii',
    'iso2',
    'iso3',
    'admin_name',
]
selection = comp.drop(columns=unused_columns)
comp = selection
selection.head(3)

In [90]:
# Preview `comp` after the column drop.
comp.head()

Unnamed: 0,ranking_companies,company,date joined,country,city_x,industry,select investors,value,lat,lng,capital,population,id
0,1,Bytedance,2017-04-07,China,Beijing,Artificial intelligence,"Sequoia Capital China, SIG Asia Investments, S...",140.0,39.905,116.3914,primary,19433000.0,1156228865
1,2,SpaceX,2012-12-01,United States,Hawthorne,Other,"Founders Fund, Draper Fisher Jurvetson, Rothen...",125.0,33.9146,-118.3476,,86068.0,1840020486
2,3,SHEIN,2018-07-03,China,Shenzhen,E-commerce & direct-to-consumer,"Tiger Global Management, Sequoia Capital China...",100.0,22.535,114.054,minor,15929000.0,1156158707
3,4,Stripe,2014-01-23,United States,San Francisco,Fintech,"Khosla Ventures, LowercaseCapital, capitalG",95.0,37.7562,-122.443,,3592294.0,1840021543
4,5,Klarna,2011-12-12,Sweden,Stockholm,Fintech,"Institutional Venture Partners, Sequoia Capita...",45.6,59.3294,18.0686,primary,972647.0,1752425602


In [91]:
# Spot-check the rows whose country is Sweden.
comp.query('country== "Sweden"')

Unnamed: 0,ranking_companies,company,date joined,country,city_x,industry,select investors,value,lat,lng,capital,population,id
4,5,Klarna,2011-12-12,Sweden,Stockholm,Fintech,"Institutional Venture Partners, Sequoia Capita...",45.6,59.3294,18.0686,primary,972647.0,1752425602
62,63,Northvolt,2019-06-12,Sweden,Stockholm,Other,"Vattenfall, Volkswagen Group, Goldman Sachs",9.08,59.3294,18.0686,primary,972647.0,1752425602
455,456,KRY,2021-04-27,Sweden,Stockholm,Health,"Index Ventures, Creandum, Accel",2.0,59.3294,18.0686,primary,972647.0,1752425602
658,659,Einride,2021-12-31,Sweden,Stockholm,Auto & transportation,"Temasek,\tBUILD Capital Partners, \tNorthzone ...",1.44,59.3294,18.0686,primary,972647.0,1752425602
676,677,Epidemic Sound,2021-03-11,Sweden,Stockholm,Internet software & services,"EQT Partners, Blackstone",1.4,59.3294,18.0686,primary,972647.0,1752425602
1102,1103,VOI,2021-12-21,Sweden,Stockholm,Travel,"Vostok New Ventures, The Raine Group, Balderto...",1.0,59.3294,18.0686,primary,972647.0,1752425602
1144,1145,Instabox,2022-04-21,Sweden,Stockholm,"Supply chain, logistics, & delivery","Creades, EQT Ventures, Verdane Capital",1.0,59.3294,18.0686,primary,972647.0,1752425602
1149,1150,Polarium,2022-05-03,Sweden,Stockholm,Other,AMF Pensionsforsakring,1.0,59.3294,18.0686,primary,972647.0,1752425602


In [92]:
# Rows whose `id` value is missing.
selected_ = comp.loc[comp['id'].isna()]
selected_

Unnamed: 0,ranking_companies,company,date joined,country,city_x,industry,select investors,value,lat,lng,capital,population,id


In [93]:
# Cities of the rows that lack an `id`, if any.
selected_['city_x'].value_counts()

Series([], Name: city_x, dtype: int64)

In [94]:
# Countries of the rows that lack an `id`, if any.
selected_['country'].value_counts()

Series([], Name: country, dtype: int64)

In [95]:
# Show the current column names ahead of the renaming step.
comp.columns

Index(['ranking_companies', 'company', 'date joined', 'country', 'city_x',
       'industry', 'select investors', 'value', 'lat', 'lng', 'capital',
       'population', 'id'],
      dtype='object')

In [96]:
# Rename by label instead of writing into `comp.columns.values`: an Index is
# meant to be immutable, so editing its backing array can leave pandas' cached
# label lookups stale, and the positional form silently renames the wrong
# column if the column order ever changes.
comp = comp.rename(
    columns={
        "company": "unicorn",
        "date joined": "date_joined",
        "city_x": "city",
        "select investors": "selected_investors",
        "id": "id_city",
    }
)

In [97]:
# Check the column names and dtypes after renaming.
comp.dtypes

ranking_companies       int64
unicorn                object
date_joined            object
country                object
city                   object
industry               object
selected_investors     object
value                 float64
lat                   float64
lng                   float64
capital                object
population            float64
id_city                 int64
dtype: object

In [99]:
# Write the final dataset; index=False omits the row index column.
comp.to_csv('unicorns_2022.csv', index=False)