# Análise de dados dos Startups
Unicórnios também podem se referir a um fenômeno de recrutamento no setor de recursos humanos (RH). Os gerentes de RH podem ter grandes expectativas para preencher um cargo, levando-os a procurar candidatos com qualificações superiores às exigidas para um cargo específico. Em essência, esses gerentes estão procurando um unicórnio, o que leva a uma desconexão entre seu candidato ideal e quem eles podem contratar do grupo de pessoas disponíveis.

In [48]:
#Importando as bibliotecas
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

import warnings 
warnings.filterwarnings('ignore')

In [49]:
#Ler o arquivo
data = pd.read_csv('unicorns till sep 2022.csv')

In [50]:
data.head()

Unnamed: 0,Company,Valuation ($B),Date Joined,Country,City,Industry,Investors
0,ByteDance,$140,4/7/2017,China,Beijing,Artificial intelligence,"Sequoia Capital China, SIG Asia Investments, S..."
1,SpaceX,$127,12/1/2012,United States,Hawthorne,Other,"Founders Fund, Draper Fisher Jurvetson, Rothen..."
2,SHEIN,$100,7/3/2018,China,Shenzhen,E-commerce & direct-to-consumer,"Tiger Global Management, Sequoia Capital China..."
3,Stripe,$95,1/23/2014,United States,San Francisco,Fintech,"Khosla Ventures, LowercaseCapital, capitalG"
4,Canva,$40,1/8/2018,Australia,Surry Hills,Internet software & services,"Sequoia Capital China, Blackbird Ventures, Mat..."


In [51]:
#Formato
data.shape

(1186, 7)

In [52]:
data.columns

Index(['Company', 'Valuation ($B)', 'Date Joined', 'Country', 'City ',
       'Industry', 'Investors'],
      dtype='object')

In [53]:
#Renomear as colunas
data.rename(columns={'Company' : 'Empresa', 'Valuation ($B)' : 'Valor ($B)', 'Date Joined' : 'Data de adesão', 
                     'Country' : 'Pais', 'City ' : 'Cidade','Industry' : 'Setor', 'Investors' : 'Investidores'}, inplace=True)

In [54]:
data.head()

Unnamed: 0,Empresa,Valor ($B),Data de adesão,Pais,City,Setor,Investidores
0,ByteDance,$140,4/7/2017,China,Beijing,Artificial intelligence,"Sequoia Capital China, SIG Asia Investments, S..."
1,SpaceX,$127,12/1/2012,United States,Hawthorne,Other,"Founders Fund, Draper Fisher Jurvetson, Rothen..."
2,SHEIN,$100,7/3/2018,China,Shenzhen,E-commerce & direct-to-consumer,"Tiger Global Management, Sequoia Capital China..."
3,Stripe,$95,1/23/2014,United States,San Francisco,Fintech,"Khosla Ventures, LowercaseCapital, capitalG"
4,Canva,$40,1/8/2018,Australia,Surry Hills,Internet software & services,"Sequoia Capital China, Blackbird Ventures, Mat..."


In [56]:
#Verificar a informação 
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1186 entries, 0 to 1185
Data columns (total 7 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   Empresa         1186 non-null   object
 1   Valor ($B)      1186 non-null   object
 2   Data de adesão  1186 non-null   object
 3   Pais            1186 non-null   object
 4   City            1186 non-null   object
 5   Setor           1186 non-null   object
 6   Investidores    1168 non-null   object
dtypes: object(7)
memory usage: 65.0+ KB


In [58]:
#Campos nulos
data.isnull().sum()

Empresa            0
Valor ($B)         0
Data de adesão     0
Pais               0
City               0
Setor              0
Investidores      18
dtype: int64

In [60]:
# Grafica
plt.figure( figsize=(10,6) )
plt.title('Analisando Campos Nulos')
sns.heatmap( data.isnull(), cbar=False );

<IPython.core.display.Javascript object>

In [61]:
# Campos unicos
data.nunique()

Empresa           1183
Valor ($B)         222
Data de adesão     695
Pais                48
City               286
Setor               34
Investidores      1152
dtype: int64

In [62]:
# Valores Unicos
data['Setor'].unique()

array(['Artificial intelligence', 'Other',
       'E-commerce & direct-to-consumer', 'Fintech',
       'Internet software & services',
       'Supply chain, logistics, & delivery',
       'Data management & analytics',
       'Sequoia Capital, Thoma Bravo, Softbank', 'Edtech', 'Hardware',
       'Consumer & retail', 'Health', 'Auto & transportation',
       'Cybersecurity', 'Mobile & telecommunications', 'Travel',
       'Kuang-Chi',
       'Tiger Global Management, Tiger Brokers, DCM Ventures',
       'Jungle Ventures, Accel, Venture Highway',
       'Artificial Intelligence', 'GIC. Apis Partners, Insight Partners',
       'Vision Plus Capital, GSR Ventures, ZhenFund',
       'Hopu Investment Management, Boyu Capital, DC Thomson Ventures',
       'Internet', '500 Global, Rakuten Ventures, Golden Gate Ventures',
       'Sequoia Capital China, ING, Alibaba Entrepreneurs Fund',
       'Sequoia Capital China, Shunwei Capital Partners, Qualgro',
       'Dragonfly Captial, Qiming Venture Pa

In [63]:
# Valores Unicos - Rank
data['Setor'].value_counts()

Fintech                                                               239
Internet software & services                                          224
E-commerce & direct-to-consumer                                       103
Health                                                                 94
Artificial intelligence                                                74
Supply chain, logistics, & delivery                                    65
Other                                                                  65
Cybersecurity                                                          58
Data management & analytics                                            45
Auto & transportation                                                  40
Hardware                                                               38
Mobile & telecommunications                                            37
Edtech                                                                 32
Consumer & retail                     

In [64]:
# Valores Unicos - Rank
data['Setor'].value_counts( normalize=True )

Fintech                                                               0.201518
Internet software & services                                          0.188870
E-commerce & direct-to-consumer                                       0.086847
Health                                                                0.079258
Artificial intelligence                                               0.062395
Supply chain, logistics, & delivery                                   0.054806
Other                                                                 0.054806
Cybersecurity                                                         0.048904
Data management & analytics                                           0.037943
Auto & transportation                                                 0.033727
Hardware                                                              0.032040
Mobile & telecommunications                                           0.031197
Edtech                                              

In [66]:
plt.figure( figsize=(10,6) )
plt.title('Analise dos Setores')
plt.bar( data['Setor'].value_counts().index, data['Setor'].value_counts()  )
plt.xticks( rotation=45, ha='right' );

<IPython.core.display.Javascript object>

In [67]:
Analise = round( data['Pais'].value_counts( normalize=True ) * 100, 1 )

In [69]:
# Plot geral dos Paises
plt.figure( figsize=(10,6) )
plt.title('Analise dos Paises gerador de Unicornios')
plt.pie(
    Analise,
    labels = Analise.index,
    shadow=True,
    startangle=90,
    autopct='%1.1f%%'
);

<IPython.core.display.Javascript object>

In [70]:
# Plot geral dos Paises
plt.figure( figsize=(10,6) )
plt.title('Analise dos Paises gerador de Unicornios - Top 10')
plt.pie(
    Analise.head(10),
    labels = Analise.index[0:10],
    shadow=True,
    startangle=90,
    autopct='%1.1f%%'
);

<IPython.core.display.Javascript object>

In [73]:
# Conversão para Data
data['Data de Adesão'] = pd.to_datetime( data['Data de adesão'] )

data['Data de Adesão'].head()

0   2017-04-07
1   2012-12-01
2   2018-07-03
3   2014-01-23
4   2018-01-08
Name: Data de Adesão, dtype: datetime64[ns]

In [75]:
# Extrair o Ano e Mes
data['Mês'] = pd.DatetimeIndex( data['Data de adesão'] ).month
data['Ano'] = pd.DatetimeIndex( data['Data de Adesão'] ).year

data.head()

Unnamed: 0,Empresa,Valor ($B),Data de adesão,Pais,City,Setor,Investidores,Data de Adesão,Mês,Ano
0,ByteDance,$140,4/7/2017,China,Beijing,Artificial intelligence,"Sequoia Capital China, SIG Asia Investments, S...",2017-04-07,4,2017
1,SpaceX,$127,12/1/2012,United States,Hawthorne,Other,"Founders Fund, Draper Fisher Jurvetson, Rothen...",2012-12-01,12,2012
2,SHEIN,$100,7/3/2018,China,Shenzhen,E-commerce & direct-to-consumer,"Tiger Global Management, Sequoia Capital China...",2018-07-03,7,2018
3,Stripe,$95,1/23/2014,United States,San Francisco,Fintech,"Khosla Ventures, LowercaseCapital, capitalG",2014-01-23,1,2014
4,Canva,$40,1/8/2018,Australia,Surry Hills,Internet software & services,"Sequoia Capital China, Blackbird Ventures, Mat...",2018-01-08,1,2018


In [76]:
# Tabela Analitica
Analise_Agrupada = data.groupby( by=['Pais', 'Ano', 'Mês', 'Empresa'] ).count().reset_index()

Analise_Agrupada

Unnamed: 0,Pais,Ano,Mês,Empresa,Valor ($B),Data de adesão,City,Setor,Investidores,Data de Adesão
0,Argentina,2021,8,Uala,1,1,1,1,1,1
1,Australia,2018,1,Canva,1,1,1,1,1,1
2,Australia,2019,3,Airwallex,1,1,1,1,1,1
3,Australia,2021,5,SafetyCulture,1,1,1,1,1,1
4,Australia,2021,7,Culture Amp,1,1,1,1,1,1
...,...,...,...,...,...,...,...,...,...,...
1181,United States,2022,8,Flow,1,1,1,1,1,1
1182,United States,2022,8,Incredible Health,1,1,1,1,1,1
1183,United States,2022,8,Orna Therapeutics,1,1,1,1,1,1
1184,Vietnam,2021,10,Sky Mavis,1,1,1,1,1,1


In [77]:
Analise_Agrupada.loc[
  Analise_Agrupada['Pais'] == 'Brazil'
]

Unnamed: 0,Pais,Ano,Mês,Empresa,Valor ($B),Data de adesão,City,Setor,Investidores,Data de Adesão
16,Brazil,2018,7,Movile,1,1,1,1,1,1
17,Brazil,2018,11,iFood,1,1,1,1,1,1
18,Brazil,2019,6,Loggi,1,1,1,1,1,1
19,Brazil,2019,9,QuintoAndar,1,1,1,1,1,1
20,Brazil,2019,10,EBANX,1,1,1,1,1,1
21,Brazil,2019,12,Wildlife Studios,1,1,1,1,1,1
22,Brazil,2020,1,Loft,1,1,1,1,1,1
23,Brazil,2020,12,C6 Bank,1,1,1,1,1,1
24,Brazil,2020,12,Creditas,1,1,1,1,1,1
25,Brazil,2021,1,MadeiraMadeira,1,1,1,1,1,1


In [78]:
# Transformando a coluna Valor
data['Valor ($B)'] = pd.to_numeric( data['Valor ($B)'].apply( lambda Linha: Linha.replace('$', '') ) )

data.head()

Unnamed: 0,Empresa,Valor ($B),Data de adesão,Pais,City,Setor,Investidores,Data de Adesão,Mês,Ano
0,ByteDance,140.0,4/7/2017,China,Beijing,Artificial intelligence,"Sequoia Capital China, SIG Asia Investments, S...",2017-04-07,4,2017
1,SpaceX,127.0,12/1/2012,United States,Hawthorne,Other,"Founders Fund, Draper Fisher Jurvetson, Rothen...",2012-12-01,12,2012
2,SHEIN,100.0,7/3/2018,China,Shenzhen,E-commerce & direct-to-consumer,"Tiger Global Management, Sequoia Capital China...",2018-07-03,7,2018
3,Stripe,95.0,1/23/2014,United States,San Francisco,Fintech,"Khosla Ventures, LowercaseCapital, capitalG",2014-01-23,1,2014
4,Canva,40.0,1/8/2018,Australia,Surry Hills,Internet software & services,"Sequoia Capital China, Blackbird Ventures, Mat...",2018-01-08,1,2018


In [80]:
# Tabela Analitica
Analise_Pais = data.groupby( by=['Pais'] ).sum()['Valor ($B)'].reset_index()

Analise_Pais.head()

Unnamed: 0,Pais,Valor ($B)
0,Argentina,2.45
1,Australia,54.4
2,Austria,7.61
3,Bahamas,32.0
4,Belgium,8.95


In [82]:
Analise_Valor = Analise_Pais.sort_values('Valor ($B)', ascending=False)
Analise_Valor.head()

Unnamed: 0,Pais,Valor ($B)
46,United States,2069.89
9,China,678.59
45,United Kingdom,205.45
20,India,202.92
18,Germany,80.88


In [83]:
plt.figure( figsize=(10,6) )
plt.plot( Analise_Valor['Pais'], Analise_Valor['Valor ($B)'] )
plt.title('Analise do Valor por Pais')
plt.xticks( rotation=45, ha='right');

<IPython.core.display.Javascript object>