## Lendo os dados e importando o Pandas

In [4]:
import pandas as pd

In [5]:
# Load the salaries dataset; a comma is already read_csv's default delimiter.
dados = pd.read_csv('salaries_by_college_major.csv')

## Tratando os dados

In [6]:
# (rows, columns) of the raw frame.
dados.shape

(51, 6)

In [7]:
# Column labels of the raw frame.
dados.columns

Index(['Undergraduate Major', 'Starting Median Salary',
       'Mid-Career Median Salary', 'Mid-Career 10th Percentile Salary',
       'Mid-Career 90th Percentile Salary', 'Group'],
      dtype='object')

In [8]:
# Number of missing values in each column of the raw frame.
dados.isna().sum()

Undergraduate Major                  0
Starting Median Salary               1
Mid-Career Median Salary             1
Mid-Career 10th Percentile Salary    1
Mid-Career 90th Percentile Salary    1
Group                                1
dtype: int64

In [9]:
# Inspect the last rows of the raw frame to see where the missing values are.
dados.tail()

Unnamed: 0,Undergraduate Major,Starting Median Salary,Mid-Career Median Salary,Mid-Career 10th Percentile Salary,Mid-Career 90th Percentile Salary,Group
46,Psychology,35900.0,60400.0,31600.0,127000.0,HASS
47,Religion,34100.0,52000.0,29700.0,96400.0,HASS
48,Sociology,36500.0,58200.0,30700.0,118000.0,HASS
49,Spanish,34000.0,53100.0,31000.0,96400.0,HASS
50,Source: PayScale Inc.,,,,,


In [10]:
# Drop every row that has at least one missing value. In the raw frame the last
# row ('Source: PayScale Inc.') carries no salary or group values, so it goes.
dados_limpos = dados.dropna()
dados_limpos.tail()

Unnamed: 0,Undergraduate Major,Starting Median Salary,Mid-Career Median Salary,Mid-Career 10th Percentile Salary,Mid-Career 90th Percentile Salary,Group
45,Political Science,40800.0,78200.0,41200.0,168000.0,HASS
46,Psychology,35900.0,60400.0,31600.0,127000.0,HASS
47,Religion,34100.0,52000.0,29700.0,96400.0,HASS
48,Sociology,36500.0,58200.0,30700.0,118000.0,HASS
49,Spanish,34000.0,53100.0,31000.0,96400.0,HASS


## Acessando colunas e células individuais

In [11]:
# Highest starting median salary across all majors.
dados_limpos['Starting Median Salary'].max()

74300.0

In [12]:
# Index label of the row holding the highest starting median salary.
dados_limpos['Starting Median Salary'].idxmax()

43

In [13]:
# Single cell addressed by row label and column name.
dados_limpos.loc[43, 'Starting Median Salary']

74300.0

In [14]:
# Scalar lookup of the major stored on the row labelled 43.
dados_limpos.at[43, 'Undergraduate Major']

'Physician Assistant'

In [15]:
# Full row (all columns) for the index label 43, returned as a Series.
dados_limpos.loc[43]

Undergraduate Major                  Physician Assistant
Starting Median Salary                           74300.0
Mid-Career Median Salary                         91700.0
Mid-Career 10th Percentile Salary                66400.0
Mid-Career 90th Percentile Salary               124000.0
Group                                               STEM
Name: 43, dtype: object

## Salário: + alto + baixo
### Escolaridade

In [16]:
# Index label of the major with the highest mid-career median salary, computed
# once and reused below.
indice_maior_meio_carreira = dados_limpos['Mid-Career Median Salary'].idxmax()
print(dados_limpos['Mid-Career Median Salary'].max())
print(f"Indice para o maior salario no meio da carreira: {indice_maior_meio_carreira}")
# Look the major up by the computed label instead of a hard-coded 8, so the
# cell stays correct if the data or its ordering changes.
dados_limpos['Undergraduate Major'].loc[indice_maior_meio_carreira]

107000.0
Indice para o maior salario no meio da carreira: 8


'Chemical Engineering'

In [17]:
# Lowest starting median salary, followed by the major found on that row.
salario_inicial = dados_limpos['Starting Median Salary']
print(salario_inicial.min())
dados_limpos.at[salario_inicial.idxmin(), 'Undergraduate Major']

34000.0


'Spanish'

In [18]:
# Full row of the major with the lowest mid-career median salary.
indice_menor_meio_carreira = dados_limpos['Mid-Career Median Salary'].idxmin()
dados_limpos.loc[indice_menor_meio_carreira]

Undergraduate Major                  Education
Starting Median Salary                 34900.0
Mid-Career Median Salary               52000.0
Mid-Career 10th Percentile Salary      29300.0
Mid-Career 90th Percentile Salary     102000.0
Group                                     HASS
Name: 18, dtype: object

### Classificando valores e adicionando algumas colunas

In [19]:
# Spread between the 90th and 10th percentile mid-career salaries. The column is
# labelled 'Media', but the value is a difference (a range), not an average.
col_media = dados_limpos['Mid-Career 90th Percentile Salary'] - dados_limpos['Mid-Career 10th Percentile Salary']
# DataFrame.insert mutates the frame in place and raises ValueError when the
# column already exists, so guard it to keep this cell re-runnable.
if 'Media' not in dados_limpos.columns:
    dados_limpos.insert(1, 'Media', col_media)
dados_limpos.head()

Unnamed: 0,Undergraduate Major,Media,Starting Median Salary,Mid-Career Median Salary,Mid-Career 10th Percentile Salary,Mid-Career 90th Percentile Salary,Group
0,Accounting,109800.0,46000.0,77100.0,42200.0,152000.0,Business
1,Aerospace Engineering,96700.0,57700.0,101000.0,64300.0,161000.0,STEM
2,Agriculture,113700.0,42600.0,71900.0,36300.0,150000.0,Business
3,Anthropology,104200.0,36800.0,61500.0,33800.0,138000.0,HASS
4,Architecture,85400.0,41600.0,76800.0,50600.0,136000.0,Business


In [20]:
# Ascending order by 'Media': the majors with the smallest value come first.
risco_baixo = dados_limpos.sort_values(by='Media', ascending=True)
risco_baixo.loc[:, ['Undergraduate Major', 'Media']].head()

Unnamed: 0,Undergraduate Major,Media
40,Nursing,50700.0
43,Physician Assistant,57600.0
41,Nutrition,65300.0
49,Spanish,65400.0
27,Health Care Administration,66400.0


### Escolarização com maior potencial

In [21]:
# Majors ranked from the highest 90th-percentile mid-career salary downwards.
maior_potencial = dados_limpos.sort_values(
    by='Mid-Career 90th Percentile Salary',
    ascending=False,
)
maior_potencial.loc[:, ['Undergraduate Major', 'Mid-Career 90th Percentile Salary']].head()

Unnamed: 0,Undergraduate Major,Mid-Career 90th Percentile Salary
17,Economics,210000.0
22,Finance,195000.0
8,Chemical Engineering,194000.0
37,Math,183000.0
44,Physics,178000.0


In [22]:
# Majors ranked from the largest 'Media' value downwards.
maior_media = dados_limpos.sort_values(by='Media', ascending=False)
maior_media.loc[:, ['Undergraduate Major', 'Media']].head()

Unnamed: 0,Undergraduate Major,Media
17,Economics,159400.0
22,Finance,147800.0
37,Math,137800.0
36,Marketing,132900.0
42,Philosophy,132500.0


In [23]:
# Majors ranked from the highest mid-career median salary downwards.
# Note: this rebinds `maior_media`, replacing whatever it held before.
maior_media = dados_limpos.sort_values(
    by='Mid-Career Median Salary',
    ascending=False,
)
maior_media.loc[:, ['Undergraduate Major', 'Mid-Career Median Salary']].head()

Unnamed: 0,Undergraduate Major,Mid-Career Median Salary
8,Chemical Engineering,107000.0
12,Computer Engineering,105000.0
19,Electrical Engineering,103000.0
1,Aerospace Engineering,101000.0
17,Economics,98600.0


## Usando Groups

In [24]:
# Number of non-null entries per column within each 'Group' category.
dados_limpos.groupby('Group').count()

Unnamed: 0_level_0,Undergraduate Major,Media,Starting Median Salary,Mid-Career Median Salary,Mid-Career 10th Percentile Salary,Mid-Career 90th Percentile Salary
Group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Business,12,12,12,12,12,12
HASS,22,22,22,22,22,22
STEM,16,16,16,16,16,16


In [25]:
# Show floats with thousands separators and two decimals from this cell onwards.
pd.options.display.float_format = '{:,.2f}'.format
# Average each numeric column per group. numeric_only=True skips the string
# column 'Undergraduate Major'; without it, pandas >= 2.0 raises TypeError here.
dados_limpos.groupby('Group').mean(numeric_only=True)

Unnamed: 0_level_0,Media,Starting Median Salary,Mid-Career Median Salary,Mid-Career 10th Percentile Salary,Mid-Career 90th Percentile Salary
Group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Business,103958.33,44633.33,75083.33,43566.67,147525.0
HASS,95218.18,37186.36,62968.18,34145.45,129363.64
STEM,101600.0,53862.5,90812.5,56025.0,157625.0
