# Load Libraries

In [1]:
import pandas as pd

# Load Dataset

In [2]:
# Read the census income CSV; the relative path resolves against the current working directory.
df = pd.read_csv(filepath_or_buffer='census_income_data.csv')

In [3]:
# Preview the first five rows (five is also the default for head()).
df.head(n=5)

Unnamed: 0,age,workclass,fnlwgt,education,education-num,marital-status,occupation,relationship,race,sex,capital-gain,capital-loss,hours-per-week,native-country,income
0,39,State-gov,77516,Bachelors,13,Never-married,Adm-clerical,Not-in-family,White,Male,2174,0,40,United-States,<=50K
1,50,Self-emp-not-inc,83311,Bachelors,13,Married-civ-spouse,Exec-managerial,Husband,White,Male,0,0,13,United-States,<=50K
2,38,Private,215646,HS-grad,9,Divorced,Handlers-cleaners,Not-in-family,White,Male,0,0,40,United-States,<=50K
3,53,Private,234721,11th,7,Married-civ-spouse,Handlers-cleaners,Husband,Black,Male,0,0,40,United-States,<=50K
4,28,Private,338409,Bachelors,13,Married-civ-spouse,Prof-specialty,Wife,Black,Female,0,0,40,Cuba,<=50K


# Using `sum()` on a dataframe

`sum()` can be used on a single column or on multiple columns.

In [5]:
# Column-wise totals for the two capital columns; the result is a Series keyed by column name.
df.loc[:, ['capital-gain', 'capital-loss']].sum()

capital-gain    35089324
capital-loss     2842700
dtype: int64

# Using `sum()` on a group

Using `value_counts()` to get an idea of how many people belong to each `workclass`

In [11]:
# Number of rows per workclass category.
df.loc[:, "workclass"].value_counts()

workclass
Private             22696
Self-emp-not-inc     2541
Local-gov            2093
State-gov            1298
Self-emp-inc         1116
Federal-gov           960
Without-pay            14
Never-worked            7
Name: count, dtype: int64

Using `groupby()` before `sum()` can help generate summations for specific groups.

In [9]:
# Group rows by workclass, then total every numeric column within each group.
df.groupby(by="workclass").sum(numeric_only=True)

Unnamed: 0_level_0,age,fnlwgt,education-num,capital-gain,capital-loss,hours-per-week
workclass,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Federal-gov,40887,177812394,10535,799903,107778,39724
Local-gov,87385,394822919,23111,1842264,229925,85777
Never-worked,144,1581927,52,0,0,199
Private,835158,4374974348,224230,20181687,1815878,913902
Self-emp-inc,51355,196395180,12429,5441274,173135,54481
Self-emp-not-inc,114268,446221558,25985,4792483,296361,112876
State-gov,51188,239009324,14766,910806,108067,50663
Without-pay,669,2439745,127,6830,0,458


### A similar comparison with `occupation`

In [7]:
# Number of rows per occupation category.
df.loc[:, 'occupation'].value_counts()

occupation
Prof-specialty       4140
Craft-repair         4099
Exec-managerial      4066
Adm-clerical         3770
Sales                3650
Other-service        3295
Machine-op-inspct    2002
Transport-moving     1597
Handlers-cleaners    1370
Farming-fishing       994
Tech-support          928
Protective-serv       649
Priv-house-serv       149
Armed-Forces            9
Name: count, dtype: int64

`sort_values()` can help display the values more succinctly by ordering the groups, here by total `hours-per-week` in descending order.

In [13]:
(
    df.groupby(by='occupation')
    # Total every numeric column within each occupation.
    .sum(numeric_only=True)
    # Show the occupations with the most total hours-per-week first.
    .sort_values(by='hours-per-week', ascending=False)
)

Unnamed: 0_level_0,age,fnlwgt,education-num,capital-gain,capital-loss,hours-per-week
occupation,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Exec-managerial,171460,749827375,46552,9200435,564517,182920
Prof-specialty,167743,767127979,53427,11288536,563617,175481
Craft-repair,159990,787551526,37345,2662353,362619,173405
Sales,136342,696733506,37594,4817377,358797,148851
Adm-clerical,139356,724003604,38128,1869750,229195,141595
Other-service,115159,621464850,28929,630339,126036,114342
Machine-op-inspct,75506,390471837,16992,658036,123536,81593
Transport-moving,64196,304015069,14001,783047,130125,71316
Handlers-cleaners,44067,280015679,11659,352875,62521,51988
Farming-fishing,40964,169169035,8557,586188,62697,46708


### Another comparison for `marital-status`

In [15]:
# Number of rows per marital-status category.
df.loc[:, 'marital-status'].value_counts()

marital-status
Married-civ-spouse       14976
Never-married            10683
Divorced                  4443
Separated                 1025
Widowed                    993
Married-spouse-absent      418
Married-AF-spouse           23
Name: count, dtype: int64

In [17]:
(
    df.groupby(by='marital-status')
    # Total every numeric column within each marital-status group.
    .sum(numeric_only=True)
    # Show the groups with the most total hours-per-week first.
    .sort_values(by='hours-per-week', ascending=False)
)

Unnamed: 0_level_0,age,fnlwgt,education-num,capital-gain,capital-loss,hours-per-week
marital-status,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Married-civ-spouse,647676,2796965277,154562,26430536,1833648,648234
Never-married,300737,2087279890,106429,4023093,559207,394630
Divorced,191234,825521798,44565,3236347,287021,183259
Separated,40338,211605434,9526,548958,59563,40284
Widowed,58563,172939929,9030,567074,79109,32749
Married-spouse-absent,16962,80733256,3891,273365,24152,16582
Married-AF-spouse,747,4327808,234,9951,0,946
