### Demographic Data Analyzer

#### Importing Data

In [4]:
import pandas as pd

In [3]:
df = pd.read_csv("adult.data.csv")
df.head()

Unnamed: 0,age,workclass,fnlwgt,education,education-num,marital-status,occupation,relationship,race,sex,capital-gain,capital-loss,hours-per-week,native-country,salary
0,39,State-gov,77516,Bachelors,13,Never-married,Adm-clerical,Not-in-family,White,Male,2174,0,40,United-States,<=50K
1,50,Self-emp-not-inc,83311,Bachelors,13,Married-civ-spouse,Exec-managerial,Husband,White,Male,0,0,13,United-States,<=50K
2,38,Private,215646,HS-grad,9,Divorced,Handlers-cleaners,Not-in-family,White,Male,0,0,40,United-States,<=50K
3,53,Private,234721,11th,7,Married-civ-spouse,Handlers-cleaners,Husband,Black,Male,0,0,40,United-States,<=50K
4,28,Private,338409,Bachelors,13,Married-civ-spouse,Prof-specialty,Wife,Black,Female,0,0,40,Cuba,<=50K


### How many of each race are represented in this dataset?

In [3]:
race_counts = df["race"].value_counts()
print(race_counts)

White                 27816
Black                  3124
Asian-Pac-Islander     1039
Amer-Indian-Eskimo      311
Other                   271
Name: race, dtype: int64


### What is the average age of men? 

In [5]:
men_age = df[["age","sex"]]
men_age = men_age.loc[df["sex"]=="Male","age"]
print(men_age)
average_men_age = men_age.mean().round(1)
print("Average age of men is : ",average_men_age)

0        39
1        50
2        38
3        53
7        52
         ..
32553    32
32554    53
32555    22
32557    40
32559    22
Name: age, Length: 21790, dtype: int64
Average age of men is :  39.4


### What is the percentage of people who have a Bachelor's degree?

In [6]:
bachelors_ppl = df.loc[df["education"]=="Bachelors","education"].value_counts()
bachelors_ppl = bachelors_ppl/len(df)*100
bachelors_ppl.round(1)

Bachelors    16.4
Name: education, dtype: float64

### What percentage of people with advanced education (`Bachelors`, `Masters`, or `Doctorate`) make more than 50K?

In [8]:
# ppl with higher education
higher_education = df.loc[df["education"].isin(["Bachelors","Masters","Doctorate"])]
higher_education.head(7)

Unnamed: 0,age,workclass,fnlwgt,education,education-num,marital-status,occupation,relationship,race,sex,capital-gain,capital-loss,hours-per-week,native-country,salary
0,39,State-gov,77516,Bachelors,13,Never-married,Adm-clerical,Not-in-family,White,Male,2174,0,40,United-States,<=50K
1,50,Self-emp-not-inc,83311,Bachelors,13,Married-civ-spouse,Exec-managerial,Husband,White,Male,0,0,13,United-States,<=50K
4,28,Private,338409,Bachelors,13,Married-civ-spouse,Prof-specialty,Wife,Black,Female,0,0,40,Cuba,<=50K
5,37,Private,284582,Masters,14,Married-civ-spouse,Exec-managerial,Wife,White,Female,0,0,40,United-States,<=50K
8,31,Private,45781,Masters,14,Never-married,Prof-specialty,Not-in-family,White,Female,14084,0,50,United-States,>50K
9,42,Private,159449,Bachelors,13,Married-civ-spouse,Exec-managerial,Husband,White,Male,5178,0,40,United-States,>50K
11,30,State-gov,141297,Bachelors,13,Married-civ-spouse,Prof-specialty,Husband,Asian-Pac-Islander,Male,0,0,40,India,>50K


In [9]:
#higher education ppl who make more than 50K
high_education_ppl = higher_education.loc[higher_education["salary"]==">50K"]
high_education_ppl.head(7)

Unnamed: 0,age,workclass,fnlwgt,education,education-num,marital-status,occupation,relationship,race,sex,capital-gain,capital-loss,hours-per-week,native-country,salary
8,31,Private,45781,Masters,14,Never-married,Prof-specialty,Not-in-family,White,Female,14084,0,50,United-States,>50K
9,42,Private,159449,Bachelors,13,Married-civ-spouse,Exec-managerial,Husband,White,Male,5178,0,40,United-States,>50K
11,30,State-gov,141297,Bachelors,13,Married-civ-spouse,Prof-specialty,Husband,Asian-Pac-Islander,Male,0,0,40,India,>50K
19,43,Self-emp-not-inc,292175,Masters,14,Divorced,Exec-managerial,Unmarried,White,Female,0,0,45,United-States,>50K
20,40,Private,193524,Doctorate,16,Married-civ-spouse,Prof-specialty,Husband,White,Male,0,0,60,United-States,>50K
25,56,Local-gov,216851,Bachelors,13,Married-civ-spouse,Tech-support,Husband,White,Male,0,0,40,United-States,>50K
45,57,Federal-gov,337895,Bachelors,13,Married-civ-spouse,Prof-specialty,Husband,Black,Male,0,0,40,United-States,>50K


In [10]:
#percentage of ppl with high education who earn more than 50K
percentage_higher_education = round(len(high_education_ppl)/len(higher_education),1)
percentage_higher_education

0.5

In [11]:
#ppl with lower education
lower_education = df.loc[~df["education"].isin(["Bachelors","Masters","Doctorate"])]
lower_education.head(7)

Unnamed: 0,age,workclass,fnlwgt,education,education-num,marital-status,occupation,relationship,race,sex,capital-gain,capital-loss,hours-per-week,native-country,salary
2,38,Private,215646,HS-grad,9,Divorced,Handlers-cleaners,Not-in-family,White,Male,0,0,40,United-States,<=50K
3,53,Private,234721,11th,7,Married-civ-spouse,Handlers-cleaners,Husband,Black,Male,0,0,40,United-States,<=50K
6,49,Private,160187,9th,5,Married-spouse-absent,Other-service,Not-in-family,Black,Female,0,0,16,Jamaica,<=50K
7,52,Self-emp-not-inc,209642,HS-grad,9,Married-civ-spouse,Exec-managerial,Husband,White,Male,0,0,45,United-States,>50K
10,37,Private,280464,Some-college,10,Married-civ-spouse,Exec-managerial,Husband,Black,Male,0,0,80,United-States,>50K
13,32,Private,205019,Assoc-acdm,12,Never-married,Sales,Not-in-family,Black,Male,0,0,50,United-States,<=50K
14,40,Private,121772,Assoc-voc,11,Married-civ-spouse,Craft-repair,Husband,Asian-Pac-Islander,Male,0,0,40,?,>50K


In [12]:
# ppl with lower education but earn >50K
low_education_ppl = lower_education.loc[lower_education["salary"]==">50K"]
low_education_ppl.head(7)

Unnamed: 0,age,workclass,fnlwgt,education,education-num,marital-status,occupation,relationship,race,sex,capital-gain,capital-loss,hours-per-week,native-country,salary
7,52,Self-emp-not-inc,209642,HS-grad,9,Married-civ-spouse,Exec-managerial,Husband,White,Male,0,0,45,United-States,>50K
10,37,Private,280464,Some-college,10,Married-civ-spouse,Exec-managerial,Husband,Black,Male,0,0,80,United-States,>50K
14,40,Private,121772,Assoc-voc,11,Married-civ-spouse,Craft-repair,Husband,Asian-Pac-Islander,Male,0,0,40,?,>50K
27,54,?,180211,Some-college,10,Married-civ-spouse,?,Husband,Asian-Pac-Islander,Male,0,0,60,South,>50K
38,31,Private,84154,Some-college,10,Married-civ-spouse,Sales,Husband,White,Male,0,0,38,?,>50K
52,47,Private,51835,Prof-school,15,Married-civ-spouse,Prof-specialty,Wife,White,Female,0,1902,60,Honduras,>50K
55,43,Private,237993,Some-college,10,Married-civ-spouse,Tech-support,Husband,White,Male,0,0,40,United-States,>50K


In [13]:
#percentage of lower education ppl who earn > 50K
percentage_lower_education = round(len(low_education_ppl)/len(lower_education)*100,1)
percentage_lower_education

17.4

### What is the minimum number of hours a person works per week (hours-per-week feature)?

In [25]:
minimum_work_hours = df["hours-per-week"].min()
minimum_work_hours.round(1)

1

### What percentage of the people who work the minimum number of hours per week have a salary of >50K?

In [53]:
min_work_ppl = df.loc[df["hours-per-week"]==minimum_work_hours]

#now who earn >50K
rich_ppl = min_work_ppl.loc[min_work_ppl["salary"]==">50K"]

#now the percentage of it
percentage_min_workers = round(len(rich_ppl)/len(min_work_ppl)*100,1)
percentage_min_workers

10.0

### What country has the highest percentage of people that earn >50K?

In [104]:
highest_earning_country = (df.loc[df["salary"] == ">50K",
                                      "native-country"].value_counts() / df["native-country"].value_counts()).fillna(0).sort_values(ascending=False).index[0]
highest_earning_country

'Iran'

In [106]:
highest_earning_country_percentage = round(len(df[(df["native-country"] == highest_earning_country) & (
        df["salary"] == ">50K")]) / len(df[df["native-country"] == highest_earning_country]) * 100, 1)
highest_earning_country_percentage

41.9

### Identify the most popular occupation for those who earn >50K in India

In [127]:
rich_indians = df.loc[(df["salary"]==">50K") & (df["native-country"]=="India")]
rich_indians["occupation"].value_counts().sort_values(ascending=False).index[0]


'Prof-specialty'