# Demographic Data Analyzer

This project analyzes demographic information using the UCI Adult (Census Income) dataset. It calculates summary statistics, compares education levels, and evaluates income levels across different groups.

Key features include:
- Counting race distribution
- Calculating average age of men
- Determining the percentage of individuals with Bachelor's degrees
- Comparing income levels of people with and without higher education
- Identifying the minimum number of hours worked per week and the percentage of those workers who earn >50K
- Finding the country with the highest percentage of high earners
- Discovering the most common occupation for high earners in India

Developed as part of the **freeCodeCamp Data Analysis with Python** certification.  
Built using **Pandas** for data manipulation and analysis.

In [1]:
import pandas as pd

In [2]:
def _percent(part, whole):
    """Return ``part`` as a percentage of ``whole``, rounded to one decimal."""
    return round(part * 100 / whole, 1)


def calculate_demographic_data(print_data=True):
    """Compute demographic summary statistics from ``adult.data.csv``.

    The CSV is read from the current working directory and must provide the
    columns ``race``, ``sex``, ``age``, ``education``, ``salary``,
    ``hours-per-week``, ``native-country`` and ``occupation``.

    Args:
        print_data: When true, print every statistic in addition to
            returning it.

    Returns:
        dict with the keys ``race_count`` (Series of counts indexed by race),
        ``average_age_men``, ``percentage_bachelors``,
        ``higher_education_rich``, ``lower_education_rich``,
        ``min_work_hours``, ``rich_percentage``, ``highest_earning_country``,
        ``highest_earning_country_percentage`` and ``top_IN_occupation``.
        All percentages and the average age are rounded to one decimal.

    Raises:
        ZeroDivisionError: If a group used as a denominator is empty (for
            example, a dataset with no rows).
    """
    # Read data from file
    df = pd.read_csv('adult.data.csv')
    total_people = len(df)

    # Boolean masks shared by several of the questions below.
    is_rich = df['salary'] == '>50K'
    has_higher_education = df['education'].isin(
        ['Bachelors', 'Masters', 'Doctorate']
    )

    # How many of each race are represented in this dataset?
    # A Series with race names as the index labels.
    race_count = df['race'].value_counts()

    # What is the average age of men?
    average_age_men = round(df.loc[df['sex'] == 'Male', 'age'].mean(), 1)

    # What is the percentage of people who have a Bachelor's degree?
    bachelors_count = int((df['education'] == 'Bachelors').sum())
    percentage_bachelors = _percent(bachelors_count, total_people)

    # What percentage of people with / without advanced education
    # (`Bachelors`, `Masters`, or `Doctorate`) make more than 50K?
    higher_education = int(has_higher_education.sum())
    lower_education = total_people - higher_education
    higher_education_rich = _percent(
        int((has_higher_education & is_rich).sum()), higher_education
    )
    lower_education_rich = _percent(
        int((~has_higher_education & is_rich).sum()), lower_education
    )

    # What is the minimum number of hours a person works per week?
    min_work_hours = df['hours-per-week'].min()

    # What percentage of the people who work the minimum number of hours per
    # week have a salary of >50K?
    works_min_hours = df['hours-per-week'] == min_work_hours
    num_min_workers = int(works_min_hours.sum())
    rich_percentage = _percent(
        int((works_min_hours & is_rich).sum()), num_min_workers
    )

    # What country has the highest percentage of people that earn >50K?
    # groupby skips rows whose country is missing, so a blank country can no
    # longer produce an empty group (and a division by zero). sort=False keeps
    # countries in order of first appearance, so ties resolve to the country
    # seen first in the file.
    rich_by_country = is_rich.groupby(df['native-country'], sort=False)
    country_percentages = rich_by_country.sum() * 100 / rich_by_country.count()
    highest_earning_country = country_percentages.idxmax()
    highest_earning_country_percentage = round(
        float(country_percentages.max()), 1
    )

    # Identify the most popular occupation for those who earn >50K in India.
    india_rich_occupations = df.loc[
        (df['native-country'] == 'India') & is_rich, 'occupation'
    ]
    top_IN_occupation = india_rich_occupations.value_counts().idxmax()


    # DO NOT MODIFY BELOW THIS LINE

    if print_data:
        print("Number of each race:\n", race_count) 
        print("Average age of men:", average_age_men)
        print(f"Percentage with Bachelors degrees: {percentage_bachelors}%")
        print(f"Percentage with higher education that earn >50K: {higher_education_rich}%")
        print(f"Percentage without higher education that earn >50K: {lower_education_rich}%")
        print(f"Min work time: {min_work_hours} hours/week")
        print(f"Percentage of rich among those who work fewest hours: {rich_percentage}%")
        print("Country with highest percentage of rich:", highest_earning_country)
        print(f"Highest percentage of rich people in country: {highest_earning_country_percentage}%")
        print("Top occupations in India:", top_IN_occupation)

    return {
        'race_count': race_count,
        'average_age_men': average_age_men,
        'percentage_bachelors': percentage_bachelors,
        'higher_education_rich': higher_education_rich,
        'lower_education_rich': lower_education_rich,
        'min_work_hours': min_work_hours,
        'rich_percentage': rich_percentage,
        'highest_earning_country': highest_earning_country,
        'highest_earning_country_percentage':
        highest_earning_country_percentage,
        'top_IN_occupation': top_IN_occupation
    }

In [4]:
calculate_demographic_data() # run with the default print_data=True: prints each statistic, and the returned dict is shown as the cell output

Number of each race:
 race
White                 27816
Black                  3124
Asian-Pac-Islander     1039
Amer-Indian-Eskimo      311
Other                   271
Name: count, dtype: int64
Average age of men: 39.4
Percentage with Bachelors degrees: 16.4%
Percentage with higher education that earn >50K: 46.5%
Percentage without higher education that earn >50K: 17.4%
Min work time: 1 hours/week
Percentage of rich among those who work fewest hours: 10.0%
Country with highest percentage of rich: Iran
Highest percentage of rich people in country: 41.9%
Top occupations in India: Prof-specialty


{'race_count': race
 White                 27816
 Black                  3124
 Asian-Pac-Islander     1039
 Amer-Indian-Eskimo      311
 Other                   271
 Name: count, dtype: int64,
 'average_age_men': 39.4,
 'percentage_bachelors': 16.4,
 'higher_education_rich': 46.5,
 'lower_education_rich': 17.4,
 'min_work_hours': 1,
 'rich_percentage': 10.0,
 'highest_earning_country': 'Iran',
 'highest_earning_country_percentage': 41.9,
 'top_IN_occupation': 'Prof-specialty'}