In [9]:
import pandas as pd

def calculate_demographic_data(print_data=True, data=None):
    """Compute summary statistics for the adult census dataset.

    Args:
        print_data: When true, print every statistic with a label before
            returning.
        data: Source of the records. ``None`` (the default) downloads the
            freeCodeCamp ``adult.data.csv`` file. A ``pandas.DataFrame`` is
            used as-is. Anything else is passed to ``pandas.read_csv``.
            The data must provide the columns ``race``, ``sex``, ``age``,
            ``education``, ``salary``, ``hours-per-week``,
            ``native-country`` and ``occupation``.

    Returns:
        A dict with the keys ``race_count``, ``average_age_men``,
        ``percentage_bachelors``, ``higher_education_rich``,
        ``lower_education_rich``, ``min_work_hours``, ``rich_percentage``,
        ``highest_earning_country``, ``highest_earning_country_percentage``
        and ``top_IN_occupation``. Percentages are rounded to one decimal.
        ``highest_earning_country`` and ``top_IN_occupation`` are ``None``
        when no row qualifies (the matching percentage is then NaN).
    """
    if data is None:
        data = "https://raw.githubusercontent.com/freeCodeCamp/boilerplate-demographic-data-analyzer/master/adult.data.csv"

    # The dataset already contains a header row, so the default read is right.
    df = data if isinstance(data, pd.DataFrame) else pd.read_csv(data)

    def percent(flags):
        """Return the share of True values in a boolean Series, in percent."""
        return round(flags.mean() * 100, 1)

    # Each mask is computed once and reused by the statistics below.
    is_rich = df['salary'] == '>50K'
    has_degree = df['education'].isin(['Bachelors', 'Masters', 'Doctorate'])

    race_count = df['race'].value_counts()

    average_age_men = round(df.loc[df['sex'] == 'Male', 'age'].mean(), 1)

    percentage_bachelors = percent(df['education'] == 'Bachelors')

    higher_education_rich = percent(is_rich[has_degree])
    lower_education_rich = percent(is_rich[~has_degree])

    min_work_hours = df['hours-per-week'].min()
    works_min_hours = df['hours-per-week'] == min_work_hours
    rich_percentage = percent(is_rich[works_min_hours])

    # Countries without any >50K earner are missing from country_salary, so
    # the division yields NaN for them and dropna() removes them.
    country_salary = df.loc[is_rich, 'native-country'].value_counts()
    country_count = df['native-country'].value_counts()
    country_rich_ratio = (country_salary / country_count * 100).dropna()

    # idxmax() raises ValueError on an empty Series; report "no answer"
    # instead of crashing when nobody earns >50K.
    if country_rich_ratio.empty:
        highest_earning_country = None
        highest_earning_country_percentage = float('nan')
    else:
        highest_earning_country = country_rich_ratio.idxmax()
        highest_earning_country_percentage = round(country_rich_ratio.max(), 1)

    india_rich_occupations = df.loc[
        is_rich & (df['native-country'] == 'India'), 'occupation'
    ].value_counts()
    # Same guard as above: there may be no >50K earners from India.
    if india_rich_occupations.empty:
        top_IN_occupation = None
    else:
        top_IN_occupation = india_rich_occupations.idxmax()

    if print_data:
        print("Number of each race:\n", race_count)
        print("Average age of men:", average_age_men)
        print("Percentage with Bachelors degrees:", percentage_bachelors)
        print("Percentage with higher education that earn >50K:", higher_education_rich)
        print("Percentage without higher education that earn >50K:", lower_education_rich)
        print("Min work time:", min_work_hours)
        print("Percentage of rich among min workers:", rich_percentage)
        print("Country with highest percentage:", highest_earning_country)
        print("Highest percentage:", highest_earning_country_percentage)
        print("Top occupation in India:", top_IN_occupation)

    return {
        'race_count': race_count,
        'average_age_men': average_age_men,
        'percentage_bachelors': percentage_bachelors,
        'higher_education_rich': higher_education_rich,
        'lower_education_rich': lower_education_rich,
        'min_work_hours': min_work_hours,
        'rich_percentage': rich_percentage,
        'highest_earning_country': highest_earning_country,
        'highest_earning_country_percentage': highest_earning_country_percentage,
        'top_IN_occupation': top_IN_occupation,
    }

# Run the analysis with default arguments (print_data=True), so every statistic
# is printed; the returned dict is not assigned to a name.
calculate_demographic_data()


Number of each race:
 race
White                 27816
Black                  3124
Asian-Pac-Islander     1039
Amer-Indian-Eskimo      311
Other                   271
Name: count, dtype: int64
Average age of men: 39.4
Percentage with Bachelors degrees: 16.4
Percentage with higher education that earn >50K: 46.5
Percentage without higher education that earn >50K: 17.4
Min work time: 1
Percentage of rich among min workers: 10.0
Country with highest percentage: Iran
Highest percentage: 41.9
Top occupation in India: Prof-specialty


{'race_count': race
 White                 27816
 Black                  3124
 Asian-Pac-Islander     1039
 Amer-Indian-Eskimo      311
 Other                   271
 Name: count, dtype: int64,
 'average_age_men': np.float64(39.4),
 'percentage_bachelors': np.float64(16.4),
 'higher_education_rich': np.float64(46.5),
 'lower_education_rich': np.float64(17.4),
 'min_work_hours': 1,
 'rich_percentage': np.float64(10.0),
 'highest_earning_country': 'Iran',
 'highest_earning_country_percentage': 41.9,
 'top_IN_occupation': 'Prof-specialty'}