In [2]:
import pandas as pd

def analyze_demographic_data():
    # Load the dataset
    df = pd.read_csv("adult.data", header=None, names=[
        'age', 'workclass', 'fnlwgt', 'education', 'education-num',
        'marital-status', 'occupation', 'relationship', 'race', 'sex',
        'capital-gain', 'capital-loss', 'hours-per-week', 'native-country', 'salary'
    ])

    # 1. How many of each race are represented in this dataset?
    race_count = df['race'].value_counts()

    # 2. What is the average age of men?
    average_age_men = round(df[df['sex'] == 'Male']['age'].mean(), 1)

    # 3. What is the percentage of people who have a Bachelor's degree?
    percentage_bachelors = round((df['education'] == 'Bachelors').mean() * 100, 1)

    # 4. What percentage of people with advanced education (Bachelors, Masters, or Doctorate) make more than 50K?
    higher_education = df['education'].isin(['Bachelors', 'Masters', 'Doctorate'])
    higher_education_rich = round((df[higher_education]['salary'] == '>50K').mean() * 100, 1)

    # 5. What percentage of people without advanced education make more than 50K?
    lower_education_rich = round((df[~higher_education]['salary'] == '>50K').mean() * 100, 1)

    # 6. What is the minimum number of hours a person works per week?
    min_work_hours = df['hours-per-week'].min()

    # 7. What percentage of the people who work the minimum number of hours per week have a salary of >50K?
    num_min_workers = df[df['hours-per-week'] == min_work_hours]
    rich_percentage = round((num_min_workers['salary'] == '>50K').mean() * 100, 1)

    # 8. What country has the highest percentage of people that earn >50K?
    country_earnings = df[df['salary'] == '>50K']['native-country'].value_counts()
    country_count = df['native-country'].value_counts()
    highest_earning_country = (country_earnings / country_count).idxmax()
    highest_earning_country_percentage = round((country_earnings / country_count).max() * 100, 1)

   # 9. Identify the most popular occupation for those who earn >50K in India.
    # Add a check to handle the case of no matching individuals
    india_high_earners = df[(df['native-country'] == 'India') & (df['salary'] == '>50K')]
    if not india_high_earners.empty:
        top_IN_occupation = india_high_earners['occupation'].value_counts().idxmax()
    else:
        top_IN_occupation = 'No data available'

    # Compile the results in a dictionary
    results = {
        'race_count': race_count,
        'average_age_men': average_age_men,
        'percentage_bachelors': percentage_bachelors,
        'higher_education_rich': higher_education_rich,
        'lower_education_rich': lower_education_rich,
        'min_work_hours': min_work_hours,
        'rich_percentage': rich_percentage,
        'highest_earning_country': highest_earning_country,
        'highest_earning_country_percentage': highest_earning_country_percentage,
        'top_IN_occupation': top_IN_occupation
    }

    return results

# Execute the function and display the results
results = analyze_demographic_data()
for key, value in results.items():
    print(f"{key}: {value}")


race_count: race
White                 27816
Black                  3124
Asian-Pac-Islander     1039
Amer-Indian-Eskimo      311
Other                   271
Name: count, dtype: int64
average_age_men: nan
percentage_bachelors: 0.0
higher_education_rich: nan
lower_education_rich: 0.0
min_work_hours: 1
rich_percentage: 0.0
highest_earning_country: nan
highest_earning_country_percentage: nan
top_IN_occupation: No data available


  highest_earning_country = (country_earnings / country_count).idxmax()
