In [1]:
import numpy as np

In [2]:
def calculate(l:list)-> dict: 
    l = np.array(l)
    l = l.reshape(3,3)
    mean_ax0 = l.mean(axis=0).tolist()
    mean_ax1 = l.mean(axis=1).tolist()
    mean_ax2 = l.mean().tolist()
    mean = {'mean': [mean_ax0, mean_ax1, mean_ax2]}

    var_ax0 = l.var(axis=0).tolist()
    var_ax1 = l.var(axis=1).tolist()
    var_ax2 = l.var().tolist()
    variance = {'variance': [var_ax0, var_ax1, var_ax2]}

    std_ax0 = l.std(axis=0).tolist()
    std_ax1 = l.std(axis=1).tolist()
    std_ax2 = l.std().tolist()
    std = {'standard deviation': [std_ax0, std_ax1, std_ax2]}

    max_ax0 = l.max(axis=0).tolist()
    max_ax1 = l.max(axis=1).tolist()
    max_ax2 = l.max().tolist()
    max = {'max': [max_ax0, max_ax1, max_ax2]}

    min_ax0 = l.min(axis=0).tolist()
    min_ax1 = l.min(axis=1).tolist()
    min_ax2 = l.min().tolist()
    min = {'min': [min_ax0, min_ax1, min_ax2]}

    sum_ax0 = l.sum(axis=0).tolist()
    sum_ax1 = l.sum(axis=1).tolist()
    sum_ax2 = l.sum().tolist()
    sum = {'sum': [sum_ax0, sum_ax1, sum_ax2]}
    
    return {**mean, **variance, **std, **max, **min, **sum}



In [3]:
calculate([0,1,2,3,4,5,6,7,8])

{'mean': [[3.0, 4.0, 5.0], [1.0, 4.0, 7.0], 4.0],
 'variance': [[6.0, 6.0, 6.0],
  [0.6666666666666666, 0.6666666666666666, 0.6666666666666666],
  6.666666666666667],
 'standard deviation': [[2.449489742783178,
   2.449489742783178,
   2.449489742783178],
  [0.816496580927726, 0.816496580927726, 0.816496580927726],
  2.581988897471611],
 'max': [[6, 7, 8], [2, 5, 8], 8],
 'min': [[0, 1, 2], [0, 3, 6], 0],
 'sum': [[9, 12, 15], [3, 12, 21], 36]}

In [4]:
import pandas as pd

In [5]:
def calculate_demographic_data(print_data=True):
    df = pd.read_csv('adult_data.csv')
    race_count = df['race'].value_counts()
    average_age_men = df.query('sex == "Male"')['age'].mean()
    percentage_bachelors = df['education'].value_counts(normalize=True)['Bachelors']*100
    advanced_df = df.query('education == "Bachelors" | education == "Masters" | education == "Doctorate"')
    no_advanced_df = df.query('education != "Bachelors" & education != "Masters" & education != "Doctorate"')
    higher_education = advanced_df['education'].count()
    lower_education = no_advanced_df['education'].count()
    higher_education_rich = (advanced_df['salary'] == '>50K').sum()/higher_education*100
    lower_education_rich = (no_advanced_df['salary'] == '>50K').sum()/lower_education*100
    min_work_hours  = df['hours-per-week'].min()
    num_min_workers = df[df['hours-per-week'] == min_work_hours]
    rich_percentage = num_min_workers[num_min_workers['salary'] == '>50K'].shape[0]/num_min_workers.shape[0]*100
    highest_earning_country = df.groupby('native-country')['salary'].value_counts().loc[:,'>50K']
    highest_earning_country_percentage = df.groupby('native-country')['salary'].value_counts(normalize=True).loc[:,'>50K']
    india = df[df['native-country'] == 'India']
    top_IN_occupation = india.groupby('occupation')['salary'].value_counts().loc[:,'>50K'].sort_values(ascending=False).idxmax()
    if print_data:
        print("Number of each race:\n", race_count) 
        print("Average age of men:", average_age_men)
        print(f"Percentage with Bachelors degrees: {percentage_bachelors}%")
        print(f"Percentage with higher education that earn >50K: {higher_education_rich}%")
        print(f"Percentage without higher education that earn >50K: {lower_education_rich}%")
        print(f"Min work time: {min_work_hours} hours/week")
        print(f"Percentage of rich among those who work fewest hours: {rich_percentage}%")
        print("Country with highest percentage of rich:", highest_earning_country)
        print(f"Highest percentage of rich people in country: {highest_earning_country_percentage}%")
        print("Top occupations in India:", top_IN_occupation)
    return {
        'race_count': race_count,
        'average_age_men': average_age_men,
        'percentage_bachelors': percentage_bachelors,
        'higher_education_rich': higher_education_rich,
        'lower_education_rich': lower_education_rich,
        'min_work_hours': min_work_hours,
        'rich_percentage': rich_percentage,
        'highest_earning_country': highest_earning_country,
        'highest_earning_country_percentage':
        highest_earning_country_percentage,
        'top_IN_occupation': top_IN_occupation
    }
    

In [6]:
calculate_demographic_data(print_data=False)

{'race_count': race
 White                 27816
 Black                  3124
 Asian-Pac-Islander     1039
 Amer-Indian-Eskimo      311
 Other                   271
 Name: count, dtype: int64,
 'average_age_men': np.float64(39.43354749885268),
 'percentage_bachelors': np.float64(16.44605509658794),
 'higher_education_rich': np.float64(46.535843011613935),
 'lower_education_rich': np.float64(17.3713601914639),
 'min_work_hours': np.int64(1),
 'rich_percentage': 10.0,
 'highest_earning_country': native-country
 ?                      146
 Cambodia                 7
 Canada                  39
 China                   20
 Columbia                 2
 Cuba                    25
 Dominican-Republic       2
 Ecuador                  4
 El-Salvador              9
 England                 30
 France                  12
 Germany                 44
 Greece                   8
 Guatemala                3
 Haiti                    4
 Honduras                 1
 Hong                     6
 Hungary  