# Income Analysis
In order to analyze the data of yearly income we use the "Income in the past 12 months" table.

The table is derived from the "American Community Survey" and it could be of two types: "ACS 5-Year Estimates Subject Tables" or "ACS 1-Year Estimates Subject Tables".
The problem is that these two types of tables cannot be compared with each other: we can compare two "1-Year" tables or two "5-Year" tables that do not overlap, but we cannot compare a "1-Year" table with a "5-Year" one.

For all the cities, the 2020 data is only available in the "5-Year" table.

In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

In [None]:
def get_incomes(df, city, year):
    '''
    function to get the incomes from the social data and plot it into a bar charts

    For each household type, rows 1 to 10 of the '<type>_estimates' and
    '<type>_margins' columns are read (one row per income bracket) and drawn
    as one series of bars with error bars.

    Inputs:
        - df: pandas dataframe with the '<type>_estimates' and '<type>_margins'
              columns for 'household', 'family', 'married' and 'nonfamily';
              the cells are expected to be strings
        - city: str, city name (only used in the plot title)
        - year: year of the data (only used in the plot title)
    '''

    household_types = ['household', 'family', 'married', 'nonfamily']
    brackets = ['<10k', '10-15k', '15-25k', '25-35k', '35-50k',
                '50-75k', '75-100k', '100-150k', '150-200k', '>200k']
    rows = range(1, len(brackets) + 1)

    # Parse everything before opening the figure. The values are grouped per
    # household type so that each plotted series holds the ten brackets of a
    # single type (a flat row-by-row list would mix the types in every series).
    estimates = {}
    margins = {}
    for household_type in household_types:
        # estimates drop their last character (presumably the '%' sign)
        estimates[household_type] = [
            float(df[household_type + '_estimates'][row][:-1]) for row in rows
        ]
        # margins drop their first and last characters
        # (presumably the '±' prefix and the '%' sign)
        margins[household_type] = [
            float(df[household_type + '_margins'][row][1:-1]) for row in rows
        ]

    # plot the data: one group of bars per bracket, one bar per household type
    positions = np.arange(len(brackets))
    bar_width = 0.2
    plt.figure(figsize=(10, 5))
    for offset, household_type in enumerate(household_types):
        x = positions + offset * bar_width
        plt.bar(x, estimates[household_type], width=bar_width, label=household_type)
        plt.errorbar(x, estimates[household_type], yerr=margins[household_type],
                     fmt='o', color='black', capsize=5)
    plt.xticks(positions, brackets)
    plt.legend()
    plt.title(f'{city} incomes in {year}')
    plt.show()

# Age and sex analysis
In order to analyze the data of gender per age we use the "Age and Sex" table.

The table is derived from the "American Community Survey" and it could be of two types: "ACS 5-Year Estimates Subject Tables" or "ACS 1-Year Estimates Subject Tables".
The problem is that these two types of tables cannot be compared with each other: we can compare two "1-Year" tables or two "5-Year" tables that do not overlap, but we cannot compare a "1-Year" table with a "5-Year" one.

For all the cities, the 2020 data is only available in the "5-Year" table.

In [None]:
def get_gender(df, city, year):
    '''
    function to get the gender from the social data and plot it into a bar charts

    Rows 2 to 19 (one per age group) of the '<gender>_estimates' and
    '<gender>_margins' columns are read for 'total', 'male' and 'female' and
    drawn as one series of bars with error bars per gender.

    Inputs:
        - df: pandas dataframe
        - city: str, city name
        - year: str, year
    '''

    def to_count(value, skip_first=False):
        # Cells that are not strings (e.g. floats or numpy scalars) are
        # converted directly: going through str() would yield '12.0', which
        # int() rejects, and slicing them would raise a TypeError.
        if not isinstance(value, str):
            return int(value)
        if skip_first:
            # drop the leading character (presumably the '±' sign)
            value = value[1:]
        # remove the thousands separators before converting
        return int(value.replace(',', ''))

    genders = ['total', 'male', 'female']
    ages = ['<5', '5-9', '10-14', '15-19', '20-24', '25-29', '30-34', '35-39', '40-44',
            '45-49', '50-54', '55-59', '60-64', '65-69', '70-74', '75-79', '80-84', '85+']
    rows = range(2, 2 + len(ages))

    # obtain the values of every gender, one entry per age group
    estimates = {}
    margins = {}
    for gender in genders:
        estimates[gender] = [to_count(df[gender + '_estimates'][row]) for row in rows]
        margins[gender] = [
            to_count(df[gender + '_margins'][row], skip_first=True) for row in rows
        ]

    # plot the data: one group of bars per age group, one bar per gender
    positions = np.arange(len(ages))
    bar_width = 0.2
    plt.figure(figsize=(10, 5))
    for offset, gender in enumerate(genders):
        x = positions + offset * bar_width
        plt.bar(x, estimates[gender], width=bar_width, label=gender)
        plt.errorbar(x, estimates[gender], yerr=margins[gender],
                     fmt='o', color='black', capsize=5)
    plt.xticks(positions, ages, rotation=45)
    plt.legend(loc='best')
    plt.title(f'{city} ages per gender in {year}')
    plt.ylabel('Number of people')
    plt.show()

# Race analysis
In order to analyze the data of race we use the "Race" table.

The table is derived from the "American Community Survey" and it could be of two types: "ACS 5-Year Estimates Subject Tables" or "ACS 1-Year Estimates Subject Tables".
The problem is that these two types of tables cannot be compared with each other: we can compare two "1-Year" tables or two "5-Year" tables that do not overlap, but we cannot compare a "1-Year" table with a "5-Year" one.

For all the cities, the 2020 data is only available in the "5-Year" table.

In [None]:
def get_race(df, city, year):
    '''
    function to get the race from the social data and plot it into a bar charts

    Rows 1 to 7 of the 'estimate' and 'margin' columns are read (one row per
    race label) and drawn as horizontal bars with error bars on a logarithmic
    x axis; each bar is annotated with its estimate.

    Inputs:
        - df: pandas dataframe
        - city: str, city name
        - year: str, year
    '''

    def to_count(value, skip_first=False):
        # Cells that are not strings (e.g. floats or numpy scalars) are
        # converted directly: going through str() would yield '12.0', which
        # int() rejects, and slicing them would raise a TypeError.
        if not isinstance(value, str):
            return int(value)
        if skip_first:
            # drop the leading character (presumably the '±' sign)
            value = value[1:]
        # remove the thousands separators before converting
        return int(value.replace(',', ''))

    labels = ['White', 'Black or \n African American', 'Indian and \n Alaska Native',
            'Asian', 'Native Hawaiian', 'Some Other Race', 'Two or More Races']
    rows = range(1, len(labels) + 1)

    estimates = [to_count(df['estimate'][row]) for row in rows]
    margins = [to_count(df['margin'][row], skip_first=True) for row in rows]

    # plot the data
    fig, ax = plt.subplots(figsize=(10, 5))

    plt.barh(labels, estimates, color=get_colors(estimates))
    plt.errorbar(estimates, labels, xerr=margins, fmt='o', color='black', capsize=5)
    plt.xscale('log')
    plt.yticks(size=8)
    # write each estimate to the right of its bar, past the end of the error bar
    for position, bar in enumerate(ax.patches):
        text_x = bar.get_width() + margins[position]
        plt.text(text_x, bar.get_y() + bar.get_height() / 2, f'{bar.get_width()}', ha='left', va='center')
    plt.title(f'Races in {city} in {year}')
    plt.show()