# Income Analysis
In order to analyze the data of yearly income we use the "Income in the past 12 months" table.

The table is derived from the "American Community Survey" and comes in two types: "ACS 5-Year Estimates Subject Tables" or "ACS 1-Year Estimates Subject Tables".
The problem is that these two types of tables cannot be compared with each other: we can compare two "1-Year" tables, or two "5-Year" tables whose periods do not overlap, but we cannot compare a "1-Year" table with a "5-Year" table.

For all the cities, the 2020 data is only available in the "5-Year" table.

In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

In [None]:
def get_incomes(df, city, year):
    '''
    Plot the income distribution of a city as a grouped bar chart.

    For each of the four groups (household, family, married, nonfamily) the
    function reads ten percentage estimates and their margins from ``df``
    and draws them as bars with error bars, one cluster of four bars per
    income bracket.

    Parameters
    ----------
    df : table-like object indexable as ``df[column][row]``
        Must provide the columns ``<group>_estimates`` and
        ``<group>_margins`` for every group. Rows 1 through 10 are read,
        one per income bracket; row 0 is not used (presumably a totals
        row -- TODO confirm against the input table).
    city : value formatted into the plot title.
    year : value formatted into the plot title.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    '''

    groups = ['household', 'family', 'married', 'nonfamily']
    bracket_labels = ['<10k', '10-15k', '15-25k', '25-35k', '35-50k',
                      '50-75k', '75-100k', '100-150k', '150-200k', '>200k']
    rows = range(1, len(bracket_labels) + 1)
    bar_width = 0.2

    # Parse everything before touching matplotlib, so malformed data does
    # not leave an empty figure behind.
    # Values are kept per group: a single flat list filled bracket-by-bracket
    # would interleave the groups and cannot be sliced into per-group runs.
    estimates = {}
    margins = {}
    for group in groups:
        estimate_column = df[group + '_estimates']
        margin_column = df[group + '_margins']

        # Estimates drop their last character before conversion (a trailing
        # '%', judging by the original comment about percentages).
        estimates[group] = [float(estimate_column[row][:-1]) for row in rows]
        # Margins drop their first and last characters.
        # NOTE(review): this assumes strings shaped like '±1.2%' -- confirm,
        # since a value such as '±1.2' would lose its last digit here.
        margins[group] = [float(margin_column[row][1:-1]) for row in rows]

    # plot the data: one bar series per group, shifted by one bar width each
    base_positions = np.arange(len(bracket_labels))
    plt.figure(figsize=(10, 5))
    for offset, group in enumerate(groups):
        positions = base_positions + offset * bar_width
        plt.bar(positions, estimates[group], width=bar_width, label=group)
        plt.errorbar(positions, estimates[group], yerr=margins[group],
                     fmt='o', color='black', capsize=5)
    plt.xticks(base_positions, bracket_labels)
    plt.legend()
    plt.title(f'{city} incomes in {year}')
    plt.show()