In [1]:
import census
import pandas as pd
import numpy as np
import sys
from pathlib import Path
import geopandas as gpd
from shapely.geometry import Point
from pyproj import Proj
import matplotlib.pyplot as plt

# Lift pandas' truncation limits so displayed frames show every column and row.
for _option in ('display.max_columns', 'display.max_rows'):
    pd.set_option(_option, None)
# Render floats with two fixed decimals instead of scientific notation.
pd.set_option('display.float_format', '{:.2f}'.format)

In [2]:
# Build the path to the census summary CSV under the user's Documents
# folder, then load it into `df`.
home = str(Path.home())
output_path = home + '/Documents/'
census_csv = output_path + 'census_summ_2018.csv'
df = pd.read_csv(census_csv)

In [26]:
# Keep only the rows whose county_y code equals 53.
in_county_53 = df['county_y'].eq(53)
df = df[in_county_53]

In [27]:
# Boolean mask over df's columns: True where the column name contains 'moe'
# (the margin-of-error columns).
moe_cols = df.columns.str.contains('moe', regex=False)
# Display the names selected by the mask.
df.columns[moe_cols]

Index(['pop_18_moe', 'pop_nhislat_18_moe', 'white_18_moe', 'blk_18_moe',
       'aian_18_moe', 'asian_18_moe', 'nhopi_18_moe', 'sora_18_moe',
       'two_18_moe', 'twosora_18_moe', 'twoexc_18_moe', 'hislat_18_moe',
       'pop_12_moe', 'pop_nhislat_12_moe', 'white_12_moe', 'blk_12_moe',
       'aian_12_moe', 'asian_12_moe', 'nhopi_12_moe', 'sora_12_moe',
       'two_12_moe', 'twosora_12_moe', 'twoexc_12_moe', 'hislat_12_moe'],
      dtype='object')

In [28]:
# print all columns in the df
# for i in df.columns:
#     print("'"+i+"'"+",")

In [29]:
# Work on an independent copy of df that holds only the identifier columns
# plus, for each population group, the estimate and its margin of error
# (the *_moe column directly after it) for the _12 and _18 columns.
id_cols = ['GEO_ID_y', 'state_y', 'county_y', 'tract_y']
estimate_cols = [
    'pop_12', 'pop_12_moe', 'pop_18', 'pop_18_moe',
    'pop_nhislat_12', 'pop_nhislat_12_moe', 'pop_nhislat_18', 'pop_nhislat_18_moe',
    'white_12', 'white_12_moe', 'white_18', 'white_18_moe',
    'blk_12', 'blk_12_moe', 'blk_18', 'blk_18_moe',
    'aian_12', 'aian_12_moe', 'aian_18', 'aian_18_moe',
    'asian_12', 'asian_12_moe', 'asian_18', 'asian_18_moe',
    'nhopi_12', 'nhopi_12_moe', 'nhopi_18', 'nhopi_18_moe',
    'hislat_12', 'hislat_12_moe', 'hislat_18', 'hislat_18_moe',
]
pop_calcs = df.loc[:, id_cols + estimate_cols].copy()

In [30]:
# For every column that is immediately followed by its "<name>_moe" column,
# add "<name>moe_pct" = margin of error / estimate.
#
# Iterate over adjacent pairs of a *snapshot* of the column names:
#   * the last column has no successor, so it is never indexed past the end
#     (the previous `i < len(columns)` guard was always true and only avoided
#     an IndexError because new columns were appended during the loop);
#   * columns added inside the loop are not re-examined.
# A zero estimate yields inf (or NaN for 0/0) in the new column.
existing_cols = list(pop_calcs.columns)
for est_col, next_col in zip(existing_cols, existing_cols[1:]):
    if next_col == f"{est_col}_moe":
        pop_calcs[f"{est_col}moe_pct"] = pop_calcs[next_col] / pop_calcs[est_col]

In [31]:
# Swap +inf / -inf for NaN, modifying pop_calcs in place.
infinite_values = [np.inf, -np.inf]
pop_calcs.replace(to_replace=infinite_values, value=np.nan, inplace=True)

In [32]:
# People of color (POC) per row = total population minus white population,
# computed separately for the _12 and _18 columns.
pop_calcs['poc_12'] = pop_calcs['pop_12'] - pop_calcs['white_12']
pop_calcs['poc_18'] = pop_calcs['pop_18'] - pop_calcs['white_18']

# Relative change in POC from the _12 to the _18 value (0.20 == +20%).
# This runs after the earlier inf -> NaN clean-up, so a zero baseline would
# leave +/-inf in the column; map a zero denominator to NaN instead.
poc_12_nonzero = pop_calcs['poc_12'].replace(0, np.nan)
pop_calcs['poc_chg_1218'] = (pop_calcs['poc_18'] - pop_calcs['poc_12']) / poc_12_nonzero

In [37]:
# Summary statistics for rows whose POC change ratio is below 33
# (poc_chg_1218 is a ratio, so 33 corresponds to a 3,300% increase).
below_cutoff = pop_calcs['poc_chg_1218'].lt(33)
pop_calcs[below_cutoff].describe()

Unnamed: 0,state_y,county_y,tract_y,pop_12,pop_12_moe,pop_18,pop_18_moe,pop_nhislat_12,pop_nhislat_12_moe,pop_nhislat_18,pop_nhislat_18_moe,white_12,white_12_moe,white_18,white_18_moe,blk_12,blk_12_moe,blk_18,blk_18_moe,aian_12,aian_12_moe,aian_18,aian_18_moe,asian_12,asian_12_moe,asian_18,asian_18_moe,nhopi_12,nhopi_12_moe,nhopi_18,nhopi_18_moe,hislat_12,hislat_12_moe,hislat_18,hislat_18_moe,pop_12moe_pct,pop_18moe_pct,pop_nhislat_12moe_pct,pop_nhislat_18moe_pct,white_12moe_pct,white_18moe_pct,blk_12moe_pct,blk_18moe_pct,aian_12moe_pct,aian_18moe_pct,asian_12moe_pct,asian_18moe_pct,nhopi_12moe_pct,nhopi_18moe_pct,hislat_12moe_pct,hislat_18moe_pct,poc_12,poc_18,poc_chg_1218
count,171.0,171.0,171.0,171.0,171.0,171.0,171.0,171.0,171.0,171.0,171.0,171.0,171.0,171.0,171.0,171.0,171.0,171.0,171.0,171.0,171.0,171.0,171.0,171.0,171.0,171.0,171.0,171.0,171.0,171.0,171.0,171.0,171.0,171.0,171.0,171.0,171.0,171.0,171.0,171.0,171.0,160.0,162.0,140.0,133.0,165.0,169.0,93.0,109.0,170.0,171.0,171.0,171.0,171.0
mean,53.0,53.0,125613.12,4665.75,392.42,5014.19,404.02,4238.44,391.94,4485.47,406.44,3278.57,374.18,3381.44,377.92,307.25,152.77,322.61,150.83,46.22,49.08,54.07,58.08,272.35,133.15,306.32,134.12,59.54,62.6,69.04,70.54,427.31,221.75,528.71,217.73,0.09,0.08,0.1,0.1,0.12,0.12,0.8,0.71,1.22,1.26,0.67,0.6,1.26,1.22,0.62,0.5,1387.19,1632.75,0.29
std,0.0,0.0,214205.13,1432.41,146.28,1665.11,149.48,1320.33,126.08,1525.35,140.15,1192.57,118.98,1351.95,129.06,320.3,120.55,320.48,116.06,51.88,43.63,68.11,60.33,268.91,85.17,272.92,87.73,112.77,85.21,102.63,81.75,339.01,142.0,399.89,118.74,0.03,0.03,0.03,0.03,0.04,0.04,0.49,0.39,0.42,0.65,0.31,0.28,0.46,0.46,0.24,0.2,948.3,1024.27,0.45
min,53.0,53.0,60300.0,787.0,55.0,816.0,89.0,699.0,128.0,680.0,133.0,496.0,121.0,435.0,113.0,0.0,3.0,0.0,2.0,0.0,3.0,0.0,3.0,0.0,11.0,0.0,3.0,0.0,2.0,0.0,4.0,0.0,12.0,29.0,28.0,0.01,0.01,0.03,0.03,0.05,0.04,0.23,0.21,0.45,0.39,0.14,0.16,0.49,0.38,0.2,0.15,187.0,232.0,-0.45
25%,53.0,53.0,70257.0,3746.5,277.5,3959.5,302.5,3361.0,301.0,3485.0,308.5,2513.5,289.0,2473.0,293.5,46.5,52.5,66.0,60.0,8.5,17.0,2.5,17.0,96.5,70.0,111.5,68.5,0.0,12.0,0.0,17.0,183.0,112.5,274.0,133.0,0.07,0.07,0.08,0.08,0.09,0.1,0.46,0.39,0.94,0.93,0.48,0.42,0.93,0.91,0.47,0.34,631.0,830.5,0.0
50%,53.0,53.0,71705.0,4598.0,380.0,4933.0,391.0,4074.0,370.0,4301.0,389.0,3205.0,361.0,3354.0,373.0,199.0,135.0,204.0,119.0,29.0,36.0,28.0,33.0,201.0,117.0,214.0,115.0,8.0,17.0,18.0,28.0,357.0,191.0,429.0,195.0,0.08,0.08,0.09,0.09,0.12,0.11,0.64,0.59,1.15,1.1,0.59,0.54,1.19,1.2,0.61,0.48,1115.0,1370.0,0.2
75%,53.0,53.0,72904.0,5481.5,491.5,5947.0,492.5,5087.5,483.5,5531.0,473.5,4002.0,446.0,4185.0,453.0,480.0,227.0,515.5,219.5,62.0,62.5,81.5,80.5,358.0,183.0,389.0,176.0,70.0,84.0,109.0,113.0,607.5,300.5,662.0,280.0,0.11,0.1,0.12,0.11,0.15,0.14,1.04,0.93,1.49,1.45,0.79,0.73,1.47,1.54,0.72,0.63,1981.5,2198.5,0.47
max,53.0,53.0,940011.0,9258.0,813.0,11811.0,871.0,8067.0,731.0,10484.0,807.0,6165.0,741.0,8241.0,733.0,1884.0,620.0,1407.0,584.0,261.0,266.0,470.0,303.0,1709.0,478.0,1484.0,432.0,724.0,449.0,623.0,556.0,1913.0,704.0,2389.0,773.0,0.22,0.27,0.25,0.3,0.28,0.33,3.5,2.0,3.0,6.5,1.88,1.67,2.91,4.0,2.5,1.11,5963.0,6013.0,2.44


In [34]:
# Show the rows whose POC change ratio is 33 or more
# (poc_chg_1218 is a ratio, so 33 corresponds to a 3,300% increase).
at_or_above_cutoff = pop_calcs['poc_chg_1218'].ge(33)
pop_calcs[at_or_above_cutoff]

Unnamed: 0,GEO_ID_y,state_y,county_y,tract_y,pop_12,pop_12_moe,pop_18,pop_18_moe,pop_nhislat_12,pop_nhislat_12_moe,pop_nhislat_18,pop_nhislat_18_moe,white_12,white_12_moe,white_18,white_18_moe,blk_12,blk_12_moe,blk_18,blk_18_moe,aian_12,aian_12_moe,aian_18,aian_18_moe,asian_12,asian_12_moe,asian_18,asian_18_moe,nhopi_12,nhopi_12_moe,nhopi_18,nhopi_18_moe,hislat_12,hislat_12_moe,hislat_18,hislat_18_moe,pop_12moe_pct,pop_18moe_pct,pop_nhislat_12moe_pct,pop_nhislat_18moe_pct,white_12moe_pct,white_18moe_pct,blk_12moe_pct,blk_18moe_pct,aian_12moe_pct,aian_18moe_pct,asian_12moe_pct,asian_18moe_pct,nhopi_12moe_pct,nhopi_18moe_pct,hislat_12moe_pct,hislat_18moe_pct,poc_12,poc_18,poc_chg_1218
398,1400000US53053060200,53,53,60200,684.0,122.0,2414.0,371.0,684.0,122.0,1388.0,222.0,639.0,125.0,873.0,147.0,7.0,12.0,155.0,120.0,1.0,3.0,8.0,12.0,20.0,23.0,289.0,131.0,16.0,27.0,18.0,41.0,0.0,12.0,1026.0,304.0,0.18,0.15,0.18,0.16,0.2,0.17,1.71,0.77,3.0,1.5,1.15,0.45,1.69,2.28,,0.3,45.0,1541.0,33.24


# Analysis from the Data 

Across all 172 census tracts in Pierce County (county code 53, the filter applied above; the notebook previously said King County, but GEO_IDs beginning 53053 and the city of Tacoma belong to Pierce County), the number of people of color (POC, computed here as total population minus white population) increased by 48% per tract on average from 2012 to 2018. The maximum is an increase of roughly 3,300% in Census Tract 53053060200, which seems to be the [Port of Tacoma](https://www.citivelocity.com/citybuilder/eppublic/cb/us/cities/15012/zones/53053060200) area of Tacoma, Washington. I think this single tract pulls the mean up far too much, given that the median is only a 20% increase. When we remove the outlier, the mean drops to a 29% increase, while the median stays at 20%. We can use this change in POC in a couple of ways to look for relationships with other variables; either way, we are at a good starting point for further analysis. Awaiting the other datasets to bring in.