In [12]:
import pandas as pd
import json
import requests
import datetime
from pprint import pprint
from config import census_api_key
from census import Census
import scipy.stats as st

In [2]:
c = Census(
    census_api_key,
    year = 2017
)

In [3]:
census_data = c.acs5.get(
    (
        "NAME",
        "B19013_001E",
        "B01003_001E",
        "B01002_001E",
        "B19301_001E",
        "B17001_002E",
        "B23025_005E",
        "B23025_002E"
    ),
    {'for': 'zip code tabulation area:*'}
)

# Convert to DataFrame
census_df = pd.DataFrame(census_data)

# Column renaming
census_df = census_df.rename(
    columns = {
        "B01003_001E": "Population",
        "B01002_001E": "Median Age",
        "B19013_001E": "Household Income",
        "B19301_001E": "Per Capita Income",
        "B17001_002E": "Poverty Count",
        "B23025_005E": 'Unemployed',
        "B23025_002E": 'Employed',
        "NAME": "Name",
        "zip code tabulation area": "Zipcode"
    }
)

# Add a Poverty Rate column (Poverty Count / Population)
census_df["Poverty Rate"] = 100 * census_df["Poverty Count"].astype(int) / census_df["Population"].astype(int)

# Configure the final DataFrame
census_df = census_df[
    [
        "Zipcode",
        "Population",
        "Median Age",
        'Unemployed',
        'Employed',
        "Household Income",
        "Per Capita Income",
        "Poverty Count",
        "Poverty Rate",
        
    ]
]

# Display DataFrame length and sample data
print(f"Number of rows in the DataFrame: {len(census_df)}")
census_df.head()

Number of rows in the DataFrame: 33120


Unnamed: 0,Zipcode,Population,Median Age,Unemployed,Employed,Household Income,Per Capita Income,Poverty Count,Poverty Rate
0,601,17599.0,38.9,2454.0,5953.0,11757.0,7041.0,11282.0,64.105915
1,602,39209.0,40.9,2538.0,14390.0,16190.0,8978.0,20428.0,52.100283
2,603,50135.0,40.4,3588.0,16044.0,16645.0,10897.0,25176.0,50.216416
3,606,6304.0,42.8,204.0,1707.0,13387.0,5960.0,4092.0,64.911168
4,610,27590.0,41.4,1474.0,10048.0,18741.0,9266.0,12553.0,45.498369


In [4]:
poverty_df = census_df.sort_values(by='Poverty Count', ascending=False)
poverty_df.head()

Unnamed: 0,Zipcode,Population,Median Age,Unemployed,Employed,Household Income,Per Capita Income,Poverty Count,Poverty Rate
30019,90011,108051.0,28.0,3954.0,48603.0,33824.0,10887.0,38839.0,35.945063
2645,10456,94667.0,30.5,5329.0,39388.0,26724.0,14424.0,37540.0,39.65479
27256,78521,92975.0,28.8,3280.0,34799.0,28935.0,12173.0,34978.0,37.620866
56,725,82822.0,40.4,5766.0,32983.0,20374.0,12575.0,34629.0,41.811354
2642,10453,82232.0,30.1,4555.0,36069.0,24421.0,13386.0,33803.0,41.106868


In [11]:
desired_zip_codes = ['44145', '75206', '55792', '98101', '44312', '79735']

filtered_df = poverty_df[poverty_df['Zipcode'].isin(desired_zip_codes)]

filtered_df

Unnamed: 0,Zipcode,Population,Median Age,Unemployed,Employed,Household Income,Per Capita Income,Poverty Count,Poverty Rate
25974,75206,38762.0,31.2,922.0,27456.0,63392.0,49708.0,5467.0,14.104019
14314,44312,31074.0,43.2,1165.0,16410.0,47393.0,24735.0,3098.0,9.96975
32338,98101,12408.0,38.9,419.0,8433.0,68750.0,70914.0,2061.0,16.610251
18733,55792,9795.0,42.7,261.0,4604.0,39080.0,27156.0,2054.0,20.969883
14256,44145,32387.0,46.7,628.0,17025.0,81966.0,51434.0,1716.0,5.298422
27714,79735,13383.0,36.4,194.0,5076.0,46445.0,18091.0,1352.0,10.102369


In [6]:
desired_zip_codes = ['98101', '98102', '98103', '98104', '98105', '98106', '98107', '98108', '98109', '98110', '98111', '98112', '98113', '98114', '98115', '98116', '98117', '98118', '98119', '98121', '98122', '98124', '98125', '98126', '98127', '98129', '98131', '98133', '98134', '98136', '98138', '98139', '98141', '98144', '98145', '98146', '98148', '98155', '98160', '98161', '98165', '98166', '98168', '98170', '98175', '98177', '98178', '98181', '98185', '98188', '98190', '98191', '98194', '98198', '98199']

seattle_df = poverty_df[poverty_df['Zipcode'].isin(desired_zip_codes)]

seattle_df

Unnamed: 0,Zipcode,Population,Median Age,Unemployed,Employed,Household Income,Per Capita Income,Poverty Count,Poverty Rate
32342,98105,47128.0,23.6,2112.0,25776.0,56015.0,37153.0,11346.0,24.07486
32352,98118,46800.0,38.6,2028.0,25242.0,62504.0,31665.0,9098.0,19.440171
32358,98133,48390.0,38.7,1295.0,28466.0,60409.0,36154.0,6247.0,12.909692
32355,98122,37270.0,31.4,897.0,25674.0,72018.0,51420.0,5835.0,15.656024
32345,98108,24134.0,35.6,1241.0,13613.0,55314.0,27214.0,5562.0,23.046325
32356,98125,40803.0,37.8,1298.0,23551.0,61014.0,39379.0,5499.0,13.47695
32369,98168,31771.0,36.6,1355.0,17092.0,53992.0,24762.0,5381.0,16.936829
32349,98115,51523.0,37.2,1159.0,30626.0,100794.0,55491.0,5081.0,9.861615
32375,98198,37262.0,38.2,1114.0,19867.0,59843.0,29483.0,4948.0,13.278944
32372,98178,27279.0,38.5,647.0,14235.0,72478.0,30979.0,4216.0,15.455112


In [13]:
seattle_df.describe()

Unnamed: 0,Population,Median Age,Unemployed,Employed,Household Income,Per Capita Income,Poverty Count,Poverty Rate
count,34.0,34.0,34.0,34.0,34.0,34.0,34.0,34.0
mean,28102.088235,37.75,833.764706,16942.0,76699.441176,48224.382353,3422.0,12.597036
std,11864.173088,4.546477,464.556227,7366.887863,19762.051846,16769.357361,2400.848449,6.707497
min,844.0,23.6,27.0,546.0,37610.0,24762.0,294.0,4.374562
25%,22085.0,35.15,507.0,12711.5,60560.25,31986.0,1724.0,7.415718
50%,25673.0,38.3,764.0,15575.5,73088.0,44614.5,2760.0,12.971864
75%,34229.5,39.8,1147.75,20125.75,95492.5,59074.5,4765.0,15.605796
max,51523.0,47.7,2112.0,36391.0,110051.0,85165.0,11346.0,34.834123


In [14]:
desired_zip_codes = ["75001", "75006", "75007", "75019", "75032", "75039", "75041", "75042", "75043", "75050", "75051", "75052", "75061",
          "75062", "75063", "75080", "75081", "75087", "75088", "75089", "75093", "75098", "75104", "75115", "75116", "75126",
          "75134", "75149", "75150", "75159", "75166", "75180", "75181", "75182", "75201", "75202", "75203", "75204", "75205",
          "75206", "75207", "75208", "75209", "75210", "75211", "75212", "75214", "75215", "75216", "75217", "75218", "75219",
          "75220", "75221", "75222", "75223", "75224", "75225", "75226", "75227", "75228", "75229", "75230", "75231", "75232",
          "75233", "75234", "75235", "75236", "75237", "75238", "75240", "75241", "75242", "75243", "75244", "75246", "75247",
          "75248", "75249", "75250", "75251", "75252", "75253", "75254", "75260", "75261", "75262", "75263", "75264", "75265",
          "75266", "75267", "75270", "75275", "75277", "75283", "75284", "75285", "75287", "75301", "75303", "75312", "75313",
          "75315", "75320", "75326", "75336", "75339", "75342", "75354", "75355", "75356", "75357", "75358", "75359", "75360",
          "75367", "75368", "75370", "75371", "75372", "75373", "75374", "75376", "75378", "75379", "75380", "75381", "75382",
          "75389", "75390", "75391", "75392", "75393", "75394", "75395", "75397", "75398"]

dallas_df = poverty_df[poverty_df['Zipcode'].isin(desired_zip_codes)]

dallas_df

Unnamed: 0,Zipcode,Population,Median Age,Unemployed,Employed,Household Income,Per Capita Income,Poverty Count,Poverty Rate
25984,75217,85249.0,28.6,1358.0,33662.0,35282.0,12792.0,26496.0,31.080716
25983,75216,52895.0,34.2,1834.0,19843.0,25763.0,13027.0,19585.0,37.026184
25979,75211,77600.0,28.4,2219.0,35942.0,38760.0,14994.0,19107.0,24.622423
26006,75243,67445.0,31.2,2432.0,37832.0,38640.0,25034.0,17188.0,25.484469
25993,75228,74605.0,31.0,2636.0,37124.0,40306.0,19520.0,16217.0,21.737149
...,...,...,...,...,...,...,...,...,...
25975,75207,8028.0,32.1,8.0,1851.0,79926.0,29326.0,150.0,1.868460
25967,75182,6077.0,38.0,166.0,3264.0,137721.0,48606.0,139.0,2.287313
25960,75166,4057.0,33.2,36.0,2070.0,91136.0,33750.0,19.0,0.468326
26018,75390,0.0,-666666666.0,0.0,0.0,-666666666.0,-666666666.0,0.0,


In [18]:
dallas_df = dallas_df.dropna()

In [19]:
dallas_df.describe()

Unnamed: 0,Population,Median Age,Unemployed,Employed,Household Income,Per Capita Income,Poverty Count,Poverty Rate
count,82.0,82.0,82.0,82.0,82.0,82.0,82.0,82.0
mean,32953.134146,34.446341,988.304878,17417.597561,62511.54878,34700.560976,5436.743902,17.122951
std,19615.54012,5.1324,649.245045,10287.104139,28061.532142,21032.202366,4892.123601,11.045396
min,712.0,26.8,8.0,178.0,16886.0,11578.0,19.0,0.468326
25%,18955.5,31.0,511.0,9773.0,41967.0,18975.0,2041.75,7.890119
50%,30542.0,33.75,916.0,16489.0,57719.5,28269.5,4197.5,15.203942
75%,42663.0,37.4,1299.5,23724.25,81664.5,45767.5,7502.0,24.074648
max,94133.0,59.6,3290.0,52441.0,173828.0,117586.0,26496.0,58.286517


In [20]:
desired_zip_codes = ["94101", "94102", "94103", "94104", "94105", "94107", "94108", "94109", "94110", "94111", "94112", "94114",
                 "94115", "94116", "94117", "94118", "94119", "94120", "94121", "94122", "94123", "94124", "94125", "94126",
                 "94127", "94129", "94130", "94131", "94132", "94133", "94134", "94140", "94141", "94142", "94146", "94147",
                 "94157", "94159", "94164", "94165", "94166", "94167", "94168", "94169", "94170", "94172", "94188"]

sanfransisco_df = poverty_df[poverty_df['Zipcode'].isin(desired_zip_codes)]

sanfransisco_df

Unnamed: 0,Zipcode,Population,Median Age,Unemployed,Employed,Household Income,Per Capita Income,Poverty Count,Poverty Rate
30985,94112,85373.0,41.4,3409.0,49223.0,82692.0,31905.0,7750.0,9.077811
30994,94124,35492.0,35.3,1854.0,18412.0,55823.0,26061.0,7529.0,21.213231
30983,94110,73737.0,36.6,2728.0,49145.0,109747.0,59660.0,7321.0,9.92853
30976,94102,30140.0,41.5,1045.0,17531.0,33552.0,43488.0,6957.0,23.082283
30982,94109,56587.0,36.9,1610.0,39109.0,79979.0,75460.0,6718.0,11.871985
30992,94122,62516.0,39.5,1570.0,37523.0,102838.0,51615.0,5924.0,9.475974
30977,94103,26990.0,38.8,762.0,16116.0,49052.0,55807.0,5870.0,21.748796
31000,94132,31155.0,33.2,1874.0,17391.0,72970.0,34028.0,5124.0,16.446798
31002,94134,43074.0,40.2,1845.0,23276.0,71352.0,28790.0,5048.0,11.719367
30987,94115,35751.0,36.0,1128.0,22657.0,103625.0,76686.0,4531.0,12.673771


In [24]:
sanfransisco_df = sanfransisco_df.dropna()

In [25]:
sanfransisco_df.describe()

Unnamed: 0,Population,Median Age,Unemployed,Employed,Household Income,Per Capita Income,Poverty Count,Poverty Rate
count,26.0,26.0,26.0,26.0,26.0,26.0,26.0,26.0
mean,32988.153846,38.853846,1111.076923,20382.730769,99611.653846,64917.769231,3774.115385,13.60277
std,21364.040838,5.382136,823.142949,13381.497299,43221.162092,29185.526139,2428.532352,9.457921
min,436.0,25.8,10.0,161.0,33552.0,18012.0,84.0,3.035581
25%,22185.0,35.925,476.5,12561.75,67654.5,50463.5,1589.75,8.539425
50%,30647.5,39.15,1015.5,18389.0,100605.0,61963.5,3921.5,10.631043
75%,43476.0,42.475,1592.5,25781.25,129715.25,76670.25,5683.5,18.561241
max,85373.0,51.3,3409.0,49223.0,199364.0,154723.0,7750.0,49.888143


In [None]:
desired_zip_codes = 

cleveland_df = poverty_df[poverty_df['Zipcode'].isin(desired_zip_codes)]

cleveland_df