In [1]:
import pandas as pd
import json
import requests
import datetime
from pprint import pprint
from config import census_api_key
from census import Census
import scipy.stats as st

In [2]:
c = Census(
    census_api_key,
    year = 2021
)

In [3]:
census_data = c.acs5.get(
    (
        "NAME",
        "B19013_001E",
        "B01003_001E",
        "B01002_001E",
        "B19301_001E",
        "B17001_002E",
        "B23025_005E",
        "B23025_002E"
    ),
    {'for': 'zip code tabulation area:*'}
)

# Convert to DataFrame
census_df = pd.DataFrame(census_data)

# Column renaming
census_df = census_df.rename(
    columns = {
        "B01003_001E": "Population",
        "B01002_001E": "Median Age",
        "B19013_001E": "Household Income",
        "B19301_001E": "Per Capita Income",
        "B17001_002E": "Poverty Count",
        "B23025_005E": 'Unemployed',
        "B23025_002E": 'Employed',
        "NAME": "Name",
        "zip code tabulation area": "Zipcode"
    }
)

# Add a Poverty Rate column (Poverty Count / Population)
census_df["Poverty Rate"] = 100 * census_df["Poverty Count"].astype(int) / census_df["Population"].astype(int)

# Configure the final DataFrame
census_df = census_df[
    [
        "Zipcode",
        "Population",
        "Median Age",
        'Unemployed',
        'Employed',
        "Household Income",
        "Per Capita Income",
        "Poverty Count",
        "Poverty Rate",
        
    ]
]

# Display DataFrame length and sample data
print(f"Number of rows in the DataFrame: {len(census_df)}")
census_df.head()

Number of rows in the DataFrame: 33774


Unnamed: 0,Zipcode,Population,Median Age,Unemployed,Employed,Household Income,Per Capita Income,Poverty Count,Poverty Rate
0,601,17126.0,43.7,1396.0,5566.0,15292.0,7587.0,11302.0,65.993227
1,602,37895.0,44.4,939.0,12218.0,18716.0,10699.0,17121.0,45.180103
2,603,49136.0,44.1,2712.0,15859.0,16789.0,12280.0,23617.0,48.064556
3,606,5751.0,44.9,113.0,1633.0,18835.0,8574.0,3139.0,54.581812
4,610,26153.0,43.5,855.0,9464.0,21239.0,11638.0,11640.0,44.507322


In [4]:
poverty_df = census_df.sort_values(by='Poverty Count', ascending=False)
poverty_df.head()

Unnamed: 0,Zipcode,Population,Median Age,Unemployed,Employed,Household Income,Per Capita Income,Poverty Count,Poverty Rate
2664,10456,90314.0,32.5,5674.0,38725.0,31166.0,17312.0,33294.0,36.864716
2495,8701,130352.0,18.4,2580.0,38655.0,53380.0,18966.0,32214.0,24.713085
2863,11219,93119.0,26.2,2807.0,35169.0,44450.0,17729.0,31973.0,34.335635
58,725,79153.0,43.7,4302.0,31935.0,24262.0,15654.0,31858.0,40.248632
2850,11206,90903.0,29.7,3649.0,42911.0,49013.0,27672.0,31273.0,34.402605


In [5]:
desired_zip_codes = ['44145', '75206', '55792', '98101', '44312', '79735']

filtered_df = poverty_df[poverty_df['Zipcode'].isin(desired_zip_codes)]

filtered_df

Unnamed: 0,Zipcode,Population,Median Age,Unemployed,Employed,Household Income,Per Capita Income,Poverty Count,Poverty Rate
26475,75206,38209.0,31.1,847.0,27249.0,79468.0,64270.0,4485.0,11.738072
14596,44312,30769.0,45.2,1169.0,16791.0,57217.0,31963.0,2847.0,9.252819
28264,79735,13267.0,36.4,75.0,5220.0,54051.0,25082.0,2599.0,19.58996
19117,55792,9907.0,44.3,280.0,4381.0,47102.0,29338.0,2125.0,21.44948
32979,98101,14528.0,35.1,303.0,10774.0,96893.0,91425.0,1799.0,12.382985
14538,44145,34049.0,46.3,737.0,17895.0,98285.0,58539.0,1638.0,4.810714


In [6]:
desired_zip_codes = ['98101', '98102', '98103', '98104', '98105', '98106', '98107', '98108', '98109', '98110', '98111', '98112', '98113', '98114', '98115', '98116', '98117', '98118', '98119', '98121', '98122', '98124', '98125', '98126', '98127', '98129', '98131', '98133','98136', '98138', '98139', '98141', '98144', '98145', '98146', '98148', '98155', '98160', '98161', '98165', '98166', '98168', '98170', '98175', '98177', '98178', '98181', '98185', '98188', '98190', '98191', '98194', '98198', '98199']

seattle_df = poverty_df[poverty_df['Zipcode'].isin(desired_zip_codes)]

seattle_df

Unnamed: 0,Zipcode,Population,Median Age,Unemployed,Employed,Household Income,Per Capita Income,Poverty Count,Poverty Rate
32983,98105,49448.0,24.1,1552.0,27436.0,73343.0,53246.0,10083.0,20.391118
32993,98118,48937.0,38.4,1266.0,28755.0,94790.0,48632.0,6188.0,12.644829
32996,98122,41541.0,30.9,1973.0,29716.0,87418.0,71064.0,5868.0,14.125803
32999,98133,50153.0,40.2,1184.0,29336.0,77602.0,47266.0,5188.0,10.344346
32997,98125,43386.0,37.6,1121.0,26558.0,82439.0,51315.0,4522.0,10.422717
33016,98198,38919.0,37.0,1587.0,19803.0,69833.0,36630.0,4501.0,11.565045
32986,98108,23759.0,37.1,714.0,13391.0,74985.0,39950.0,4471.0,18.818132
33010,98168,34614.0,37.6,1533.0,19285.0,71977.0,32867.0,4116.0,11.891142
32990,98115,53729.0,37.1,1292.0,33862.0,128443.0,72682.0,3874.0,7.210259
32984,98106,27489.0,35.4,585.0,16557.0,84506.0,45215.0,3673.0,13.361708


In [7]:
seattle_df = seattle_df.dropna()

In [8]:
seattle_df.describe()

Unnamed: 0,Population,Median Age,Unemployed,Employed,Household Income,Per Capita Income,Poverty Count,Poverty Rate
count,33.0,33.0,33.0,33.0,33.0,33.0,33.0,33.0
mean,30318.333333,37.381818,870.030303,18731.060606,102513.090909,63975.121212,2966.151515,9.686978
std,11474.827057,4.428632,410.814168,7393.368984,25487.077011,19797.188553,1934.687412,4.380641
min,11068.0,24.1,303.0,6472.0,60955.0,32867.0,641.0,2.610254
25%,22890.0,35.1,585.0,13391.0,82439.0,47266.0,1637.0,6.273473
50%,27190.0,37.6,802.0,16870.0,97368.0,66670.0,2372.0,9.58892
75%,36365.0,39.4,1140.0,21853.0,124795.0,78542.0,3874.0,11.891142
max,53729.0,49.2,1973.0,36694.0,148878.0,102595.0,10083.0,20.391118


In [9]:
desired_zip_codes = ["75001", "75006", "75007", "75019", "75032", "75039", "75041", "75042", "75043", "75050", "75051", "75052", "75061",
          "75062", "75063", "75080", "75081", "75087", "75088", "75089", "75093", "75098", "75104", "75115", "75116", "75126",
          "75134", "75149", "75150", "75159", "75166", "75180", "75181", "75182", "75201", "75202", "75203", "75204", "75205",
          "75206", "75207", "75208", "75209", "75210", "75211", "75212", "75214", "75215", "75216", "75217", "75218", "75219",
          "75220", "75221", "75222", "75223", "75224", "75225", "75226", "75227", "75228", "75229", "75230", "75231", "75232",
          "75233", "75234", "75235", "75236", "75237", "75238", "75240", "75241", "75242", "75243", "75244", "75246", "75247",
          "75248", "75249", "75250", "75251", "75252", "75253", "75254", "75260", "75261", "75262", "75263", "75264", "75265",
          "75266", "75267", "75270", "75275", "75277", "75283", "75284", "75285", "75287", "75301", "75303", "75312", "75313",
          "75315", "75320", "75326", "75336", "75339", "75342", "75354", "75355", "75356", "75357", "75358", "75359", "75360",
          "75367", "75368", "75370", "75371", "75372", "75373", "75374", "75376", "75378", "75379", "75380", "75381", "75382",
          "75389", "75390", "75391", "75392", "75393", "75394", "75395", "75397", "75398"]

dallas_df = poverty_df[poverty_df['Zipcode'].isin(desired_zip_codes)]

dallas_df

Unnamed: 0,Zipcode,Population,Median Age,Unemployed,Employed,Household Income,Per Capita Income,Poverty Count,Poverty Rate
26485,75217,87767.0,27.6,1149.0,34612.0,44384.0,15227.0,23667.0,26.965716
26484,75216,55108.0,33.2,1646.0,20627.0,31063.0,16151.0,17532.0,31.813893
26480,75211,75213.0,30.9,2221.0,36527.0,50368.0,19843.0,14837.0,19.726643
26507,75243,65118.0,33.8,2226.0,35729.0,43673.0,27970.0,14069.0,21.605393
26494,75228,71450.0,30.7,1855.0,34329.0,51018.0,23418.0,13823.0,19.346396
...,...,...,...,...,...,...,...,...,...
26513,75251,3804.0,30.7,33.0,2905.0,78245.0,50352.0,169.0,4.442692
26461,75166,5589.0,35.0,59.0,3319.0,105972.0,40261.0,122.0,2.182859
26517,75261,0.0,-666666666.0,0.0,0.0,-666666666.0,-666666666.0,0.0,
26518,75270,0.0,-666666666.0,0.0,0.0,-666666666.0,-666666666.0,0.0,


In [10]:
dallas_df = dallas_df.dropna()

In [11]:
dallas_df.describe()

Unnamed: 0,Population,Median Age,Unemployed,Employed,Household Income,Per Capita Income,Poverty Count,Poverty Rate
count,82.0,82.0,82.0,82.0,82.0,82.0,82.0,82.0
mean,33930.536585,34.721951,895.926829,18207.463415,73419.02439,40541.341463,4477.170732,13.87677
std,20033.706346,4.785963,589.453121,10423.196441,29507.249283,23118.815731,4084.80397,10.254064
min,870.0,25.6,6.0,245.0,17361.0,9645.0,122.0,2.182859
25%,19116.75,31.8,471.25,9699.25,50530.5,23434.75,1610.75,6.95179
50%,31804.5,33.8,800.0,16966.5,68180.5,34171.0,3491.5,11.456524
75%,46232.0,37.1,1191.0,25307.5,95805.25,50137.25,6170.75,19.530116
max,95745.0,56.5,2703.0,51564.0,169547.0,120241.0,23667.0,72.873563


In [12]:
desired_zip_codes = ["94101", "94102", "94103", "94104", "94105", "94107", "94108", "94109", "94110", "94111", "94112", "94114",
                 "94115", "94116", "94117", "94118", "94119", "94120", "94121", "94122", "94123", "94124", "94125", "94126",
                 "94127", "94129", "94130", "94131", "94132", "94133", "94134", "94140", "94141", "94142", "94146", "94147",
                 "94157", "94159", "94164", "94165", "94166", "94167", "94168", "94169", "94170", "94172", "94188"]

sanfransisco_df = poverty_df[poverty_df['Zipcode'].isin(desired_zip_codes)]

sanfransisco_df = sanfransisco_df.dropna()

sanfransisco_df

Unnamed: 0,Zipcode,Population,Median Age,Unemployed,Employed,Household Income,Per Capita Income,Poverty Count,Poverty Rate
31601,94112,84477.0,42.5,2853.0,48306.0,112795.0,43658.0,7560.0,8.949181
31599,94110,70859.0,38.3,2409.0,47029.0,143938.0,79561.0,6939.0,9.792687
31598,94109,56114.0,37.2,2322.0,39355.0,104476.0,91684.0,6850.0,12.207292
31610,94124,35982.0,37.7,1638.0,17887.0,66618.0,33172.0,6440.0,17.897838
31592,94102,33856.0,39.6,1541.0,20679.0,55888.0,60673.0,6092.0,17.993856
31593,94103,32430.0,36.4,1215.0,22033.0,93143.0,73194.0,5202.0,16.040703
31608,94122,60144.0,41.0,1976.0,36305.0,130708.0,64313.0,5001.0,8.315044
31618,94134,41977.0,43.0,1402.0,23272.0,93068.0,41443.0,4747.0,11.308574
31607,94121,43964.0,41.6,1384.0,25418.0,116970.0,71120.0,4224.0,9.607861
31603,94115,34193.0,36.7,811.0,21835.0,138023.0,100897.0,4195.0,12.268593


In [13]:
sanfransisco_df.describe()

Unnamed: 0,Population,Median Age,Unemployed,Employed,Household Income,Per Capita Income,Poverty Count,Poverty Rate
count,26.0,26.0,26.0,26.0,26.0,26.0,26.0,26.0
mean,32895.192308,39.165385,1092.730769,20537.807692,128804.461538,79984.846154,3336.576923,12.036861
std,20519.889423,5.119917,739.801247,12757.545106,51976.355537,32457.030683,2225.385615,7.910814
min,504.0,27.8,71.0,279.0,42591.0,30518.0,112.0,3.357942
25%,22195.75,36.425,512.25,12281.5,93086.75,59927.5,1543.75,6.367984
50%,33143.0,38.95,1068.0,20686.0,132680.0,75041.0,2786.0,10.55063
75%,41846.75,42.325,1404.25,25356.25,168166.5,96966.5,4937.5,15.679435
max,84477.0,51.5,2853.0,48306.0,244662.0,172211.0,7560.0,41.098901


In [14]:
desired_zip_codes = ["44102", "44103", "44104", "44105", "44106", "44107", "44108", "44109", "44110", "44111", "44112", "44113",
             "44114", "44115", "44117", "44118", "44119", "44120", "44121", "44124", "44125", "44126", "44127", "44128", "44129",
             "44130", "44134", "44135", "44137", "44142", "44143", "44144", "44181", "44188", "44190", "44191", "44192" ,"44193",
             "44194", "44195", "44197", "44198", "44199"]

cleveland_df = poverty_df[poverty_df['Zipcode'].isin(desired_zip_codes)]

cleveland_df

Unnamed: 0,Zipcode,Population,Median Age,Unemployed,Employed,Household Income,Per Capita Income,Poverty Count,Poverty Rate
14495,44102,42640.0,34.4,2211.0,20900.0,33188.0,24289.0,13713.0,32.159944
14498,44105,34339.0,39.4,2666.0,15637.0,31690.0,18734.0,11179.0,32.554821
14502,44109,39502.0,35.9,2296.0,20075.0,36777.0,21757.0,10210.0,25.846793
14497,44104,18830.0,31.9,1658.0,7193.0,18591.0,14485.0,9696.0,51.4923
14504,44111,42366.0,36.5,1953.0,22483.0,47076.0,26320.0,9178.0,21.663598
14513,44120,34629.0,41.1,1459.0,16623.0,40563.0,34366.0,7623.0,22.013341
14501,44108,21711.0,36.7,1536.0,8930.0,29763.0,22281.0,7228.0,33.29188
14528,44135,26836.0,39.2,1476.0,13759.0,41622.0,25742.0,7175.0,26.736473
14503,44110,18449.0,40.8,977.0,7825.0,24994.0,17661.0,7033.0,38.121307
14499,44106,24824.0,31.3,979.0,10842.0,32745.0,33352.0,6654.0,26.804705


In [15]:
cleveland_df = cleveland_df.dropna()

cleveland_df

Unnamed: 0,Zipcode,Population,Median Age,Unemployed,Employed,Household Income,Per Capita Income,Poverty Count,Poverty Rate
14495,44102,42640.0,34.4,2211.0,20900.0,33188.0,24289.0,13713.0,32.159944
14498,44105,34339.0,39.4,2666.0,15637.0,31690.0,18734.0,11179.0,32.554821
14502,44109,39502.0,35.9,2296.0,20075.0,36777.0,21757.0,10210.0,25.846793
14497,44104,18830.0,31.9,1658.0,7193.0,18591.0,14485.0,9696.0,51.4923
14504,44111,42366.0,36.5,1953.0,22483.0,47076.0,26320.0,9178.0,21.663598
14513,44120,34629.0,41.1,1459.0,16623.0,40563.0,34366.0,7623.0,22.013341
14501,44108,21711.0,36.7,1536.0,8930.0,29763.0,22281.0,7228.0,33.29188
14528,44135,26836.0,39.2,1476.0,13759.0,41622.0,25742.0,7175.0,26.736473
14503,44110,18449.0,40.8,977.0,7825.0,24994.0,17661.0,7033.0,38.121307
14499,44106,24824.0,31.3,979.0,10842.0,32745.0,33352.0,6654.0,26.804705


In [16]:
cleveland_df.describe()

Unnamed: 0,Population,Median Age,Unemployed,Employed,Household Income,Per Capita Income,Poverty Count,Poverty Rate
count,32.0,32.0,32.0,32.0,32.0,32.0,32.0,32.0
mean,26474.46875,38.55625,1213.28125,13596.0625,45011.40625,28787.46875,5517.5,23.544112
std,12458.705825,5.024645,590.974877,7240.978137,15940.452592,9534.767027,3038.802067,12.717518
min,4573.0,27.1,269.0,1891.0,18591.0,14485.0,1586.0,5.998611
25%,18556.25,34.55,869.5,8025.25,32481.25,22150.0,2948.0,14.144119
50%,24617.5,39.7,1108.0,12356.0,43374.0,26967.0,5489.0,22.28819
75%,35379.5,41.1,1563.5,18284.0,59013.5,33129.25,7068.5,32.258663
max,51451.0,51.0,2666.0,32621.0,70740.0,50953.0,13713.0,58.270089


In [17]:
desired_zip_codes = ["59801", "59802", "59803", "59804", "59806", "59807", "59808"]

missoula_df = poverty_df[poverty_df['Zipcode'].isin(desired_zip_codes)]

missoula_df

Unnamed: 0,Zipcode,Population,Median Age,Unemployed,Employed,Household Income,Per Capita Income,Poverty Count,Poverty Rate
20739,59801,30886.0,31.2,1172.0,20220.0,46112.0,32774.0,6287.0,20.355501
20740,59802,19790.0,35.5,695.0,12363.0,54450.0,33573.0,3263.0,16.488125
20743,59808,21669.0,37.6,513.0,12896.0,65567.0,39738.0,2363.0,10.904979
20741,59803,17052.0,39.6,341.0,9334.0,89946.0,45958.0,734.0,4.30448
20742,59804,7802.0,44.4,139.0,4083.0,75556.0,38872.0,317.0,4.063061


In [18]:
missoula_df = missoula_df.dropna()

In [19]:
missoula_df.describe()

Unnamed: 0,Population,Median Age,Unemployed,Employed,Household Income,Per Capita Income,Poverty Count,Poverty Rate
count,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0
mean,19439.8,37.66,572.0,11779.2,66326.2,38183.0,2592.8,11.223229
std,8325.374538,4.888558,393.528906,5874.641836,17272.221577,5335.090252,2386.602858,7.251826
min,7802.0,31.2,139.0,4083.0,46112.0,32774.0,317.0,4.063061
25%,17052.0,35.5,341.0,9334.0,54450.0,33573.0,734.0,4.30448
50%,19790.0,37.6,513.0,12363.0,65567.0,38872.0,2363.0,10.904979
75%,21669.0,39.6,695.0,12896.0,75556.0,39738.0,3263.0,16.488125
max,30886.0,44.4,1172.0,20220.0,89946.0,45958.0,6287.0,20.355501


In [20]:
desired_zip_codes = ['55792']

virginia_minnesota_df = poverty_df[poverty_df['Zipcode'].isin(desired_zip_codes)]

virginia_minnesota_df

Unnamed: 0,Zipcode,Population,Median Age,Unemployed,Employed,Household Income,Per Capita Income,Poverty Count,Poverty Rate
19117,55792,9907.0,44.3,280.0,4381.0,47102.0,29338.0,2125.0,21.44948


In [22]:
desired_zip_codes = ["46107", "46183", "46201", "46202", "46203", "46204", "46205", "46206", "46207", "46208", "46214", "46214", "46216",
      "46217", "46217", "46218", "46219", "46220", "46221", "46222", "46224", "46224", "46225", "46226", "46226", "46227",
      "46227", "46228", "46229", "46229", "46230", "46231", "46231", "46234", "46234", "46235", "46235", "46236", "46236",
      "46237", "46237", "46239", "46239", "46240", "46240", "46241", "46241", "46242", "46244", "46247", "46247", "46250",
      "46250", "46251", "46253", "46253" ,"46254", "46254", "46256", "46256", "46259", "46259", "46260", "46260", "46268",
      "46268", "46278", "46278", "46282"]

indianapolis_df = poverty_df[poverty_df['Zipcode'].isin(desired_zip_codes)]

indianapolis_df

Unnamed: 0,Zipcode,Population,Median Age,Unemployed,Employed,Household Income,Per Capita Income,Poverty Count,Poverty Rate
15374,46226,44764.0,33.8,2193.0,21794.0,37798.0,22464.0,11351.0,25.35743
15371,46222,38053.0,30.1,1564.0,17106.0,41502.0,17232.0,9703.0,25.498647
15375,46227,57359.0,33.6,1065.0,28934.0,47541.0,26974.0,8995.0,15.681933
15357,46201,31193.0,34.4,1741.0,15420.0,34790.0,22251.0,8795.0,28.195428
15359,46203,38829.0,35.0,1147.0,19262.0,44356.0,27025.0,8615.0,22.187025
15367,46218,29254.0,38.7,1420.0,12142.0,28758.0,18667.0,8567.0,29.284884
15385,46241,33756.0,30.5,1147.0,14576.0,44543.0,19696.0,8137.0,24.105344
15372,46224,39490.0,32.0,1135.0,20491.0,45668.0,23081.0,8117.0,20.554571
15387,46254,40540.0,31.6,1478.0,21218.0,51702.0,28157.0,7136.0,17.602368
15380,46235,33715.0,29.2,1067.0,17112.0,48699.0,22171.0,6832.0,20.263977


In [23]:
indianapolis_df = indianapolis_df.dropna()

In [24]:
indianapolis_df.describe()

Unnamed: 0,Population,Median Age,Unemployed,Employed,Household Income,Per Capita Income,Poverty Count,Poverty Rate
count,36.0,36.0,36.0,36.0,36.0,36.0,36.0,36.0
mean,27261.694444,35.316667,784.055556,14213.361111,61214.194444,34314.083333,4194.0,14.515307
std,12278.847037,3.466781,505.045964,6185.38659,22622.702242,11230.017528,3168.86662,7.99291
min,2206.0,29.2,53.0,1261.0,28758.0,17232.0,185.0,3.109328
25%,19122.25,32.85,373.0,11148.75,46485.5,26844.5,1656.25,7.775379
50%,28031.0,35.4,689.0,14571.5,55556.0,31895.5,3078.5,13.382864
75%,37083.25,37.325,1084.0,19049.5,75289.25,44033.75,6908.0,20.731057
max,57359.0,45.0,2193.0,28934.0,135602.0,57479.0,11351.0,32.239344
