In [1]:
import pandas as pd
import json
import requests
import datetime
from pprint import pprint
from config import census_api_key
from census import Census
import scipy.stats as st

In [2]:
c = Census(
    census_api_key,
    year = 2021
)

In [3]:
census_data = c.acs5.get(
    (
        "NAME",
        "B19013_001E",
        "B01003_001E",
        "B17001_002E",
        "B23025_005E"
    ),
    {'for': 'zip code tabulation area:*'}
)

# Convert to DataFrame
census_df = pd.DataFrame(census_data)

# Column renaming
census_df = census_df.rename(
    columns = {
        "B01003_001E": "Population",
        "B19013_001E": "Household Income",
        "B17001_002E": "Poverty Count",
        "B23025_005E": 'Unemployed',
        "NAME": "Name",
        "zip code tabulation area": "Zipcode"
    }
)

# Add a Poverty Rate column (Poverty Count / Population)
census_df["Poverty Rate"] = 100 * census_df["Poverty Count"].astype(int) / census_df["Population"].astype(int)

# Configure the final DataFrame
census_df = census_df[
    [
        "Zipcode",
        "Population",
        'Unemployed',
        "Household Income",
        "Poverty Count",
        "Poverty Rate",
        
    ]
]

# Display DataFrame length and sample data
print(f"Number of rows in the DataFrame: {len(census_df)}")
census_df.head()

Number of rows in the DataFrame: 33774


Unnamed: 0,Zipcode,Population,Unemployed,Household Income,Poverty Count,Poverty Rate
0,601,17126.0,1396.0,15292.0,11302.0,65.993227
1,602,37895.0,939.0,18716.0,17121.0,45.180103
2,603,49136.0,2712.0,16789.0,23617.0,48.064556
3,606,5751.0,113.0,18835.0,3139.0,54.581812
4,610,26153.0,855.0,21239.0,11640.0,44.507322


In [4]:
poverty_df = census_df.sort_values(by='Poverty Count', ascending=False)
poverty_df.head()

Unnamed: 0,Zipcode,Population,Unemployed,Household Income,Poverty Count,Poverty Rate
2664,10456,90314.0,5674.0,31166.0,33294.0,36.864716
2495,8701,130352.0,2580.0,53380.0,32214.0,24.713085
2863,11219,93119.0,2807.0,44450.0,31973.0,34.335635
58,725,79153.0,4302.0,24262.0,31858.0,40.248632
2850,11206,90903.0,3649.0,49013.0,31273.0,34.402605


In [5]:
desired_zip_codes = ['44145', '75206', '55792', '98101', '44312', '79735']

filtered_df = poverty_df[poverty_df['Zipcode'].isin(desired_zip_codes)]

filtered_df

Unnamed: 0,Zipcode,Population,Unemployed,Household Income,Poverty Count,Poverty Rate
26475,75206,38209.0,847.0,79468.0,4485.0,11.738072
14596,44312,30769.0,1169.0,57217.0,2847.0,9.252819
28264,79735,13267.0,75.0,54051.0,2599.0,19.58996
19117,55792,9907.0,280.0,47102.0,2125.0,21.44948
32979,98101,14528.0,303.0,96893.0,1799.0,12.382985
14538,44145,34049.0,737.0,98285.0,1638.0,4.810714


In [6]:
desired_zip_codes = ['98101', '98102', '98103', '98104', '98105', '98106', '98107', '98108', '98109', '98110', '98111', '98112', '98113', '98114', '98115', '98116', '98117', '98118', '98119', '98121', '98122', '98124', '98125', '98126', '98127', '98129', '98131', '98133','98136', '98138', '98139', '98141', '98144', '98145', '98146', '98148', '98155', '98160', '98161', '98165', '98166', '98168', '98170', '98175', '98177', '98178', '98181', '98185', '98188', '98190', '98191', '98194', '98198', '98199']

seattle_df = poverty_df[poverty_df['Zipcode'].isin(desired_zip_codes)]

seattle_df

Unnamed: 0,Zipcode,Population,Unemployed,Household Income,Poverty Count,Poverty Rate
32983,98105,49448.0,1552.0,73343.0,10083.0,20.391118
32993,98118,48937.0,1266.0,94790.0,6188.0,12.644829
32996,98122,41541.0,1973.0,87418.0,5868.0,14.125803
32999,98133,50153.0,1184.0,77602.0,5188.0,10.344346
32997,98125,43386.0,1121.0,82439.0,4522.0,10.422717
33016,98198,38919.0,1587.0,69833.0,4501.0,11.565045
32986,98108,23759.0,714.0,74985.0,4471.0,18.818132
33010,98168,34614.0,1533.0,71977.0,4116.0,11.891142
32990,98115,53729.0,1292.0,128443.0,3874.0,7.210259
32984,98106,27489.0,585.0,84506.0,3673.0,13.361708


In [7]:
seattle_df = seattle_df.dropna()

In [8]:
numeric_columns_seattle = seattle_df.select_dtypes(include='number')
seattle_stats_data = numeric_columns_seattle.mean()
seattle_std_data = numeric_columns_seattle.std(skipna=True)

seattle_stats = pd.DataFrame(seattle_stats_data).transpose()
seattle_stats.index = ['Seattle']

seattle_std = pd.DataFrame(seattle_std_data).transpose()
seattle_std.index = ['Seattle Std Dev']

seattle_stats = pd.concat([seattle_stats, seattle_std])

seattle_stats

Unnamed: 0,Population,Unemployed,Household Income,Poverty Count,Poverty Rate
Seattle,30318.333333,870.030303,102513.090909,2966.151515,9.686978
Seattle Std Dev,11474.827057,410.814168,25487.077011,1934.687412,4.380641


In [9]:
desired_zip_codes = ["75001", "75006", "75007", "75019", "75032", "75039", "75041", "75042", "75043", "75050", "75051", "75052", "75061",
          "75062", "75063", "75080", "75081", "75087", "75088", "75089", "75093", "75098", "75104", "75115", "75116", "75126",
          "75134", "75149", "75150", "75159", "75166", "75180", "75181", "75182", "75201", "75202", "75203", "75204", "75205",
          "75206", "75207", "75208", "75209", "75210", "75211", "75212", "75214", "75215", "75216", "75217", "75218", "75219",
          "75220", "75221", "75222", "75223", "75224", "75225", "75226", "75227", "75228", "75229", "75230", "75231", "75232",
          "75233", "75234", "75235", "75236", "75237", "75238", "75240", "75241", "75242", "75243", "75244", "75246", "75247",
          "75248", "75249", "75250", "75251", "75252", "75253", "75254", "75260", "75261", "75262", "75263", "75264", "75265",
          "75266", "75267", "75270", "75275", "75277", "75283", "75284", "75285", "75287", "75301", "75303", "75312", "75313",
          "75315", "75320", "75326", "75336", "75339", "75342", "75354", "75355", "75356", "75357", "75358", "75359", "75360",
          "75367", "75368", "75370", "75371", "75372", "75373", "75374", "75376", "75378", "75379", "75380", "75381", "75382",
          "75389", "75390", "75391", "75392", "75393", "75394", "75395", "75397", "75398"]

dallas_df = poverty_df[poverty_df['Zipcode'].isin(desired_zip_codes)]

dallas_df

Unnamed: 0,Zipcode,Population,Unemployed,Household Income,Poverty Count,Poverty Rate
26485,75217,87767.0,1149.0,44384.0,23667.0,26.965716
26484,75216,55108.0,1646.0,31063.0,17532.0,31.813893
26480,75211,75213.0,2221.0,50368.0,14837.0,19.726643
26507,75243,65118.0,2226.0,43673.0,14069.0,21.605393
26494,75228,71450.0,1855.0,51018.0,13823.0,19.346396
...,...,...,...,...,...,...
26513,75251,3804.0,33.0,78245.0,169.0,4.442692
26461,75166,5589.0,59.0,105972.0,122.0,2.182859
26517,75261,0.0,0.0,-666666666.0,0.0,
26518,75270,0.0,0.0,-666666666.0,0.0,


In [10]:
dallas_df = dallas_df.dropna()

In [11]:
numeric_columns_dallas = dallas_df.select_dtypes(include='number')
dallas_stats_data = numeric_columns_dallas.mean()
dallas_std_data = numeric_columns_dallas.std(skipna=True)

dallas_stats = pd.DataFrame(dallas_stats_data).transpose()
dallas_stats.index = ['Dallas']

dallas_std = pd.DataFrame(dallas_std_data).transpose()
dallas_std.index = ['Dallas Std Dev']

dallas_stats = pd.concat([dallas_stats, dallas_std])

dallas_stats

Unnamed: 0,Population,Unemployed,Household Income,Poverty Count,Poverty Rate
Dallas,33930.536585,895.926829,73419.02439,4477.170732,13.87677
Dallas Std Dev,20033.706346,589.453121,29507.249283,4084.80397,10.254064


In [12]:
desired_zip_codes = ["94101", "94102", "94103", "94104", "94105", "94107", "94108", "94109", "94110", "94111", "94112", "94114",
                 "94115", "94116", "94117", "94118", "94119", "94120", "94121", "94122", "94123", "94124", "94125", "94126",
                 "94127", "94129", "94130", "94131", "94132", "94133", "94134", "94140", "94141", "94142", "94146", "94147",
                 "94157", "94159", "94164", "94165", "94166", "94167", "94168", "94169", "94170", "94172", "94188"]

sanfransisco_df = poverty_df[poverty_df['Zipcode'].isin(desired_zip_codes)]

sanfransisco_df = sanfransisco_df.dropna()

sanfransisco_df

Unnamed: 0,Zipcode,Population,Unemployed,Household Income,Poverty Count,Poverty Rate
31601,94112,84477.0,2853.0,112795.0,7560.0,8.949181
31599,94110,70859.0,2409.0,143938.0,6939.0,9.792687
31598,94109,56114.0,2322.0,104476.0,6850.0,12.207292
31610,94124,35982.0,1638.0,66618.0,6440.0,17.897838
31592,94102,33856.0,1541.0,55888.0,6092.0,17.993856
31593,94103,32430.0,1215.0,93143.0,5202.0,16.040703
31608,94122,60144.0,1976.0,130708.0,5001.0,8.315044
31618,94134,41977.0,1402.0,93068.0,4747.0,11.308574
31607,94121,43964.0,1384.0,116970.0,4224.0,9.607861
31603,94115,34193.0,811.0,138023.0,4195.0,12.268593


In [13]:
numeric_columns_sanfransisco = sanfransisco_df.select_dtypes(include='number')
sanfransisco_stats_data = numeric_columns_sanfransisco.mean()
sanfransisco_std_data = numeric_columns_sanfransisco.std(skipna=True)

sanfransisco_stats = pd.DataFrame(sanfransisco_stats_data).transpose()
sanfransisco_stats.index = ['Sanfransisco']

sanfransisco_std = pd.DataFrame(sanfransisco_std_data).transpose()
sanfransisco_std.index = ['Sanfransisco Std Dev']

sanfransisco_stats = pd.concat([sanfransisco_stats, sanfransisco_std])

sanfransisco_stats

Unnamed: 0,Population,Unemployed,Household Income,Poverty Count,Poverty Rate
Sanfransisco,32895.192308,1092.730769,128804.461538,3336.576923,12.036861
Sanfransisco Std Dev,20519.889423,739.801247,51976.355537,2225.385615,7.910814


In [14]:
desired_zip_codes = ["44102", "44103", "44104", "44105", "44106", "44107", "44108", "44109", "44110", "44111", "44112", "44113",
             "44114", "44115", "44117", "44118", "44119", "44120", "44121", "44124", "44125", "44126", "44127", "44128", "44129",
             "44130", "44134", "44135", "44137", "44142", "44143", "44144", "44181", "44188", "44190", "44191", "44192" ,"44193",
             "44194", "44195", "44197", "44198", "44199"]

cleveland_df = poverty_df[poverty_df['Zipcode'].isin(desired_zip_codes)]

cleveland_df

Unnamed: 0,Zipcode,Population,Unemployed,Household Income,Poverty Count,Poverty Rate
14495,44102,42640.0,2211.0,33188.0,13713.0,32.159944
14498,44105,34339.0,2666.0,31690.0,11179.0,32.554821
14502,44109,39502.0,2296.0,36777.0,10210.0,25.846793
14497,44104,18830.0,1658.0,18591.0,9696.0,51.4923
14504,44111,42366.0,1953.0,47076.0,9178.0,21.663598
14513,44120,34629.0,1459.0,40563.0,7623.0,22.013341
14501,44108,21711.0,1536.0,29763.0,7228.0,33.29188
14528,44135,26836.0,1476.0,41622.0,7175.0,26.736473
14503,44110,18449.0,977.0,24994.0,7033.0,38.121307
14499,44106,24824.0,979.0,32745.0,6654.0,26.804705


In [15]:
cleveland_df = cleveland_df.dropna()

cleveland_df

Unnamed: 0,Zipcode,Population,Unemployed,Household Income,Poverty Count,Poverty Rate
14495,44102,42640.0,2211.0,33188.0,13713.0,32.159944
14498,44105,34339.0,2666.0,31690.0,11179.0,32.554821
14502,44109,39502.0,2296.0,36777.0,10210.0,25.846793
14497,44104,18830.0,1658.0,18591.0,9696.0,51.4923
14504,44111,42366.0,1953.0,47076.0,9178.0,21.663598
14513,44120,34629.0,1459.0,40563.0,7623.0,22.013341
14501,44108,21711.0,1536.0,29763.0,7228.0,33.29188
14528,44135,26836.0,1476.0,41622.0,7175.0,26.736473
14503,44110,18449.0,977.0,24994.0,7033.0,38.121307
14499,44106,24824.0,979.0,32745.0,6654.0,26.804705


In [16]:
numeric_columns_cleveland = cleveland_df.select_dtypes(include='number')
cleveland_stats_data = numeric_columns_cleveland.mean()
cleveland_std_data = numeric_columns_cleveland.std(skipna=True)

cleveland_stats = pd.DataFrame(cleveland_stats_data).transpose()
cleveland_stats.index = ['Cleveland']

cleveland_std = pd.DataFrame(cleveland_std_data).transpose()
cleveland_std.index = ['Cleveland Std Dev']

cleveland_stats = pd.concat([cleveland_stats, cleveland_std])

cleveland_stats

Unnamed: 0,Population,Unemployed,Household Income,Poverty Count,Poverty Rate
Cleveland,26474.46875,1213.28125,45011.40625,5517.5,23.544112
Cleveland Std Dev,12458.705825,590.974877,15940.452592,3038.802067,12.717518


In [17]:
desired_zip_codes = ["59801", "59802", "59803", "59804", "59806", "59807", "59808"]

missoula_df = poverty_df[poverty_df['Zipcode'].isin(desired_zip_codes)]

missoula_df

Unnamed: 0,Zipcode,Population,Unemployed,Household Income,Poverty Count,Poverty Rate
20739,59801,30886.0,1172.0,46112.0,6287.0,20.355501
20740,59802,19790.0,695.0,54450.0,3263.0,16.488125
20743,59808,21669.0,513.0,65567.0,2363.0,10.904979
20741,59803,17052.0,341.0,89946.0,734.0,4.30448
20742,59804,7802.0,139.0,75556.0,317.0,4.063061


In [18]:
missoula_df = missoula_df.dropna()

In [19]:
numeric_columns_missoula = missoula_df.select_dtypes(include='number')
missoula_stats_data = numeric_columns_missoula.mean()
missoula_std_data = numeric_columns_missoula.std(skipna=True)

missoula_stats = pd.DataFrame(missoula_stats_data).transpose()
missoula_stats.index = ['Missoula']

missoula_std = pd.DataFrame(missoula_std_data).transpose()
missoula_std.index = ['Missoula Std Dev']

missoula_stats = pd.concat([missoula_stats, missoula_std])

missoula_stats

Unnamed: 0,Population,Unemployed,Household Income,Poverty Count,Poverty Rate
Missoula,19439.8,572.0,66326.2,2592.8,11.223229
Missoula Std Dev,8325.374538,393.528906,17272.221577,2386.602858,7.251826


In [20]:
desired_zip_codes = ['55792']

virginia_minnesota_df = poverty_df[poverty_df['Zipcode'].isin(desired_zip_codes)]

virginia_minnesota_df

Unnamed: 0,Zipcode,Population,Unemployed,Household Income,Poverty Count,Poverty Rate
19117,55792,9907.0,280.0,47102.0,2125.0,21.44948


In [21]:
# Assuming virginia_minnesota_df is your original DataFrame
numeric_columns = virginia_minnesota_df.select_dtypes(include='number')
virginia_stats_data = numeric_columns.mean()
virginia_std_data = numeric_columns.std(skipna=True)  # Set skipna=True to exclude NaN values

# Create the DataFrame with mean statistics
virginia_stats = pd.DataFrame(virginia_stats_data).transpose()
virginia_stats.index = ['Virginia Minnesota']

# Concatenate the two DataFrames vertically to add the second row
virginia_stats = pd.concat([virginia_stats])

# Display the final DataFrame
virginia_stats

Unnamed: 0,Population,Unemployed,Household Income,Poverty Count,Poverty Rate
Virginia Minnesota,9907.0,280.0,47102.0,2125.0,21.44948


In [22]:
desired_zip_codes = ["46107", "46183", "46201", "46202", "46203", "46204", "46205", "46206", "46207", "46208", "46214", "46214", "46216",
      "46217", "46217", "46218", "46219", "46220", "46221", "46222", "46224", "46224", "46225", "46226", "46226", "46227",
      "46227", "46228", "46229", "46229", "46230", "46231", "46231", "46234", "46234", "46235", "46235", "46236", "46236",
      "46237", "46237", "46239", "46239", "46240", "46240", "46241", "46241", "46242", "46244", "46247", "46247", "46250",
      "46250", "46251", "46253", "46253" ,"46254", "46254", "46256", "46256", "46259", "46259", "46260", "46260", "46268",
      "46268", "46278", "46278", "46282"]

indianapolis_df = poverty_df[poverty_df['Zipcode'].isin(desired_zip_codes)]

indianapolis_df

Unnamed: 0,Zipcode,Population,Unemployed,Household Income,Poverty Count,Poverty Rate
15374,46226,44764.0,2193.0,37798.0,11351.0,25.35743
15371,46222,38053.0,1564.0,41502.0,9703.0,25.498647
15375,46227,57359.0,1065.0,47541.0,8995.0,15.681933
15357,46201,31193.0,1741.0,34790.0,8795.0,28.195428
15359,46203,38829.0,1147.0,44356.0,8615.0,22.187025
15367,46218,29254.0,1420.0,28758.0,8567.0,29.284884
15385,46241,33756.0,1147.0,44543.0,8137.0,24.105344
15372,46224,39490.0,1135.0,45668.0,8117.0,20.554571
15387,46254,40540.0,1478.0,51702.0,7136.0,17.602368
15380,46235,33715.0,1067.0,48699.0,6832.0,20.263977


In [23]:
indianapolis_df = indianapolis_df.dropna()

In [24]:
numeric_columns_indy = indianapolis_df.select_dtypes(include='number')
indy_stats_data = numeric_columns_indy.mean()
indy_std_data = numeric_columns_indy.std(skipna=True)  # Set skipna=True to exclude NaN values

indy_stats = pd.DataFrame(indy_stats_data).transpose()
indy_stats.index = ['Indianapolis']

indy_std = pd.DataFrame(indy_std_data).transpose()
indy_std.index = ['Indy Std Dev']

indy_stats = pd.concat([indy_stats, indy_std])

indy_stats

Unnamed: 0,Population,Unemployed,Household Income,Poverty Count,Poverty Rate
Indianapolis,27261.694444,784.055556,61214.194444,4194.0,14.515307
Indy Std Dev,12278.847037,505.045964,22622.702242,3168.86662,7.99291


In [26]:
combined_stats = pd.concat([cleveland_stats, dallas_stats, indy_stats, missoula_stats, sanfransisco_stats, seattle_stats, virginia_stats])
combined_stats

Unnamed: 0,Population,Unemployed,Household Income,Poverty Count,Poverty Rate
Cleveland,26474.46875,1213.28125,45011.40625,5517.5,23.544112
Cleveland Std Dev,12458.705825,590.974877,15940.452592,3038.802067,12.717518
Dallas,33930.536585,895.926829,73419.02439,4477.170732,13.87677
Dallas Std Dev,20033.706346,589.453121,29507.249283,4084.80397,10.254064
Indianapolis,27261.694444,784.055556,61214.194444,4194.0,14.515307
Indy Std Dev,12278.847037,505.045964,22622.702242,3168.86662,7.99291
Missoula,19439.8,572.0,66326.2,2592.8,11.223229
Missoula Std Dev,8325.374538,393.528906,17272.221577,2386.602858,7.251826
Sanfransisco,32895.192308,1092.730769,128804.461538,3336.576923,12.036861
Sanfransisco Std Dev,20519.889423,739.801247,51976.355537,2225.385615,7.910814
