## Let's Try to Automate Census Data Collection

In [10]:
import geopandas as gpd
import pandas as pd
from census import Census
from us import states
from census.core import ACSClient
from shapely.geometry import Point

# Patch the ACSClient to handle 'long' data type
def _field_type(self, field, year):
    types = {
        'int': int,
        'float': float,
        'string': str,
        'long': int  # Add this line
    }
    return types.get(field, str)

ACSClient._field_type = _field_type

# Initialize Census API with your API key
c = Census("5dc8bf38af7b9e4766c7afba29c4ab08c539ec5f")

# Define a function to fetch and process data for a given state and county
def fetch_census_data(state_fips, county_fips):
    data = c.acs5.state_county_tract(
        fields=(
            'B02001_001E', 'B02001_002E', 'B02001_003E', 'B02001_004E', 'B02001_005E', 'B02001_006E',
            'B02001_007E', 'B02001_008E', 'B19013_001E', 'B23001_001E', 'B08141_001E', 'B08141_002E',
            'B08141_016E', 'B23001_001E', 'B25064_001E', 'B05001_001E', 'B05001_006E', 'B15001_001E',
            'B15002_001E', 'B15002_002E', 'B15002_019E', 'B15002_011E', 'B15002_028E', 'B15002_015E',
            'B15002_032E', 'B15002_016E', 'B15002_033E', 'B11005_001E', 'B11005_003E', 'B11005_005E',
            'B11005_008E', 'B11005_010E', 'B11007_001E', 'B11007_002E', 'B11007_004E', 'B11007_006E',
            'B16001_001E', 'B16001_002E', 'B16001_003E', 'B16001_004E', 'B16001_007E', 'B16001_037E',
            'B16001_043E', 'B16001_046E', 'B16001_064E', 'B18101_001E', 'B18101_002E', 'B18101_003E',
            'B08201_001E', 'B08201_002E', 'B08201_003E'
        ),  
        state_fips=state_fips,
        county_fips=county_fips,
        tract='*'  # Get all tracts
    )

    # Convert the data to a DataFrame
    data = pd.DataFrame(data)

    # Rename the columns for better readability
    data = data.rename(columns={
        'B02001_001E': 'Total_Population',
        'B02001_002E': 'White',
        'B02001_003E': 'Black',
        'B02001_004E': 'Native_American_Alaska_Native',
        'B02001_005E': 'Asian',
        'B02001_006E': 'Native_Hawaiian_Pacific_Islander',
        'B02001_007E': 'Other',
        'B02001_008E': 'Two_or_more_races',
        'B19013_001E': 'Median_Household_Income',
        'B23001_001E': 'Employed',
        'B08141_001E': 'Total_Commute',
        'B08141_002E': 'Commute_Drove_Alone',
        'B08141_016E': 'Commute_Public_Transportation',
        'B08201_001E': 'Total_Households',
        'B08201_002E': 'Zero_vehicles_available',
        'B08201_003E': '1_vehicle_available',
        'B23001_001E': 'Labor_Force',
        'B25064_001E': 'Median_Gross_Rent',
        'B05001_001E': 'US_Citizen',
        'B05001_006E': 'Not_a_US_Citizen',
        'B15001_001E': 'Education_status',
        'B15002_001E': 'Total_Population_25_and_Over',
        'B15002_002E': 'Male_Total',
        'B15002_019E': 'Female_Total',
        'B15002_011E': 'Male_High_School_Graduate',
        'B15002_028E': 'Female_High_School_Graduate',
        'B15002_015E': 'Male_Bachelor_Degree',
        'B15002_032E': 'Female_Bachelor_Degree',
        'B15002_016E': 'Male_Master_Degree',
        'B15002_033E': 'Female_Master_Degree',
        'B11005_003E': 'Households_with_Own_Children_Under_18',
        'B11005_005E': 'Married_Couple_Families_with_Own_Children_Under_18',
        'B11005_008E': 'Male_Householder_with_Own_Children_Under_18',
        'B11005_010E': 'Female_Householder_with_Own_Children_Under_18',
        'B11007_002E': 'Households_with_One_or_More_People_65_and_Over',
        'B11007_004E': 'Family_Households_with_One_or_More_People_65_and_Over',
        'B11007_006E': 'Nonfamily_Households_with_One_or_More_People_65_and_Over',
        'B16001_001E': 'Total_Population_5_and_Over',
        'B16001_002E': 'Speak_Only_English',
        'B16001_003E': 'Speak_Other_Languages',
        'B16001_004E': 'Speak_Spanish',
        'B16001_007E': 'Speak_French',
        'B16001_037E': 'Speak_Chinese',
        'B16001_043E': 'Speak_Korean',
        'B16001_046E': 'Speak_Vietnamese',
        'B16001_064E': 'Speak_Arabic',
        'B18101_001E': 'Total_Disability_Status',
        'B18101_002E': 'Total_With_Disability',
        'B18101_003E': 'Total_Without_Disability'
    })

    data = data.fillna(0)

    # Convert columns to numeric where applicable
    numeric_columns = ['Total_Population', 'White', 'Black', 'Native_American_Alaska_Native',
                       'Asian', 'Native_Hawaiian_Pacific_Islander', 'Other',
                       'Two_or_more_races', 'Total_Households', 'Households_with_Own_Children_Under_18',
                       'Married_Couple_Families_with_Own_Children_Under_18',
                       'Male_Householder_with_Own_Children_Under_18',
                       'Female_Householder_with_Own_Children_Under_18',
                       'Households_with_One_or_More_People_65_and_Over',
                       'Family_Households_with_One_or_More_People_65_and_Over',
                       'Nonfamily_Households_with_One_or_More_People_65_and_Over',
                       'Total_Population_5_and_Over', 'Speak_Only_English',
                       'Speak_Other_Languages', 'Speak_Spanish', 'Speak_French',
                       'Speak_Chinese', 'Speak_Korean', 'Speak_Vietnamese', 'Speak_Arabic',
                       'Total_Disability_Status', 'Total_With_Disability',
                       'Total_Without_Disability', 'Labor_Force', 'Total_Commute',
                       'Commute_Drove_Alone', 'Commute_Public_Transportation',
                       'Median_Gross_Rent', 'Zero_vehicles_available', '1_vehicle_available',
                       'Male_High_School_Graduate', 'Female_High_School_Graduate','Male_Bachelor_Degree','Female_Bachelor_Degree',
                      'Male_Master_Degree', 'Female_Master_Degree']

    data[numeric_columns] = data[numeric_columns].apply(pd.to_numeric, errors='coerce')

    # Calculate the Total Population
    data['Total_Population'] = data[['White', 'Black', 'Native_American_Alaska_Native', 'Asian',
                                     'Native_Hawaiian_Pacific_Islander', 'Other', 'Two_or_more_races']].sum(axis=1)
    return data

# Create a dictionary with state names and their corresponding counties and FIPS codes
counties = {
    'California': [('Los Angeles', '037')],
    'New York': [('New York', '061'), ('Kings', '047'), ('Bronx', '005')],
    'Illinois': [('Cook', '031')],
    'Texas': [('Harris', '201'), ('Dallas', '113')],
    'Arizona': [('Maricopa', '013')],
}

# Function to fetch tract shapefiles
def fetch_tract_shapefiles(state_fips, county_fips):
    base_url = f"https://www2.census.gov/geo/tiger/TIGER2021/TRACT/tl_2021_{state_fips}_tract.zip"
    gdf = gpd.read_file(base_url)
    return gdf

# Fetch data for all the counties and merge with shapefiles
all_data = []
for state, counties_list in counties.items():
    state_fips = states.lookup(state).fips
    for county_name, county_fips in counties_list:
        print(f'Fetching data for {county_name} County, {state}')
        census_data = fetch_census_data(state_fips, county_fips)
        tract_shapefiles = fetch_tract_shapefiles(state_fips, county_fips)

        # Merge census data with shapefiles
        merged_data = tract_shapefiles.merge(census_data, left_on='GEOID', right_on='tract')
        merged_data['State'] = state
        merged_data['County'] = county_name

        all_data.append(merged_data)

# Concatenate all the data into a single GeoDataFrame
final_gdf = gpd.GeoDataFrame(pd.concat(all_data, ignore_index=True))

# Export the GeoDataFrame to a file
final_gdf.to_file('census_data_all_counties.shp', driver='ESRI Shapefile')

# Alternatively, save as a shapefile if preferred
# final_gdf.to_file('census_data_all_counties.shp')


Fetching data for Los Angeles County, California


  data = data.fillna(0)


Fetching data for New York County, New York


  data = data.fillna(0)


Fetching data for Kings County, New York


  data = data.fillna(0)


Fetching data for Bronx County, New York


  data = data.fillna(0)


Fetching data for Cook County, Illinois


  data = data.fillna(0)


Fetching data for Harris County, Texas


  data = data.fillna(0)


Fetching data for Dallas County, Texas


  data = data.fillna(0)


Fetching data for Maricopa County, Arizona


  data = data.fillna(0)
  final_gdf.to_file('census_data_all_counties.shp', driver='ESRI Shapefile')
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(


In [31]:
import geopandas as gpd
from us import states
from census import Census
from shapely.geometry import Polygon

# Initialize Census API with your API key
c = Census("5dc8bf38af7b9e4766c7afba29c4ab08c539ec5f")

# Function to fetch tract shapefiles for a city
def fetch_city_tract_shapefiles(city, state):
    # Retrieve the geometry for the specified city using the Census API
    city_geo = c.acs5.geo([('NAME', 'tract')], states.lookup(state).fips, 'place', city, year=2019)
    
    # Extract the geometry for the specified city
    city_geometry = next(item for item in city_geo if item['NAME'] == city)
    city_polygon = Polygon(city_geometry['geometry']['coordinates'][0][0])
    
    # Retrieve the tract data for the specified state
    state_geo = c.acs5.geo_tract(('NAME', 'TRACT'), states.lookup(state).fips, 'state', state, year=2019)
    
    # Filter the tract data to include only tracts within the city polygon
    filtered_geo = [item for item in state_geo if Polygon(item['geometry']['coordinates'][0][0]).within(city_polygon)]
    
    # Extract the polygons for each census tract
    polygons = [Polygon(c['geometry']['coordinates'][0][0]) for c in filtered_geo]
    
    # Create a GeoDataFrame from the polygons
    city_gdf = gpd.GeoDataFrame(geometry=polygons)
    
    return city_gdf

# Example cities and states
cities = {
    'Chicago': 'IL',
    'Los Angeles': 'CA',
    'New York': 'NY',
    'Houston': 'TX',
    'Phoenix': 'AZ'
}

# Example usage
for city, state in cities.items():
    print(f'Fetching shapefile for {city}, {state}')
    city_shapefile = fetch_city_tract_shapefiles(city, state)
    print(city_shapefile)


Fetching shapefile for Chicago, IL


AttributeError: 'ACS5Client' object has no attribute 'geo'