This notebook provides code for importing Census TIGER/Line shapefiles and joining them to a GeoJSON file containing transit station coordinates and areas. It includes separate code for block and block group shapefiles. TIGER/Line data can be found at https://www.census.gov/geographies/mapping-files/time-series/geo/tiger-line-file.html.  

### **Import and Upload Census TIGER/Line Block Shapefiles**

In [None]:
import zipfile
import os

# Uploaded TIGER/Line archives to unpack (one entry per zip file)
zip_files = ['/content/tl_2020_36_tabblock20.zip']  # 2020 New York State TIGER/Line Block File

# Root folder that receives one sub-folder per extracted archive
extract_dir_base = '/mnt/data/shapefiles/'

# Function to unzip files
def unzip_shapefiles(zip_files, extract_dir_base):
    """Extract each zip archive into its own sub-folder of ``extract_dir_base``.

    Args:
        zip_files: Iterable of paths to zip archives.
        extract_dir_base: Directory under which one folder per archive is
            created. The folder is named after the archive file with a
            trailing ``.zip`` extension removed.

    Raises:
        FileNotFoundError: If an archive path does not exist.
        zipfile.BadZipFile: If a file is not a valid zip archive.
    """
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs
    os.makedirs(extract_dir_base, exist_ok=True)

    # Loop through the list of zip files and extract each
    for zip_path in zip_files:
        # Strip only a trailing ".zip"; str.replace('.zip', '') would also
        # remove ".zip" occurring in the middle of a file name.
        base_name = os.path.basename(zip_path)
        stem, ext = os.path.splitext(base_name)
        folder_name = stem if ext.lower() == '.zip' else base_name
        extract_path = os.path.join(extract_dir_base, folder_name)

        # Open the archive before creating its folder so that a bad path
        # does not leave an empty directory behind.
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            os.makedirs(extract_path, exist_ok=True)
            zip_ref.extractall(extract_path)
        print(f"Files extracted to: {extract_path}")

# Unzip all the shapefiles listed in `zip_files`; each archive is extracted into
# its own sub-folder of `extract_dir_base` and the destination is printed
unzip_shapefiles(zip_files, extract_dir_base)


Files extracted to: /mnt/data/shapefiles/tl_2020_36_tabblock20


In [None]:
import geopandas as gpd
import pandas as pd
import os

# Folders produced by the unzip step, one per extracted archive
extracted_paths = ['/mnt/data/shapefiles/tl_2020_36_tabblock20']  # New York State

# Function to read shapefiles into GeoDataFrames and concatenate them
def concatenate_shapefiles(paths):
    """Read the shapefile in each directory and stack them into one GeoDataFrame.

    Args:
        paths: Iterable of directories, each expected to hold one ``.shp``
            file plus its associated files.

    Returns:
        A GeoDataFrame with the rows of every shapefile read, re-indexed
        from 0.

    Raises:
        ValueError: If no ``.shp`` file is found in any of the directories.
    """
    gdfs = []
    for path in paths:
        # os.listdir order is arbitrary; sorting makes the pick reproducible
        # if a directory ever holds more than one .shp file.
        shp_names = sorted(name for name in os.listdir(path) if name.endswith(".shp"))
        if not shp_names:
            print(f"No .shp file found in: {path}")
            continue
        # We assume there's only one .shp file in each directory
        gdfs.append(gpd.read_file(os.path.join(path, shp_names[0])))

    if not gdfs:
        # Fail with an actionable message instead of pandas' generic
        # "No objects to concatenate".
        raise ValueError("No .shp files found in the given directories")

    # Concatenate all GeoDataFrames into one
    return gpd.GeoDataFrame(pd.concat(gdfs, ignore_index=True))

# Destination of the merged layer
consolidated_shapefile_path = '/mnt/data/shapefiles/consolidated_bg.shp'

# Merge every extracted shapefile, then persist the result as one shapefile
consolidated_gdf = concatenate_shapefiles(extracted_paths)
consolidated_gdf.to_file(consolidated_shapefile_path)


In [None]:
import os

# Folder that holds the consolidated shapefile and its companion files
consolidated_dir = '/mnt/data/shapefiles/'

# Keep only the directory entries that belong to the consolidated shapefile
shapefile_components = list(
    filter(lambda name: 'consolidated_bg' in name, os.listdir(consolidated_dir))
)

# Map each component's file name to its full path
shapefile_paths = dict(
    (name, os.path.join(consolidated_dir, name)) for name in shapefile_components
)

shapefile_paths


{'consolidated_bg.shp': '/mnt/data/shapefiles/consolidated_bg.shp',
 'consolidated_bg.cpg': '/mnt/data/shapefiles/consolidated_bg.cpg',
 'consolidated_bg.dbf': '/mnt/data/shapefiles/consolidated_bg.dbf',
 'consolidated_bg.prj': '/mnt/data/shapefiles/consolidated_bg.prj',
 'consolidated_bg.shx': '/mnt/data/shapefiles/consolidated_bg.shx'}

### **Join the TIGER/Line Shapefiles with the Transit Station GeoJSON File**

In [None]:
import geopandas as gpd

# Inputs: the station GeoJSON uploaded to the Colab environment and the
# consolidated shapefile written by the consolidation step
stations_geojson_path = '/content/buffered_stations (1).geojson'
consolidated_shapefile_path = '/mnt/data/shapefiles/consolidated_bg.shp'

# Load the GeoJSON and shapefile into GeoDataFrames
stations_gdf = gpd.read_file(stations_geojson_path)
blocks_gdf = gpd.read_file(consolidated_shapefile_path)

# Check the CRS for both GeoDataFrames
print("Stations CRS: ", stations_gdf.crs)
print("Blocks CRS: ", blocks_gdf.crs)

# TIGER/Line shapefiles are published in NAD83 (EPSG:4269). If the CRS is
# missing, declare that CRS rather than borrowing the stations' CRS, which
# would relabel the coordinates without transforming them.
if blocks_gdf.crs is None:
    blocks_gdf = blocks_gdf.set_crs("EPSG:4269")

# If the CRS are different, reproject the blocks to match the stations
if blocks_gdf.crs != stations_gdf.crs:
    blocks_gdf = blocks_gdf.to_crs(stations_gdf.crs)

# Perform the spatial join: keep every (station, block) pair whose geometries intersect
joined_gdf = gpd.sjoin(stations_gdf, blocks_gdf, how="inner", predicate='intersects')

# Display the first few rows of the joined GeoDataFrame
joined_gdf.head()


Stations CRS:  EPSG:4326
Blocks CRS:  EPSG:4269


Unnamed: 0,RecordID,NTD ID,Agency Name,Reporter Type,Reporting Module,Primary Mode Served,Facility ID,Facility Type,Facility Name,City,...,UR20,UACE20,UATYPE20,FUNCSTAT20,ALAND20,AWATER20,INTPTLAT20,INTPTLON20,HOUSING20,POP20
56,97,40,Central Puget Sound Regional Transit Authority,Full Reporter,Urban,Light Rail,10451,Underground Fixed Guideway Station,Pioneer Square Station,Seattle,...,U,63217,U,S,26635,0,40.8292743,-73.9036296,362,1184
56,97,40,Central Puget Sound Regional Transit Authority,Full Reporter,Urban,Light Rail,10451,Underground Fixed Guideway Station,Pioneer Square Station,Seattle,...,U,63217,U,S,13278,0,40.8298271,-73.904559,110,234
56,97,40,Central Puget Sound Regional Transit Authority,Full Reporter,Urban,Light Rail,10451,Underground Fixed Guideway Station,Pioneer Square Station,Seattle,...,U,63217,U,S,20606,0,40.8300915,-73.9055936,474,1231
56,97,40,Central Puget Sound Regional Transit Authority,Full Reporter,Urban,Light Rail,10451,Underground Fixed Guideway Station,Pioneer Square Station,Seattle,...,U,63217,U,S,18584,0,40.8304113,-73.9067524,357,1139
56,97,40,Central Puget Sound Regional Transit Authority,Full Reporter,Urban,Light Rail,10451,Underground Fixed Guideway Station,Pioneer Square Station,Seattle,...,U,63217,U,S,20970,0,40.8307947,-73.9078892,81,344


### **Download the Joined File**

In [None]:
# Write the joined table to CSV, then hand it to the browser via Colab's download helper
blocks_csv_name = '2020Censusblockshousingpop.csv'
joined_gdf.to_csv(blocks_csv_name)
from google.colab import files
files.download(blocks_csv_name)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

### **Upload 5-year ACS TIGER/Line Shapefiles**

In [None]:
import zipfile
import os

# Define the paths to the zip files and the directory where to extract them
zip_files = [
    # 2020 New York State TIGER/Line block group file for the 5-year ACS join.
    # An empty path makes the unzip step fail, so this assumes the Census
    # archive was uploaded to /content under its standard name -- adjust the
    # path if your upload is named differently.
    '/content/tl_2020_36_bg.zip',
]

# Directory where files will be extracted
extract_dir_base = '/mnt/data/shapefiles/'

# Function to unzip files
def unzip_shapefiles(zip_files, extract_dir_base):
    """Extract each zip archive into its own sub-folder of ``extract_dir_base``.

    Args:
        zip_files: Iterable of paths to zip archives.
        extract_dir_base: Directory under which one folder per archive is
            created. The folder is named after the archive file with a
            trailing ``.zip`` extension removed.

    Raises:
        FileNotFoundError: If an archive path does not exist.
        zipfile.BadZipFile: If a file is not a valid zip archive.
    """
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs
    os.makedirs(extract_dir_base, exist_ok=True)

    # Loop through the list of zip files and extract each
    for zip_path in zip_files:
        # Strip only a trailing ".zip"; str.replace('.zip', '') would also
        # remove ".zip" occurring in the middle of a file name.
        base_name = os.path.basename(zip_path)
        stem, ext = os.path.splitext(base_name)
        folder_name = stem if ext.lower() == '.zip' else base_name
        extract_path = os.path.join(extract_dir_base, folder_name)

        # Open the archive before creating its folder so that a bad path
        # does not leave an empty directory behind.
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            os.makedirs(extract_path, exist_ok=True)
            zip_ref.extractall(extract_path)
        print(f"Files extracted to: {extract_path}")

# Unzip all the shapefiles listed in `zip_files`; each archive is extracted into
# its own sub-folder of `extract_dir_base` and the destination is printed
unzip_shapefiles(zip_files, extract_dir_base)


Files extracted to: /mnt/data/shapefiles/tl_2020_36_tabblock20


In [None]:
import geopandas as gpd
import pandas as pd
import os

# Paths where the files were extracted. They are derived from `zip_files`
# (archive file name without its extension, under `extract_dir_base`, the same
# layout unzip_shapefiles produces) instead of being hard-coded, so this
# section reads the archive configured here rather than the
# tl_2020_36_tabblock20 block shapefile from the first section.
extracted_paths = [
    os.path.join(extract_dir_base, os.path.splitext(os.path.basename(zip_path))[0])
    for zip_path in zip_files
    if zip_path  # ignore unfilled placeholder entries
]

# Function to read shapefiles into GeoDataFrames and concatenate them
def concatenate_shapefiles(paths):
    """Read the shapefile in each directory and stack them into one GeoDataFrame.

    Args:
        paths: Iterable of directories, each expected to hold one ``.shp``
            file plus its associated files.

    Returns:
        A GeoDataFrame with the rows of every shapefile read, re-indexed
        from 0.

    Raises:
        ValueError: If no ``.shp`` file is found in any of the directories.
    """
    gdfs = []
    for path in paths:
        # os.listdir order is arbitrary; sorting makes the pick reproducible
        # if a directory ever holds more than one .shp file.
        shp_names = sorted(name for name in os.listdir(path) if name.endswith(".shp"))
        if not shp_names:
            print(f"No .shp file found in: {path}")
            continue
        # We assume there's only one .shp file in each directory
        gdfs.append(gpd.read_file(os.path.join(path, shp_names[0])))

    if not gdfs:
        # Fail with an actionable message instead of pandas' generic
        # "No objects to concatenate".
        raise ValueError("No .shp files found in the given directories")

    # Concatenate all GeoDataFrames into one
    return gpd.GeoDataFrame(pd.concat(gdfs, ignore_index=True))

# Destination of the merged layer
consolidated_shapefile_path = '/mnt/data/shapefiles/consolidated_bg.shp'

# Merge every extracted shapefile, then persist the result as one shapefile
consolidated_gdf = concatenate_shapefiles(extracted_paths)
consolidated_gdf.to_file(consolidated_shapefile_path)


In [None]:
import os

# Folder that holds the consolidated shapefile and its companion files
consolidated_dir = '/mnt/data/shapefiles/'

# Keep only the directory entries that belong to the consolidated shapefile
shapefile_components = list(
    filter(lambda name: 'consolidated_bg' in name, os.listdir(consolidated_dir))
)

# Map each component's file name to its full path
shapefile_paths = dict(
    (name, os.path.join(consolidated_dir, name)) for name in shapefile_components
)

shapefile_paths


{'consolidated_bg.shp': '/mnt/data/shapefiles/consolidated_bg.shp',
 'consolidated_bg.cpg': '/mnt/data/shapefiles/consolidated_bg.cpg',
 'consolidated_bg.dbf': '/mnt/data/shapefiles/consolidated_bg.dbf',
 'consolidated_bg.prj': '/mnt/data/shapefiles/consolidated_bg.prj',
 'consolidated_bg.shx': '/mnt/data/shapefiles/consolidated_bg.shx'}

### **Join the ACS TIGER/Line Shapefiles with the Transit Station GeoJSON File**

In [None]:
import geopandas as gpd

# Inputs: the station GeoJSON uploaded to the Colab environment and the
# consolidated shapefile written by the consolidation step
stations_geojson_path = '/content/buffered_stations (1).geojson'
consolidated_shapefile_path = '/mnt/data/shapefiles/consolidated_bg.shp'

# Load the GeoJSON and shapefile into GeoDataFrames
stations_gdf = gpd.read_file(stations_geojson_path)
blocks_gdf = gpd.read_file(consolidated_shapefile_path)

# Check the CRS for both GeoDataFrames
print("Stations CRS: ", stations_gdf.crs)
print("Blocks CRS: ", blocks_gdf.crs)

# TIGER/Line shapefiles are published in NAD83 (EPSG:4269). If the CRS is
# missing, declare that CRS rather than borrowing the stations' CRS, which
# would relabel the coordinates without transforming them.
if blocks_gdf.crs is None:
    blocks_gdf = blocks_gdf.set_crs("EPSG:4269")

# If the CRS are different, reproject the Census layer to match the stations
if blocks_gdf.crs != stations_gdf.crs:
    blocks_gdf = blocks_gdf.to_crs(stations_gdf.crs)

# Perform the spatial join: keep every (station, Census polygon) pair whose geometries intersect
joined_gdf = gpd.sjoin(stations_gdf, blocks_gdf, how="inner", predicate='intersects')

# Display the first few rows of the joined GeoDataFrame
joined_gdf.head()


Stations CRS:  EPSG:4326
Blocks CRS:  EPSG:4269


Unnamed: 0,RecordID,NTD ID,Agency Name,Reporter Type,Reporting Module,Primary Mode Served,Facility ID,Facility Type,Facility Name,City,...,UR20,UACE20,UATYPE20,FUNCSTAT20,ALAND20,AWATER20,INTPTLAT20,INTPTLON20,HOUSING20,POP20
56,97,40,Central Puget Sound Regional Transit Authority,Full Reporter,Urban,Light Rail,10451,Underground Fixed Guideway Station,Pioneer Square Station,Seattle,...,U,63217,U,S,26635,0,40.8292743,-73.9036296,362,1184
56,97,40,Central Puget Sound Regional Transit Authority,Full Reporter,Urban,Light Rail,10451,Underground Fixed Guideway Station,Pioneer Square Station,Seattle,...,U,63217,U,S,13278,0,40.8298271,-73.904559,110,234
56,97,40,Central Puget Sound Regional Transit Authority,Full Reporter,Urban,Light Rail,10451,Underground Fixed Guideway Station,Pioneer Square Station,Seattle,...,U,63217,U,S,20606,0,40.8300915,-73.9055936,474,1231
56,97,40,Central Puget Sound Regional Transit Authority,Full Reporter,Urban,Light Rail,10451,Underground Fixed Guideway Station,Pioneer Square Station,Seattle,...,U,63217,U,S,18584,0,40.8304113,-73.9067524,357,1139
56,97,40,Central Puget Sound Regional Transit Authority,Full Reporter,Urban,Light Rail,10451,Underground Fixed Guideway Station,Pioneer Square Station,Seattle,...,U,63217,U,S,20970,0,40.8307947,-73.9078892,81,344


### **Download the ACS Joined File**

In [None]:
# Write the joined table to CSV, then hand it to the browser via Colab's download helper
acs_csv_name = '2020ACS.csv'
joined_gdf.to_csv(acs_csv_name)
from google.colab import files
files.download(acs_csv_name)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>