In [9]:
import geopandas as gpd
from bs4 import BeautifulSoup
import pandas as pd
import re
import requests
from requests.packages.urllib3.exceptions import InsecureRequestWarning
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
import os
from tqdm import tqdm

In [None]:
def get_zip(url, timeout=60):
    """Return absolute URLs of the zip archives linked from a directory page.

    Parameters
    ----------
    url : str
        Address of an HTML page (e.g. a directory listing) to scan for links.
    timeout : float, optional
        Seconds to wait for the server before giving up. Defaults to 60 so a
        stalled connection cannot hang the notebook forever.

    Returns
    -------
    list of str
        One absolute URL per ``<a>`` tag whose ``href`` ends in ``.zip``
        (case-insensitive), in the order the links appear on the page.

    Raises
    ------
    requests.HTTPError
        If the page is answered with a 4xx/5xx status.
    """
    # Local import keeps this cell self-contained; urljoin is the standard
    # way to resolve an href against a page URL (os.path.join is for file
    # paths and is platform dependent).
    from urllib.parse import urljoin

    # NOTE(review): verify=False turns off TLS certificate validation. It is
    # kept to preserve existing behaviour; confirm it is still required.
    front_page = requests.get(url, verify=False, timeout=timeout)
    # Fail loudly instead of parsing an error page into an empty list.
    front_page.raise_for_status()

    soup = BeautifulSoup(front_page.content, 'html.parser')
    zip_links = soup.find_all("a", href=re.compile(r"\.zip$", re.IGNORECASE))

    # urljoin replaces the last path segment unless the base ends in "/",
    # so make sure the page URL is treated as a directory.
    base = url if url.endswith("/") else url + "/"
    return [urljoin(base, link['href']) for link in zip_links]

In [None]:
def get_geo_table(geography, url):
    """Download every zipped layer listed at ``url`` and save them as one CSV.

    Each archive returned by ``get_zip(url)`` is read with geopandas, its last
    column is dropped, and the pieces are stacked into a single table written
    to ``<geography>.csv`` in the working directory. All pieces are held in
    memory until the final concat, so large layers may be slow and
    memory-hungry.

    Parameters
    ----------
    geography : str
        Label used for the progress bar and as the stem of the output file.
    url : str
        Directory page whose zip links should be downloaded.

    Raises
    ------
    ValueError
        If the page lists no zip files.
    """
    files = get_zip(url)
    if not files:
        # pd.concat([]) would raise a ValueError with no hint about the cause.
        raise ValueError(f"No zip files found at {url}")

    pages = []
    for zfile in tqdm(files, desc=str(geography)):
        # Keep every column except the last one.
        # NOTE(review): presumably the last column is the geometry - confirm.
        pages.append(gpd.read_file(zfile).iloc[:, :-1])

    # Each file's rows are numbered from 0; renumber so the index written to
    # the CSV is unique across the combined table.
    fulltable = pd.concat(pages, ignore_index=True)
    fulltable.to_csv(f"{geography}.csv")

In [None]:
# Export one CSV per geography layer.
# Each entry is (output name, directory page listing that layer's zip files);
# the layers are processed in the order given.
geo_layers = [
    # Block shape files
    ("block", r"https://www2.census.gov/geo/tiger/TIGER_RD18/LAYER/TABBLOCK20/"),
    # Block group shape files
    ("blockgroup", r"https://www2.census.gov/geo/tiger/TIGER_RD18/LAYER/BG/"),
    # Tract shape files
    ("tract", r"https://www2.census.gov/geo/tiger/TIGER_RD18/LAYER/TRACT/"),
    # MetDiv shape files
    ("metdiv", r"https://www2.census.gov/geo/tiger/TIGER2020/METDIV/"),
    # CBSA shape files
    ("cbsa", r"https://www2.census.gov/geo/tiger/TIGER2020/CBSA/"),
    # CSA shape files
    ("csa", r"https://www2.census.gov/geo/tiger/TIGER2020/CSA/"),
    # CD shape files
    ("CD", r"https://www2.census.gov/geo/tiger/TIGER2020/CD/CD118/"),
    # Place shape files
    ("place", r"https://www2.census.gov/geo/tiger/TIGER2020/PLACE/"),
]

for layer_name, layer_url in geo_layers:
    get_geo_table(layer_name, layer_url)

In [None]:
# County shape file (a single national archive, so no directory crawl).
# This release has the new Connecticut counties:
# https://www2.census.gov/geo/tiger/TIGER_RD18/LAYER/COUNTY/

url=r'https://www2.census.gov/geo/tiger/TIGER_RD18/LAYER/COUNTY/tl_rd22_us_county.zip'

# Read the archive and keep every column except the last one.
table = gpd.read_file(url).iloc[:, :-1]

# Same list-then-concat shape as the multi-file layers, with one entry.
pages = [table]
fulltable = pd.concat(pages)
fulltable.to_csv("counties.csv")

In [7]:
from bs4 import BeautifulSoup
import pandas as pd
import re
import requests
from requests.packages.urllib3.exceptions import InsecureRequestWarning
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
import os
from tqdm import tqdm

#Block to place and county
# Walk the block-map directory tree (state folder -> county folder), read each
# county's DC20BLK_<COUNTY>_BLK2MS.txt table and stack them all into
# blocksCrosswalk.csv.
url = r"https://www2.census.gov/geo/maps/DC2020/DC20BLK/"
# NOTE(review): verify=False disables TLS certificate validation - confirm it
# is still needed.
front_page = requests.get(url,verify=False)
soup = BeautifulSoup(front_page.content,'html.parser')
# State folders are the links shaped like "st01_al"; the last match is skipped.
states = soup.find_all("a",href=re.compile(r"st([0-9]{2})_([a-z]{2})"))[:-1]
# Progress-bar labels come from the folder names themselves. Slicing the
# finished URL by fixed offsets gives a wrong label for any entry whose URL
# is extended below.
state_names = [i['href'].strip("/") for i in states]
state_urls = [os.path.join(url,i['href'],"county/") for i in states]
# NOTE(review): entry 54 keeps its county folders one level deeper, under
# Maps_in_English/ (presumably Puerto Rico - confirm). The positional index
# breaks if the listing ever gains or loses a state folder.
state_urls[54] = state_urls[54]+'Maps_in_English/'
blocks_total = []

# Identifier columns are read as text so the codes are not parsed as numbers.
id_dtypes = {'STATE':'str','COUNTY':'str','TRACT':'str','BLOCK':'str','PLACE':'str','COUSUB':'str'}

for name, state in zip(state_names, state_urls):
    block_table_county = []
    state_page = requests.get(state,verify=False)
    state_soup = BeautifulSoup(state_page.content,'html.parser')
    # County folders are the links containing "c" followed by five digits.
    counties = state_soup.find_all("a",href=re.compile(r"c([0-9]{5})"))
    # The table's file name embeds the first six characters of the folder
    # name, upper-cased (e.g. "c01001..." -> "C01001").
    county_urls = [os.path.join(state,x['href'],"DC20BLK_"+x['href'][:6].upper()+"_BLK2MS.txt") for x in counties]
    for county in tqdm(county_urls, desc=f"{name}",total=len(county_urls)):
        data = pd.read_table(county, sep =';',header=0, dtype=id_dtypes)
        block_table_county.append(data)
    blocks_total.append(pd.concat(block_table_county))

blocks = pd.concat(blocks_total)
blocks.to_csv("blocksCrosswalk.csv")

st01_al: 100%|█████████████████████████████████████████████████████████████████████████| 67/67 [00:10<00:00,  6.16it/s]
st02_ak: 100%|█████████████████████████████████████████████████████████████████████████| 30/30 [00:03<00:00,  7.85it/s]
st04_az: 100%|█████████████████████████████████████████████████████████████████████████| 15/15 [00:07<00:00,  2.08it/s]
st05_ar: 100%|█████████████████████████████████████████████████████████████████████████| 75/75 [00:21<00:00,  3.47it/s]
st06_ca: 100%|█████████████████████████████████████████████████████████████████████████| 58/58 [00:15<00:00,  3.79it/s]
st08_co: 100%|█████████████████████████████████████████████████████████████████████████| 64/64 [00:10<00:00,  6.24it/s]
st09_ct: 100%|███████████████████████████████████████████████████████████████████████████| 8/8 [00:01<00:00,  4.17it/s]
st10_de: 100%|███████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00,  4.80it/s]
st11_dc: 100%|██████████████████████████

In [6]:
# Load the place <-> congressional district relationship file
# (pipe-delimited, first row is the header).
# GEOID_PLACE_20 is read as text rather than letting pandas infer its type.
cd2place = pd.read_csv(
    "https://www2.census.gov/geo/docs/maps-data/data/rel2020/cd-sld/tab20_cd11820_place20_natl.txt",
    sep="|",
    header=0,
    dtype={'GEOID_PLACE_20': str},
)

  exec(code_obj, self.user_global_ns, self.user_ns)


In [8]:
# Persist the place-to-CD table; the DataFrame index is written as the first column.
cd2place.to_csv("place2cd.csv", index=True)

In [52]:
# Download the pipe-delimited relationship files and save each one as a CSV.
# NOTE(review): only the place-to-CD read below pins an identifier column to
# text. The other files presumably carry GEOID-style columns as well - confirm
# their column names and pass them through dtype in the same way.

#Cousub to CD
cousub2cd = pd.read_table("https://www2.census.gov/geo/docs/maps-data/data/rel2020/cd-sld/tab20_cd11820_cousub20_natl.txt", sep="|",header=0)
cousub2cd.to_csv("cousub2cd.csv")

#Place to CD
# GEOID_PLACE_20 is read as text. This cell writes the same place2cd.csv as
# the other place-to-CD read in this notebook, which already uses this dtype,
# so both now produce the same column type.
cd2place = pd.read_table("https://www2.census.gov/geo/docs/maps-data/data/rel2020/cd-sld/tab20_cd11820_place20_natl.txt", sep="|",header=0, dtype={'GEOID_PLACE_20':'str'})
cd2place.to_csv("place2cd.csv")

#County to CD
cd2county = pd.read_table("https://www2.census.gov/geo/docs/maps-data/data/rel2020/cd-sld/tab20_cd11820_county20_natl.txt", sep="|",header=0)
cd2county.to_csv("county2cd.csv")

#place10 to 20 update
place102place20 = pd.read_table("https://www2.census.gov/geo/docs/maps-data/data/rel2020/place/tab20_place20_place10_natl.txt", sep="|",header=0)
place102place20.to_csv("places_relational.csv")

  exec(code_obj, self.user_global_ns, self.user_ns)
