In [1]:
import matplotlib.pyplot as plt
from gerrychain import (GeographicPartition, Partition, Graph, MarkovChain,
                        proposals, updaters, constraints, accept, Election)
from gerrychain.proposals import recom
from functools import partial
import pandas as pd
import geopandas as gpd

In [3]:
# Testing if tutorial works
graph = Graph.from_file("./data/pa/PA.shp")

AttributeError: 'Polygon' object has no attribute 'id'

# Creating the dataframe
First, we create a dataframe including all the information we need: the location of each block, the population of each block, and the district assignment of each block. We also include the total population of each district. The shapefile uses the 2020 census block shapes, and the population data is from the 2020 census too. The block equivalency file is from the North Carolina Legislature's website.

In [2]:
blocks_shp = gpd.read_file("./data/shapefiles/tl_2020_37_tabblock20.shp")
blocks_data = pd.read_csv("./data/nc/DECENNIALPL2020.P1-Data-block.csv")
blocks_data = blocks_data[blocks_data["NAME"].str.contains("Census Tract")]
blocks_data = blocks_data[["GEO_ID", "NAME", "P1_001N", "P1_003N"]] # P1_001N = Total Population, P1_003N = White alone
blocks_data["P1_001N"] = blocks_data["P1_001N"].astype(int)
blocks_data["P1_003N"] = blocks_data["P1_003N"].astype(int)
blocks_data["GEOID"] = blocks_data["GEO_ID"].str[9:]
blocks = blocks_shp.merge(blocks_data, left_on="GEOID20", right_on="GEOID", validate="one_to_one")

  blocks_data = pd.read_csv("./data/nc/DECENNIALPL2020.P1-Data-block.csv")


In [3]:
districts = dict(pd.read_csv("./data/nc/equivalency.csv", dtype=str).values)

In [4]:
districts

{'371139706002016': '11',
 '371139706002042': '11',
 '371139706002017': '11',
 '371139706002059': '11',
 '370399302002040': '11',
 '370399302002017': '11',
 '370399302002020': '11',
 '370399302002022': '11',
 '370399302002005': '11',
 '370399302001055': '11',
 '370399302001059': '11',
 '370399302001050': '11',
 '370399302001042': '11',
 '370399302001054': '11',
 '370399302001051': '11',
 '370399302002001': '11',
 '370399302002019': '11',
 '370399302002021': '11',
 '370399302002004': '11',
 '370399302001049': '11',
 '370399302002002': '11',
 '370399302001053': '11',
 '370399302002030': '11',
 '370399302002032': '11',
 '370399302002003': '11',
 '370399302002000': '11',
 '370399301013027': '11',
 '370399301011007': '11',
 '370399301013050': '11',
 '370399302002033': '11',
 '370399302002031': '11',
 '370399302002045': '11',
 '370399302002044': '11',
 '370399301011013': '11',
 '370399301011009': '11',
 '370399301011005': '11',
 '370399301011010': '11',
 '370399301011012': '11',
 '3703993010

In [5]:
blocks["District"] = blocks["GEOID20"].map(districts)

In [6]:
blocks.head()

Unnamed: 0,STATEFP20,COUNTYFP20,TRACTCE20,BLOCKCE20,GEOID20,NAME20,MTFCC20,UR20,UACE20,UATYPE20,...,INTPTLON20,HOUSING20,POP20,geometry,GEO_ID,NAME,P1_001N,P1_003N,GEOID,District
0,37,85,70904,1016,370850709041016,Block 1016,G5040,R,,,...,-78.7429098,135,236,"POLYGON ((-78.74612 35.51781, -78.74593 35.517...",1000000US370850709041016,"Block 1016, Block Group 1, Census Tract 709.04...",236,67,370850709041016,13
1,37,105,30300,2040,371050303002040,Block 2040,G5040,U,78877.0,U,...,-79.1609933,0,0,"POLYGON ((-79.16192 35.46852, -79.16154 35.469...",1000000US371050303002040,"Block 2040, Block Group 2, Census Tract 303, L...",0,0,371050303002040,9
2,37,85,70200,3007,370850702003007,Block 3007,G5040,U,25039.0,U,...,-78.6080862,6,14,"POLYGON ((-78.60900 35.30999, -78.60828 35.310...",1000000US370850702003007,"Block 3007, Block Group 3, Census Tract 702, H...",14,0,370850702003007,13
3,37,85,70500,3022,370850705003022,Block 3022,G5040,U,25039.0,U,...,-78.6570878,26,45,"POLYGON ((-78.65879 35.31721, -78.65876 35.317...",1000000US370850705003022,"Block 3022, Block Group 3, Census Tract 705, H...",45,26,370850705003022,13
4,37,85,70401,1041,370850704011041,Block 1041,G5040,U,25039.0,U,...,-78.6628098,16,46,"POLYGON ((-78.66407 35.35466, -78.66371 35.355...",1000000US370850704011041,"Block 1041, Block Group 1, Census Tract 704.01...",46,9,370850704011041,13


In [7]:
blocks_keep = blocks[['GEOID20', 'geometry', 'District', 'P1_001N', 'P1_003N']]

In [8]:
blocks_keep.head()

Unnamed: 0,GEOID20,geometry,District,P1_001N,P1_003N
0,370850709041016,"POLYGON ((-78.74612 35.51781, -78.74593 35.517...",13,236,67
1,371050303002040,"POLYGON ((-79.16192 35.46852, -79.16154 35.469...",9,0,0
2,370850702003007,"POLYGON ((-78.60900 35.30999, -78.60828 35.310...",13,14,0
3,370850705003022,"POLYGON ((-78.65879 35.31721, -78.65876 35.317...",13,45,26
4,370850704011041,"POLYGON ((-78.66407 35.35466, -78.66371 35.355...",13,46,9


In [9]:
blocks_keep.isna().sum()

GEOID20     0
geometry    0
District    0
P1_001N     0
P1_003N     0
dtype: int64

# Saving the dataframe as a graph
Next, we convert the dataframe into a graph useing the gerrychain package.

In [11]:
dual_graph = Graph.from_geodataframe(blocks_keep, ignore_errors=True)

AttributeError: 'Polygon' object has no attribute 'id'