In [1]:
# folder holding the block-level graphs; later cells read "<ST>_block.json" from here
filepath = 'C://districting-data-2020-reprojection//'

# root folder for the Census block assignment files (BAFs) downloaded below.
# every path ends with a separator because later cells build file names by
# plain string concatenation (path + filename).
baf_root = 'C:\\districting-data-2020\\'

# congressional BAFs, downloaded from:
#   https://www.census.gov/programs-surveys/decennial-census/about/rdo/congressional-districts.html
filepath1 = baf_root + 'cd118\\'

# state legislative BAFs, downloaded from:
#   https://www.census.gov/programs-surveys/decennial-census/about/rdo/state-legislative-district.html
# (sldu_2022 is read by the 'SS' cells, sldl_2022 by the 'SH' cells)
filepath2 = baf_root + 'sldu_2022\\'
filepath3 = baf_root + 'sldl_2022\\'

import pandas
from read import read_graph_from_json
from metrics import report_metrics
from number_of_districts import number_of_districts

In [2]:
# lookup table keyed by two-digit state FIPS code (as a string).
# each value is a dict with:
#   'abbr' : two-letter postal abbreviation
#   'epsg' : an EPSG code stored as a string (presumably the projection used
#            for that state's maps -- confirm against the code that reads it)
#   'name' : state name written without spaces (e.g. 'NewHampshire')
# only the 50 states are listed; there is no entry for code '11' or for territories.
states = {
    fips: {'abbr': abbr, 'epsg': epsg, 'name': name}
    for fips, abbr, epsg, name in (
        ('01', 'AL', '3465', 'Alabama'),
        ('02', 'AK', '3471', 'Alaska'),
        ('04', 'AZ', '3478', 'Arizona'),
        ('05', 'AR', '3484', 'Arkansas'),
        ('06', 'CA', '3493', 'California'),
        ('08', 'CO', '3501', 'Colorado'),
        ('09', 'CT', '3507', 'Connecticut'),
        ('10', 'DE', '3509', 'Delaware'),
        ('12', 'FL', '3514', 'Florida'),
        ('13', 'GA', '3518', 'Georgia'),
        ('15', 'HI', '2784', 'Hawaii'),
        ('16', 'ID', '3524', 'Idaho'),
        ('17', 'IL', '3528', 'Illinois'),
        ('18', 'IN', '3532', 'Indiana'),
        ('19', 'IA', '3536', 'Iowa'),
        ('20', 'KS', '3540', 'Kansas'),
        ('21', 'KY', '3544', 'Kentucky'),
        ('22', 'LA', '3550', 'Louisiana'),
        ('23', 'ME', '3557', 'Maine'),
        ('24', 'MD', '3559', 'Maryland'),
        ('25', 'MA', '3585', 'Massachusetts'),
        ('26', 'MI', '3587', 'Michigan'),
        ('27', 'MN', '3594', 'Minnesota'),
        ('28', 'MS', '3597', 'Mississippi'),
        ('29', 'MO', '3602', 'Missouri'),
        ('30', 'MT', '3604', 'Montana'),
        ('31', 'NE', '3606', 'Nebraska'),
        ('32', 'NV', '3607', 'Nevada'),
        ('33', 'NH', '3613', 'NewHampshire'),
        ('34', 'NJ', '3615', 'NewJersey'),
        ('35', 'NM', '3617', 'NewMexico'),
        ('36', 'NY', '3623', 'NewYork'),
        ('37', 'NC', '3631', 'NorthCarolina'),
        ('38', 'ND', '3633', 'NorthDakota'),
        ('39', 'OH', '3637', 'Ohio'),
        ('40', 'OK', '3639', 'Oklahoma'),
        ('41', 'OR', '3645', 'Oregon'),
        ('42', 'PA', '3649', 'Pennsylvania'),
        ('44', 'RI', '3653', 'RhodeIsland'),
        ('45', 'SC', '3655', 'SouthCarolina'),
        ('46', 'SD', '3657', 'SouthDakota'),
        ('47', 'TN', '3661', 'Tennessee'),
        ('48', 'TX', '3669', 'Texas'),
        ('49', 'UT', '3675', 'Utah'),
        ('50', 'VT', '3684', 'Vermont'),
        ('51', 'VA', '3685', 'Virginia'),
        ('53', 'WA', '3689', 'Washington'),
        ('54', 'WV', '3693', 'WestVirginia'),
        ('55', 'WI', '3695', 'Wisconsin'),
        ('56', 'WY', '3703', 'Wyoming'),
    )
}

def get_fips(state):
    """Return the FIPS code (a key of `states`) for a two-letter state abbreviation.

    Args:
        state: postal abbreviation to look up, compared against each
            entry's 'abbr' field (exact, case-sensitive match).

    Returns:
        The matching key of `states`, e.g. '01' for 'AL'.

    Raises:
        AssertionError: if no entry of `states` has that abbreviation.
    """
    for code, info in states.items():
        if info['abbr'] == state:
            return code
    # raised explicitly (rather than `assert False`) so the failure still
    # fires when Python runs with -O, instead of silently returning None
    raise AssertionError("unknown state abbreviation: " + repr(state))

In [3]:
# report metrics for the enacted state senate ('SS') plans, read from the
# Census SLDU block assignment files
district_type = 'SS'

for state in ['LA','MS', 'AL', 'GA']:

    print(state)

    # read block-level graph
    filename = state + "_block.json"
    G = read_graph_from_json(filepath+filename)
    geoid_to_node = { G.nodes[i]['GEOID20'] : i for i in G.nodes }

    # how many districts total?
    k = number_of_districts[state,district_type]
    districts = [ list() for j in range(k) ]

    # read the enacted plan from file.
    # dtype=str keeps every GEOID exactly as written (leading zeros included).
    # iterating the two columns directly also avoids iterrows(), which builds
    # one Series per row and does not preserve dtypes across the row.
    fips = get_fips(state)
    filename = fips + '_' + state + '_SLDU22.txt'
    csvFile = pandas.read_csv( filepath2 + filename, skipinitialspace = True, dtype = str )
    unassigned = list()
    for geoid, label in zip( csvFile['GEOID'], csvFile['SLDUST'] ):

        # what is the block's geoid? (15 characters; re-pad in case zeros were lost upstream)
        g = str(geoid).strip().zfill(15)

        # which node does this correspond to?
        i = geoid_to_node[g]

        # which district is it in?
        j = str(label).strip()

        # is it unassigned??
        if j in { 'ZZ', 'ZZZ' }:
            unassigned.append(i)
            continue

        # labels are 1-based; reject anything outside 1..k so that a label of 0
        # cannot silently land in the last district through index -1
        d = int(j)
        if not 1 <= d <= k:
            raise ValueError(state + ": district label " + repr(j) + " for block " + g + " is outside 1.." + str(k))
        districts[d-1].append(i)

    print("unassigned =",unassigned)
    if len(unassigned)==0:
        report_metrics(G, districts)

LA
unassigned = [87404, 4599, 27126, 100726, 53817, 60469, 9152, 91806, 125356, 42835, 116422, 91831, 129860, 4617, 140929, 9268, 89665, 74043, 89474, 9164, 135656, 86432, 9163, 96244, 75377, 47210, 122252, 49819, 89992, 74464, 717, 114634, 130146, 118913, 92196, 92263, 110290, 36738, 32161, 103505, 36848, 74452, 47748, 18488, 83297, 141379, 41078, 103400, 92433, 34464, 30021, 136960, 139885, 139191, 67801, 119777, 44037, 118507, 41900, 120619, 86349, 48373, 26143, 10412, 1561, 124370, 66178, 5859, 138500, 142675, 117882, 124780, 116816, 102631, 136919, 74439]
MS
unassigned = []
-> 43 counties split a total of 64 times
-> average Polsby-Popper score of 0.263
AL
unassigned = []
-> 19 counties split a total of 35 times
-> average Polsby-Popper score of 0.2568
GA
unassigned = []
-> 29 counties split a total of 60 times
-> average Polsby-Popper score of 0.287


In [4]:
# not all blocks are assigned in LA/SS. We complete it by hand on DRA and import it here
# https://davesredistricting.org/maps#viewmap::274d5e08-2f33-4d09-9be5-8e8e29fb6fe1

district_type = 'SS'

for state in ['LA']:

    print(state)

    # read block-level graph
    filename = state + "_block.json"
    G = read_graph_from_json(filepath+filename)
    geoid_to_node = { G.nodes[i]['GEOID20'] : i for i in G.nodes }

    # how many districts total?
    k = number_of_districts[state,district_type]
    districts = [ list() for j in range(k) ]

    # read the completed plan from file (looked up relative to the working directory).
    # dtype=str keeps every GEOID exactly as written (leading zeros included).
    # iterating the two columns directly also avoids iterrows(), which builds
    # one Series per row and does not preserve dtypes across the row.
    filename = 'LA_SS_enacted.csv'
    csvFile = pandas.read_csv( filename, skipinitialspace = True, dtype = str )
    unassigned = list()
    for geoid, label in zip( csvFile['GEOID20'], csvFile['District'] ):

        # what is the block's geoid? (15 characters; re-pad in case zeros were lost upstream)
        g = str(geoid).strip().zfill(15)

        # which node does this correspond to?
        i = geoid_to_node[g]

        # which district is it in?
        # NOTE(review): the first 9 characters of the label are dropped, presumably
        # a fixed textual prefix in front of the district number -- confirm
        # against the exported file
        District_j = str(label)
        j = District_j[9:].strip()

        # is it unassigned??
        if j in { 'ZZ', 'ZZZ' }:
            unassigned.append(i)
            continue

        # labels are 1-based; reject anything outside 1..k so that a label of 0
        # cannot silently land in the last district through index -1
        d = int(j)
        if not 1 <= d <= k:
            raise ValueError(state + ": district label " + repr(District_j) + " for block " + g + " is outside 1.." + str(k))
        districts[d-1].append(i)

    print("unassigned =",unassigned)
    if len(unassigned)==0:
        report_metrics(G, districts)


LA
unassigned = []
-> 40 counties split a total of 77 times
-> average Polsby-Popper score of 0.1837


In [5]:
# report metrics for the enacted state house ('SH') plans, read from the
# Census SLDL block assignment files
district_type = 'SH'

for state in ['LA', 'MS', 'AL', 'GA']:

    print(state)

    # read block-level graph
    filename = state + "_block.json"
    G = read_graph_from_json(filepath+filename)
    geoid_to_node = { G.nodes[i]['GEOID20'] : i for i in G.nodes }

    # how many districts total?
    k = number_of_districts[state,district_type]
    districts = [ list() for j in range(k) ]

    # read the enacted plan from file.
    # dtype=str keeps every GEOID exactly as written (leading zeros included).
    # iterating the two columns directly also avoids iterrows(), which builds
    # one Series per row and does not preserve dtypes across the row.
    fips = get_fips(state)
    filename = fips + '_' + state + '_SLDL22.txt'
    csvFile = pandas.read_csv( filepath3 + filename, skipinitialspace = True, dtype = str )
    unassigned = list()
    for geoid, label in zip( csvFile['GEOID'], csvFile['SLDLST'] ):

        # what is the block's geoid? (15 characters; re-pad in case zeros were lost upstream)
        g = str(geoid).strip().zfill(15)

        # which node does this correspond to?
        i = geoid_to_node[g]

        # which district is it in?
        j = str(label).strip()

        # is it unassigned??
        if j in { 'ZZ', 'ZZZ' }:
            unassigned.append(i)
            continue

        # labels are 1-based; reject anything outside 1..k so that a label of 0
        # cannot silently land in the last district through index -1
        d = int(j)
        if not 1 <= d <= k:
            raise ValueError(state + ": district label " + repr(j) + " for block " + g + " is outside 1.." + str(k))
        districts[d-1].append(i)

    print("unassigned =",unassigned)
    if len(unassigned)==0:
        report_metrics(G, districts)

LA
unassigned = []
-> 41 counties split a total of 116 times
-> average Polsby-Popper score of 0.2911
MS
unassigned = []
-> 68 counties split a total of 181 times
-> average Polsby-Popper score of 0.2644
AL
unassigned = []
-> 39 counties split a total of 115 times
-> average Polsby-Popper score of 0.2445
GA
unassigned = []
-> 69 counties split a total of 209 times
-> average Polsby-Popper score of 0.2784


In [6]:
# what is average PP score for LA SH majority-Black districts?

# helpers used only by this cell; imported up front so that a missing module
# fails before the block-level graph is loaded rather than after
from metrics import average_polsby_popper
from coarsen import subgraph

district_type = 'SH'

for state in ['LA']:

    print(state)

    # read block-level graph
    filename = state + "_block.json"
    G = read_graph_from_json(filepath+filename)
    geoid_to_node = { G.nodes[i]['GEOID20'] : i for i in G.nodes }

    # how many districts total?
    k = number_of_districts[state,district_type]
    districts = [ list() for j in range(k) ]

    # read the enacted plan from file.
    # dtype=str keeps every GEOID exactly as written (leading zeros included).
    # iterating the two columns directly also avoids iterrows(), which builds
    # one Series per row and does not preserve dtypes across the row.
    fips = get_fips(state)
    filename = fips + '_' + state + '_SLDL22.txt'
    csvFile = pandas.read_csv( filepath3 + filename, skipinitialspace = True, dtype = str )
    unassigned = list()
    for geoid, label in zip( csvFile['GEOID'], csvFile['SLDLST'] ):

        # what is the block's geoid? (15 characters; re-pad in case zeros were lost upstream)
        g = str(geoid).strip().zfill(15)

        # which node does this correspond to?
        i = geoid_to_node[g]

        # which district is it in?
        j = str(label).strip()

        # is it unassigned??
        if j in { 'ZZ', 'ZZZ' }:
            unassigned.append(i)
            continue

        # labels are 1-based; reject anything outside 1..k so that a label of 0
        # cannot silently land in the last district through index -1
        d = int(j)
        if not 1 <= d <= k:
            raise ValueError(state + ": district label " + repr(j) + " for block " + g + " is outside 1.." + str(k))
        districts[d-1].append(i)

    print("unassigned =",unassigned)
    if len(unassigned)==0:
        # majority-Black district numbers (1-based labels, as used in the plan file)
        gingles_original = [ 100,99,61,3,29,34,97,4,63,2,102,26,17,16,101,44,87,57,58,93,11,21,96,62,40,83,72,67,23 ]

        # shift to the 0-based positions used by `districts`
        gingles = [ number-1 for number in gingles_original ]
        gingles_districts = [ districts[position] for position in gingles ]
        print("Number of Gingles districts:",len(gingles_districts))

        # score only those districts, on the subgraph built from their own blocks
        gingles_nodes = [ node for district in gingles_districts for node in district ]
        GS = subgraph(G, gingles_nodes)
        print("avepp:",average_polsby_popper(GS, gingles_districts))

LA
unassigned = []
Number of Gingles districts: 29
avepp: 0.267005521474827


In [6]:
# get metrics for MS/SH because we had to manually divide up a multidistrict
state = 'MS'
district_type = 'SH'

# read block-level graph
filename = state + "_block.json"
G = read_graph_from_json(filepath+filename)

# expose each block's 'P0010001' value under the name 'TOTPOP', which the
# population tally at the end of this cell reads
for i in G.nodes:
    G.nodes[i]['TOTPOP'] = G.nodes[i]['P0010001']
geoid_to_node = { G.nodes[i]['GEOID20'] : i for i in G.nodes }

# how many districts total?
k = number_of_districts[state,district_type]
districts = [ list() for j in range(k) ]

# read the plan from file (looked up relative to the working directory).
# dtype=str keeps every GEOID exactly as written (leading zeros included).
# iterating the two columns directly also avoids iterrows(), which builds
# one Series per row and does not preserve dtypes across the row.
filename = "gingles_MS_SH_block.csv"
csvFile = pandas.read_csv( filename, skipinitialspace = True, dtype = str )
unassigned = list()
for geoid, label in zip( csvFile['GEOID20'], csvFile['District'] ):

    # what is the block's geoid? (15 characters; re-pad in case zeros were lost upstream)
    g = str(geoid).strip().zfill(15)

    # which node does this correspond to?
    i = geoid_to_node[g]

    # which district is it in?
    j = str(label).strip()

    # is it unassigned??
    if j in { 'ZZ', 'ZZZ' }:
        unassigned.append(i)
        continue

    # labels are 1-based; reject anything outside 1..k so that a label of 0
    # cannot silently land in the last district through index -1
    d = int(j)
    if not 1 <= d <= k:
        raise ValueError(state + ": district label " + repr(j) + " for block " + g + " is outside 1.." + str(k))
    districts[d-1].append(i)

print("unassigned =",unassigned)
if len(unassigned)==0:
    report_metrics(G, districts)

# 51 majority-Black districts were carved at first (2 more came later during complete/cleanup)
# the tally below covers the first `num_carved` entries of `districts`
num_carved = 51
sum_population = 0
print("# population")
for p in range(num_carved):
    district = districts[p]
    population = sum( G.nodes[i]['TOTPOP'] for i in district )
    sum_population += population
    print(p,population)
print("average population among carved:",sum_population/num_carved)
    

unassigned = []
-> 52 counties split a total of 119 times
-> average Polsby-Popper score of 0.4038
# population
0 24070
1 24785
2 25212
3 23077
4 23194
5 23712
6 25217
7 23245
8 23582
9 24351
10 23240
11 23091
12 24820
13 23820
14 24099
15 24685
16 23529
17 23258
18 25476
19 25320
20 23978
21 25103
22 23485
23 23184
24 23146
25 23455
26 23138
27 23635
28 24356
29 23239
30 23377
31 23669
32 25183
33 23232
34 24645
35 23802
36 25334
37 23964
38 25223
39 23270
40 24501
41 24750
42 23227
43 23124
44 23563
45 23274
46 23179
47 23660
48 24503
49 23086
50 23312
average population among carved: 23929.019607843136


In [10]:
# report metrics for the AL congressional plan stored in PlanDDec26.csv
state = 'AL'
district_type = 'CD'

# read block-level graph
filename = state + "_block.json"
G = read_graph_from_json(filepath+filename)
geoid_to_node = { G.nodes[i]['GEOID20'] : i for i in G.nodes }

# how many districts total?
k = number_of_districts[state,district_type]
districts = [ list() for j in range(k) ]

# read the plan from file (looked up relative to the working directory).
# dtype=str keeps every GEOID exactly as written (leading zeros included).
# iterating the two columns directly also avoids iterrows(), which builds
# one Series per row and does not preserve dtypes across the row.
filename = 'PlanDDec26.csv'
csvFile = pandas.read_csv( filename, skipinitialspace = True, dtype = str )
unassigned = list()
for geoid, label in zip( csvFile['GEOID20'], csvFile['District'] ):

    # what is the block's geoid? (15 characters; re-pad in case zeros were lost upstream)
    g = str(geoid).strip().zfill(15)

    # which node does this correspond to?
    i = geoid_to_node[g]

    # which district is it in?
    j = str(label).strip()

    # is it unassigned??
    if j in { 'ZZ', 'ZZZ' }:
        unassigned.append(i)
        continue

    # labels are 1-based; reject anything outside 1..k so that a label of 0
    # cannot silently land in the last district through index -1
    d = int(j)
    if not 1 <= d <= k:
        raise ValueError(state + ": district label " + repr(j) + " for block " + g + " is outside 1.." + str(k))
    districts[d-1].append(i)

print("unassigned =",unassigned)
if len(unassigned)==0:
    report_metrics(G, districts)


unassigned = []
-> 5 counties split a total of 6 times
-> average Polsby-Popper score of 0.252


In [11]:
# report metrics for the AL congressional plan stored in livingston.csv
state = 'AL'
district_type = 'CD'

# read block-level graph
filename = state + "_block.json"
G = read_graph_from_json(filepath+filename)
geoid_to_node = { G.nodes[i]['GEOID20'] : i for i in G.nodes }

# how many districts total?
k = number_of_districts[state,district_type]
districts = [ list() for j in range(k) ]

# read the plan from file (looked up relative to the working directory).
# dtype=str keeps every GEOID exactly as written (leading zeros included).
# iterating the two columns directly also avoids iterrows(), which builds
# one Series per row and does not preserve dtypes across the row.
filename = 'livingston.csv'
csvFile = pandas.read_csv( filename, skipinitialspace = True, dtype = str )
unassigned = list()
for geoid, label in zip( csvFile['GEOID20'], csvFile['District'] ):

    # what is the block's geoid? (15 characters; re-pad in case zeros were lost upstream)
    g = str(geoid).strip().zfill(15)

    # which node does this correspond to?
    i = geoid_to_node[g]

    # which district is it in?
    j = str(label).strip()

    # is it unassigned??
    if j in { 'ZZ', 'ZZZ' }:
        unassigned.append(i)
        continue

    # labels are 1-based; reject anything outside 1..k so that a label of 0
    # cannot silently land in the last district through index -1
    d = int(j)
    if not 1 <= d <= k:
        raise ValueError(state + ": district label " + repr(j) + " for block " + g + " is outside 1.." + str(k))
    districts[d-1].append(i)

print("unassigned =",unassigned)
if len(unassigned)==0:
    report_metrics(G, districts)


unassigned = []
-> 6 counties split a total of 6 times
-> average Polsby-Popper score of 0.2817


In [12]:
# report metrics for the AL congressional plan stored in AL_enacted_2022.csv
state = 'AL'
district_type = 'CD'

# read block-level graph
filename = state + "_block.json"
G = read_graph_from_json(filepath+filename)
geoid_to_node = { G.nodes[i]['GEOID20'] : i for i in G.nodes }

# how many districts total?
k = number_of_districts[state,district_type]
districts = [ list() for j in range(k) ]

# read the plan from file (looked up relative to the working directory).
# dtype=str keeps every GEOID exactly as written (leading zeros included).
# iterating the two columns directly also avoids iterrows(), which builds
# one Series per row and does not preserve dtypes across the row.
filename = 'AL_enacted_2022.csv'
csvFile = pandas.read_csv( filename, skipinitialspace = True, dtype = str )
unassigned = list()
for geoid, label in zip( csvFile['GEOID20'], csvFile['District'] ):

    # what is the block's geoid? (15 characters; re-pad in case zeros were lost upstream)
    g = str(geoid).strip().zfill(15)

    # which node does this correspond to?
    i = geoid_to_node[g]

    # which district is it in?
    j = str(label).strip()

    # is it unassigned??
    if j in { 'ZZ', 'ZZZ' }:
        unassigned.append(i)
        continue

    # labels are 1-based; reject anything outside 1..k so that a label of 0
    # cannot silently land in the last district through index -1
    d = int(j)
    if not 1 <= d <= k:
        raise ValueError(state + ": district label " + repr(j) + " for block " + g + " is outside 1.." + str(k))
    districts[d-1].append(i)

print("unassigned =",unassigned)
if len(unassigned)==0:
    report_metrics(G, districts)


unassigned = []
-> 6 counties split a total of 6 times
-> average Polsby-Popper score of 0.2221


In [15]:
# report metrics for the MS state senate plan stored in gingles_MS_SS_block.csv
state = 'MS'
district_type = 'SS'

# read block-level graph
filename = state + "_block.json"
G = read_graph_from_json(filepath+filename)
geoid_to_node = { G.nodes[i]['GEOID20'] : i for i in G.nodes }

# how many districts total?
k = number_of_districts[state,district_type]
districts = [ list() for j in range(k) ]

# read the plan from file (looked up relative to the working directory).
# dtype=str keeps every GEOID exactly as written (leading zeros included).
# iterating the two columns directly also avoids iterrows(), which builds
# one Series per row and does not preserve dtypes across the row.
filename = 'gingles_MS_SS_block.csv'
csvFile = pandas.read_csv( filename, skipinitialspace = True, dtype = str )
unassigned = list()
for geoid, label in zip( csvFile['GEOID20'], csvFile['District'] ):

    # what is the block's geoid? (15 characters; re-pad in case zeros were lost upstream)
    g = str(geoid).strip().zfill(15)

    # which node does this correspond to?
    i = geoid_to_node[g]

    # which district is it in?
    j = str(label).strip()

    # is it unassigned??
    if j in { 'ZZ', 'ZZZ' }:
        unassigned.append(i)
        continue

    # labels are 1-based; reject anything outside 1..k so that a label of 0
    # cannot silently land in the last district through index -1
    d = int(j)
    if not 1 <= d <= k:
        raise ValueError(state + ": district label " + repr(j) + " for block " + g + " is outside 1.." + str(k))
    districts[d-1].append(i)

print("unassigned =",unassigned)
if len(unassigned)==0:
    report_metrics(G, districts)


unassigned = []
-> 25 counties split a total of 46 times
-> average Polsby-Popper score of 0.3377


In [16]:
# report metrics for the MS state house plan stored in gingles_MS_SH_block.csv
state = 'MS'
district_type = 'SH'

# read block-level graph
filename = state + "_block.json"
G = read_graph_from_json(filepath+filename)
geoid_to_node = { G.nodes[i]['GEOID20'] : i for i in G.nodes }

# how many districts total?
k = number_of_districts[state,district_type]
districts = [ list() for j in range(k) ]

# read the plan from file (looked up relative to the working directory).
# dtype=str keeps every GEOID exactly as written (leading zeros included).
# iterating the two columns directly also avoids iterrows(), which builds
# one Series per row and does not preserve dtypes across the row.
filename = 'gingles_MS_SH_block.csv'
csvFile = pandas.read_csv( filename, skipinitialspace = True, dtype = str )
unassigned = list()
for geoid, label in zip( csvFile['GEOID20'], csvFile['District'] ):

    # what is the block's geoid? (15 characters; re-pad in case zeros were lost upstream)
    g = str(geoid).strip().zfill(15)

    # which node does this correspond to?
    i = geoid_to_node[g]

    # which district is it in?
    j = str(label).strip()

    # is it unassigned??
    if j in { 'ZZ', 'ZZZ' }:
        unassigned.append(i)
        continue

    # labels are 1-based; reject anything outside 1..k so that a label of 0
    # cannot silently land in the last district through index -1
    d = int(j)
    if not 1 <= d <= k:
        raise ValueError(state + ": district label " + repr(j) + " for block " + g + " is outside 1.." + str(k))
    districts[d-1].append(i)

print("unassigned =",unassigned)
if len(unassigned)==0:
    report_metrics(G, districts)


unassigned = []
-> 50 counties split a total of 118 times
-> average Polsby-Popper score of 0.4137


In [17]:
# report metrics for the GA state house plan stored in gingles_GA_SH_block.csv
state = 'GA'
district_type = 'SH'

# read block-level graph
filename = state + "_block.json"
G = read_graph_from_json(filepath+filename)
geoid_to_node = { G.nodes[i]['GEOID20'] : i for i in G.nodes }

# how many districts total?
k = number_of_districts[state,district_type]
districts = [ list() for j in range(k) ]

# read the plan from file (looked up relative to the working directory).
# dtype=str keeps every GEOID exactly as written (leading zeros included).
# iterating the two columns directly also avoids iterrows(), which builds
# one Series per row and does not preserve dtypes across the row.
filename = 'gingles_GA_SH_block.csv'
csvFile = pandas.read_csv( filename, skipinitialspace = True, dtype = str )
unassigned = list()
for geoid, label in zip( csvFile['GEOID20'], csvFile['District'] ):

    # what is the block's geoid? (15 characters; re-pad in case zeros were lost upstream)
    g = str(geoid).strip().zfill(15)

    # which node does this correspond to?
    i = geoid_to_node[g]

    # which district is it in?
    j = str(label).strip()

    # is it unassigned??
    if j in { 'ZZ', 'ZZZ' }:
        unassigned.append(i)
        continue

    # labels are 1-based; reject anything outside 1..k so that a label of 0
    # cannot silently land in the last district through index -1
    d = int(j)
    if not 1 <= d <= k:
        raise ValueError(state + ": district label " + repr(j) + " for block " + g + " is outside 1.." + str(k))
    districts[d-1].append(i)

print("unassigned =",unassigned)
if len(unassigned)==0:
    report_metrics(G, districts)


unassigned = []
-> 63 counties split a total of 180 times
-> average Polsby-Popper score of 0.4204
