In [23]:
from gerrychain import Graph



In [24]:
# Read Colorado county graph from the json file "COUNTY_08.json"
filepath = 'C:\\Users\sterl\OneDrive\Documents\\'
filename = 'COUNTY_08.json'

# GerryChain has a built-in function for reading graphs of this type:
G = Graph.from_json( filepath + filename )

In [25]:
# for each node, print the node #, county name, population and lat-long coordinates
for node in G.nodes:
    name = G.nodes[node]['NAME10']
    population = G.nodes[node]['POP10']
    G.nodes[node]['TOTPOP'] = population
    
    G.nodes[node]['C_X'] = G.nodes[node]['INTPTLON10'] # longitude of county's center
    G.nodes[node]['C_Y'] = G.nodes[node]['INTPTLAT10'] # latitude of county's center
    
    print("Node",node,"is",name,"County, which has population",population,"and is centered at (",G.nodes[node]['C_X'],",",G.nodes[node]['C_Y'],")")

Node 0 is Larimer County, which has population 299630 and is centered at ( -105.4821309 , +40.6630912 )
Node 1 is Las Animas County, which has population 15507 and is centered at ( -104.0441103 , +37.3188308 )
Node 2 is Fremont County, which has population 46824 and is centered at ( -105.4214383 , +38.4556576 )
Node 3 is Gunnison County, which has population 15324 and is centered at ( -107.0781080 , +38.6696792 )
Node 4 is Conejos County, which has population 8256 and is centered at ( -106.1764473 , +37.2134065 )
Node 5 is Eagle County, which has population 52197 and is centered at ( -106.6929439 , +39.6306381 )
Node 6 is Otero County, which has population 18831 and is centered at ( -103.7212597 , +37.8841698 )
Node 7 is La Plata County, which has population 51334 and is centered at ( -107.8397178 , +37.2873673 )
Node 8 is Summit County, which has population 27994 and is centered at ( -106.1375545 , +39.6210227 )
Node 9 is Custer County, which has population 4255 and is centered at ( -

In [41]:
# impose a 2% population deviation (+/- 0.5%)
deviation = 0.02

import math
k = 7         # number of districts
total_population = sum( G.nodes[node]['TOTPOP'] for node in G.nodes )

L = math.ceil( ( 1 - deviation / 2 ) * total_population / k )
U = math.floor( ( 1 + deviation / 2 ) * total_population / k )
print( "Using L =",L,"and U =",U,"and k =",k )

Using L = 711273 and U = 725641 and k = 7


In [46]:
# create distance dictionary
from geopy.distance import geodesic

dist = { (i,j) : 0 for i in G.nodes for j in G.nodes }
for i in G.nodes:
    for j in G.nodes:
        loc_i = ( G.nodes[i]['C_Y'],G.nodes[i]['C_X'] )
        loc_j = ( G.nodes[j]['C_Y'],G.nodes[j]['C_X'] )
        dist[i,j] = geodesic( loc_i,loc_j ).miles

In [47]:
import gurobipy as gp
from gurobipy import GRB

# create model
m = gp.Model()

# create x[i,j] variable which equals one when county i is assigned to (the district centered at) county j
x = m.addVars(G.nodes, G.nodes, vtype=GRB.BINARY)

In [48]:
 # set objective to minimize the moment of inertia\n",
m.setObjective( gp.quicksum( dist[i,j] * dist[i,j] * G.nodes[i]['TOTPOP'] * x[i,j] for i in G.nodes for j in G.nodes ), GRB.MINIMIZE )

In [49]:
# add constraints saying that each county i is assigned to one district
m.addConstrs( gp.quicksum( x[i,j] for j in G.nodes ) == 1 for i in G.nodes )

# add constraint saying there should be k district centers
m.addConstr( gp.quicksum( x[j,j] for j in G.nodes ) == k )

# add constraints that say: if j roots a district, then its population is between L and U.
m.addConstrs( gp.quicksum( G.nodes[i]['TOTPOP'] * x[i,j] for i in G.nodes ) >= L * x[j,j] for j in G.nodes )
m.addConstrs( gp.quicksum( G.nodes[i]['TOTPOP'] * x[i,j] for i in G.nodes ) <= U * x[j,j] for j in G.nodes )

# add coupling constraints saying that if i is assigned to j, then j is a center.
m.addConstrs( x[i,j] <= x[j,j] for i in G.nodes for j in G.nodes )

m.update()

In [50]:
# add contiguity constraints
import networkx as nx
DG = nx.DiGraph(G)

# add flow variables
f = m.addVars( DG.edges, G.nodes ) # f[i,j,v] = flow across arc (i,j) that is sent from root v

# add constraints saying that if node i is assigned to node j, then node i must consume one unit of node j's flow
m.addConstrs( gp.quicksum( f[u,i,j] - f[i,u,j] for u in G.neighbors(i) ) == x[i,j] for i in G.nodes for j in G.nodes if i != j )

# add constraints saying that node i can recieve flow of type j only if node i is assigned to node j
M = G.number_of_nodes() - 1
m.addConstrs( gp.quicksum( f[u,i,j] for u in G.neighbors(i) ) <= M * x[i,j] for i in G.nodes for j in G.nodes if i!= j )

# add constraints saying that node j cannot recieve flow of its own type
m.addConstrs( gp.quicksum( f[u,j,j] for u in G.neighbors(j) ) == 0 for j in G.nodes )

m.update()

In [51]:
# solve, with a 0.00% MIP gap tolerance
m.Params.MIPGap = 0.0

m.optimize()

Set parameter MIPGap to value 0
Gurobi Optimizer version 10.0.3 build v10.0.3rc0 (win64)

CPU model: Intel(R) Core(TM) i7-10750H CPU @ 2.60GHz, instruction set [SSE2|AVX|AVX2]
Thread count: 6 physical cores, 12 logical processors, using up to 12 threads



GurobiError: Model too large for size-limited license; visit https://www.gurobi.com/free-trial for a full license

In [22]:
# print the objective value
print('The objective value is =',m.objVal)
print('')

# retrieve the district "centers"
centers = [ j for j in G.nodes if x[j,j].x > 0.5 ]

# retrieve the districts and their populations
districts = [ [ i for i in G.nodes if x[i,j].x > 0.5 ] for j in centers ]
district_counties = [ [ G.nodes[i]["NAME20"] for i in districts[j] ] for j in range(k) ]
district_populations = [ sum( G.nodes[i]["TOTPOP"] for i in districts[j] ) for j in range(k) ]

# print district info
for j in range(k):
    print("District",j,"has population",district_populations[j],"and contains counties",district_counties[j])
    print("")

# print deviation between districts
print("The deviation between the districts is",abs( district_populations[0]-district_populations[1] ),"persons." )

AttributeError: Unable to retrieve attribute 'objVal'

In [34]:
# Read Colorado county shapefile 
import geopandas as gpd
filepath = 'C:\\Users\sterl\OneDrive\Documents\\'
filename = 'CO_counties.shp'

# Read geopandas dataframe from file
df = gpd.read_file( filepath + filename )

DriverError: C:\Users\sterl\OneDrive\Documents\CO_counties.shp: No such file or directory

In [35]:
# county i is assigned to district j
assignment = [ -1 for i in G.nodes ]

labeling = { i : -1 for i in G.nodes }
for j in range(k):
    district = districts[j]
    for i in district:
        labeling[i] = j


# add the assignments to a column of the dataframe and map it
node_with_this_geoid = {G.nodes[i]['GEOID20'] : i for i in G.nodes}

# pick a position u in the dataframe
for u in range(G.number_of_nodes()):
    
    geoid = df['GEOID20'][u]
    
    # what node in G has this geoid?
    i = node_with_this_geoid[geoid]
    
    # position u in the dataframe should be given the same district # that county i has in 'labeling'
    assignment[u] = labeling[i] 

# now add the assignments to a column of our dataframe and then map it
df['assignment'] = assignment

my_fig = df.plot(column='assignment').get_figure()

NameError: name 'districts' is not defined