In [5]:
from gerrychain import Graph

In [6]:
# Read the Colorado county graph from the JSON file "COUNTY_08.json".
# Every backslash in the Windows path is escaped explicitly: the previous literal
# depended on '\s', '\O' and '\D' being passed through unchanged, which Python
# reports as invalid escape sequences. The resulting path string is the same.
filepath = 'C:\\Users\\sterl\\OneDrive\\Documents\\'
filename = 'COUNTY_08.json'

# GerryChain has a built-in function for reading graphs of this type:
G = Graph.from_json( filepath + filename )

In [7]:
# Report every node's id, county name, population, and centroid coordinates
for node, data in G.nodes(data=True):
    print(f"Node {node} is {data['NAME10']} County, which has population {data['TOTPOP']} and is centered at ( {data['C_X']} , {data['C_Y']} )")

Node 0 is Larimer County, which has population 299630 and is centered at ( -105.46115548979994 , 40.66641349770793 )
Node 1 is Las Animas County, which has population 15507 and is centered at ( -104.03871637705137 , 37.31585080985697 )
Node 2 is Fremont County, which has population 46824 and is centered at ( -105.4396570680379 , 38.472972462552946 )
Node 3 is Gunnison County, which has population 15324 and is centered at ( -107.03170010996529 , 38.66679771110492 )
Node 4 is Conejos County, which has population 8256 and is centered at ( -106.19162861810288 , 37.200696442550516 )
Node 5 is Eagle County, which has population 52197 and is centered at ( -106.69529923763571 , 39.62782564739975 )
Node 6 is Otero County, which has population 18831 and is centered at ( -103.71644567833219 , 37.90270171917877 )
Node 7 is La Plata County, which has population 51334 and is centered at ( -107.84333549404876 , 37.28655819216788 )
Node 8 is Summit County, which has population 27994 and is centered at

In [4]:
# Requires geopy. If it is not installed, run this in its own cell first:  %pip install geopy

In [8]:

from geopy.distance import geodesic

# geopy expects each point as ( latitude, longitude ).
# In this graph C_Y holds the latitude and C_X the longitude
# (e.g., node 0 is centered at C_X = -105.46, C_Y = 40.67).
sample_nodes = [ 0, 15, 23 ]
centroid = { v: ( G.nodes[v]['C_Y'], G.nodes[v]['C_X'] ) for v in sample_nodes }

# Take the labels from the graph itself instead of hard-coding county names,
# so the printed text always matches the nodes being measured.
county = { v: G.nodes[v]['NAME10'] for v in sample_nodes }

# Print the pairwise distances in miles
for a, b in [ (0, 15), (15, 23), (23, 0) ]:
    print(county[a], "->", county[b] + ":", geodesic(centroid[a], centroid[b]).miles)

ModuleNotFoundError: No module named 'geopy'

In [None]:
# Build dist[i,j]: the geodesic distance, in miles, between the centroids of nodes i and j
centroids = { v: ( G.nodes[v]['C_Y'], G.nodes[v]['C_X'] ) for v in G.nodes }
dist = {
    (i, j): geodesic(centroids[i], centroids[j]).miles
    for i in G.nodes
    for j in G.nodes
}

In [None]:
# Check the dictionary by printing one entry (node 0 -> node 15).
# The label is built from the county names stored in the graph, so it always
# describes the pair whose distance is shown.
print(G.nodes[0]['NAME10'], "->", G.nodes[15]['NAME10'] + ":", dist[0,15])

In [None]:
import math

# Problem parameters: k districts and a 1% total population deviation (+/- 0.5%)
k = 7         # number of districts
deviation = 0.01

total_population = sum(data['TOTPOP'] for _, data in G.nodes(data=True))

# Lower and upper bounds on a district's population, rounded inward to integers
lower_factor = 1 - deviation/2
upper_factor = 1 + deviation/2
L = math.ceil(lower_factor*total_population/k)
U = math.floor(upper_factor*total_population/k)

print("Using L =",L,"and U =",U,"and k =",k)

In [9]:
import gurobipy as gp
from gurobipy import GRB

# Start from an empty Gurobi model
m = gp.Model()

# Binary assignment variables, one per ordered pair of counties:
#    x[i,j] = 1 exactly when county i is assigned to (the district centered at) county j
x = m.addVars(
    G.nodes,
    G.nodes,
    vtype=GRB.BINARY,
)

Set parameter Username


In [10]:
# Minimize the moment of inertia: the sum over all (i,j) of d(i,j)^2 * p(i) * x[i,j]
moment_of_inertia = gp.quicksum(
    dist[i,j] * dist[i,j] * G.nodes[i]['TOTPOP'] * x[i,j]
    for i in G.nodes
    for j in G.nodes
)
m.setObjective( moment_of_inertia, GRB.MINIMIZE )

NameError: name 'dist' is not defined

In [11]:
def district_population(j):
    """Return the linear expression for the total population assigned to center j."""
    return gp.quicksum( G.nodes[i]['TOTPOP'] * x[i,j] for i in G.nodes )


# Every county i is assigned to exactly one district
m.addConstrs(
    gp.quicksum( x[i,j] for j in G.nodes ) == 1
    for i in G.nodes
)

# Exactly k counties act as district centers
m.addConstr( gp.quicksum( x[j,j] for j in G.nodes ) == k )

# If j is a center, the population assigned to it lies between L and U;
# if j is not a center (x[j,j] = 0), both bounds force that population to 0.
m.addConstrs( district_population(j) >= L * x[j,j] for j in G.nodes )
m.addConstrs( district_population(j) <= U * x[j,j] for j in G.nodes )

# Coupling: county i can be assigned to j only if j is a center
m.addConstrs(
    x[i,j] <= x[j,j]
    for i in G.nodes
    for j in G.nodes
)

m.update()

NameError: name 'k' is not defined

In [12]:
# Contiguity constraints, modelled with one flow type per potential center j

import networkx as nx

# Directed version of the county graph; flow is sent along its arcs
DG = nx.DiGraph(G)

# f[j,u,v] = amount of flow (originally from j) that is sent across arc (u,v)
f = m.addVars( DG.nodes, DG.edges, vtype=GRB.CONTINUOUS )

# Big-M coefficient used to switch incoming flow on/off: (number of nodes) - 1
M = DG.number_of_nodes() - 1

# Ordered pairs (i,j) of distinct nodes: county i and potential center j
distinct_pairs = [ (i, j) for i in DG.nodes for j in DG.nodes if i != j ]

# A node j never receives flow of its own type
m.addConstrs(
    gp.quicksum( f[j,u,j] for u in DG.neighbors(j) ) == 0
    for j in DG.nodes
)

# Node i may receive flow of type j only when i is assigned to j
m.addConstrs(
    gp.quicksum( f[j,u,i] for u in DG.neighbors(i) ) <= M * x[i,j]
    for i, j in distinct_pairs
)

# Net inflow of type j at node i (inflow minus outflow) equals x[i,j]:
#    one unit is consumed if i is assigned to j, none otherwise
m.addConstrs(
    gp.quicksum( f[j,u,i] - f[j,i,u] for u in DG.neighbors(i) ) == x[i,j]
    for i, j in distinct_pairs
)

m.update()

In [13]:
# Solve to proven optimality: the MIP gap tolerance is set to 0.00% first(!)
m.setParam( GRB.Param.MIPGap, 0.0 )
m.optimize()

Set parameter MIPGap to value 0
Gurobi Optimizer version 10.0.3 build v10.0.3rc0 (win64)

CPU model: Intel(R) Core(TM) i7-10750H CPU @ 2.60GHz, instruction set [SSE2|AVX|AVX2]
Thread count: 6 physical cores, 12 logical processors, using up to 12 threads



GurobiError: Model too large for size-limited license; visit https://www.gurobi.com/free-trial for a full license

In [None]:
# Report the objective value of the solution found by the solver
print("The moment of inertia objective is",m.objval)

# Retrieve the districts and their populations.
# County j is a center when x[j,j] = 1; the binary values are compared against 0.5
# rather than tested for equality with 1.
centers = [ j for j in G.nodes if x[j,j].x > 0.5 ]
districts = [ [ i for i in G.nodes if x[i,j].x > 0.5 ] for j in centers ]

# Iterate over the districts actually found (not range(k)) so these lists always
# line up with `districts`, matching how the mapping cell walks over them.
district_counties = [ [ G.nodes[i]["NAME10"] for i in district ] for district in districts ]
district_populations = [ sum( G.nodes[i]["TOTPOP"] for i in district ) for district in districts ]

# Print district info
for j, (district_population, county_names) in enumerate( zip(district_populations, district_counties) ):
    print("District",j,"has population",district_population,"and contains counties",county_names)

In [None]:
# Let's draw it on a map
import geopandas as gpd

In [None]:
# Read the Colorado county shapefile "CO_counties.shp".
# Every backslash in the Windows path is escaped explicitly: the previous literal
# depended on '\s', '\O' and '\D' being passed through unchanged, which Python
# reports as invalid escape sequences. The resulting path string is the same.
filepath = 'C:\\Users\\sterl\\OneDrive\\Documents\\'
filename = 'CO_counties.shp'

# Read geopandas dataframe from file
df = gpd.read_file( filepath + filename )

In [None]:
# Which district is each county assigned to?
# Map every county's GEOID (as stored on the graph nodes) to the index of its district.
district_of_geoid = {
    G.nodes[i]["GEOID10"]: j
    for j, district in enumerate(districts)
    for i in district
}

# Look each dataframe row up by its GEOID. This is a single pass over the rows and
# does not depend on the dataframe's row labels coinciding with the graph's node labels.
# Rows whose GEOID is not in any district keep the placeholder -1.
assignment = [ district_of_geoid.get(geoid, -1) for geoid in df['GEOID10'] ]

# Now add the assignments to a column of the dataframe and map it
df['assignment'] = assignment
my_fig = df.plot(column='assignment').get_figure()