In [1]:
import json
from networkx.readwrite import json_graph

def read_graph_from_json(json_file):
    """Build a NetworkX graph from a JSON file in adjacency format.

    Parameters
    ----------
    json_file : path of the JSON file to read; it is passed straight to ``open``.

    Returns
    -------
    The graph produced by ``json_graph.adjacency_graph`` from the parsed JSON.
    """
    with open(json_file) as handle:
        adjacency_data = json.loads(handle.read())
    return json_graph.adjacency_graph(adjacency_data)

In [2]:
# Read the Oklahoma county graph from the JSON file "OK_county.json".
import os

# Build the directory prefix with the platform's own separator instead of a
# hard-coded backslash. Joining with '' appends a trailing separator, so the
# value is 'districting-data\\' on Windows (unchanged) and 'districting-data/'
# on Linux/macOS, where the backslash version would not resolve.
filepath = os.path.join('districting-data', '')
filename = 'OK_county.json'

G = read_graph_from_json( os.path.join( filepath, filename ) )

In [3]:
# Walk over every node, report its id, county name, and 2020 census population,
# and copy the population into the 'TOTPOP' attribute that the model cells read.
for node in G.nodes:
    record = G.nodes[node]                 # attribute dict of this county
    county_name = record['NAME20']
    county_population = record['P0010001']
    record['TOTPOP'] = county_population
    print("Node",node,"represents",county_name,"County, which had a population of",county_population,"in the 2020 census.")

Node 0 represents Washita County, which had a population of 10924 in the 2020 census.
Node 1 represents Jackson County, which had a population of 24785 in the 2020 census.
Node 2 represents Major County, which had a population of 7782 in the 2020 census.
Node 3 represents Delaware County, which had a population of 40397 in the 2020 census.
Node 4 represents Custer County, which had a population of 28513 in the 2020 census.
Node 5 represents Ellis County, which had a population of 3749 in the 2020 census.
Node 6 represents Oklahoma County, which had a population of 796292 in the 2020 census.
Node 7 represents Johnston County, which had a population of 10272 in the 2020 census.
Node 8 represents Comanche County, which had a population of 121125 in the 2020 census.
Node 9 represents Pushmataha County, which had a population of 10812 in the 2020 census.
Node 10 represents Cleveland County, which had a population of 295528 in the 2020 census.
Node 11 represents Wagoner County, which had a p

In [4]:
# We want to solve the following task:
# input: a population vector and a desired number of districts k
# output: a partition of the populations into k districts (not necessarily connected!)
#            that minimizes the difference between the most and least populated districts

import gurobipy as gp
from gurobipy import GRB

In [5]:
# Desired number of districts.
k = 5

# Start from an empty Gurobi model.
m = gp.Model()

# Assignment variables: x[i,j] = 1 when county i is assigned to district j.
x = m.addVars(
    G.nodes,
    k,
    vtype=GRB.BINARY,
)

# Two continuous, nonnegative variables (Gurobi's addVar defaults, written out).
y = m.addVar( lb=0.0, vtype=GRB.CONTINUOUS )   # y = population of smallest district
z = m.addVar( lb=0.0, vtype=GRB.CONTINUOUS )   # z = population of largest district

Set parameter Username
Academic license - for non-commercial use only - expires 2024-05-11


In [6]:
# Objective: minimize the gap z - y between the upper and lower population bounds.
m.setObjective( z - y, GRB.MINIMIZE )

# Assignment: for each county i, its x[i,j] values over all districts j sum to one.
m.addConstrs(
    ( x.sum(i, '*') == 1 for i in G.nodes )
)

# Lower bound: the population placed in each district j is at least y.
m.addConstrs(
    ( gp.quicksum( G.nodes[i]['TOTPOP'] * x[i,j] for i in G.nodes ) >= y
      for j in range(k) )
)

# Upper bound: the population placed in each district j is at most z.
m.addConstrs(
    ( gp.quicksum( G.nodes[i]['TOTPOP'] * x[i,j] for i in G.nodes ) <= z
      for j in range(k) )
)

# Process the pending model changes.
m.update()

In [7]:
# Solve the integer program (binary assignment variables x plus the two
# continuous bounds y and z). Gurobi prints its solver log as this cell's output.
m.optimize()

Gurobi Optimizer version 10.0.1 build v10.0.1rc0 (win64)

CPU model: Intel(R) Core(TM) i7-4790 CPU @ 3.60GHz, instruction set [SSE2|AVX|AVX2]
Thread count: 4 physical cores, 8 logical processors, using up to 8 threads

Optimize a model with 87 rows, 387 columns and 1165 nonzeros
Model fingerprint: 0x4c85ac40
Variable types: 2 continuous, 385 integer (385 binary)
Coefficient statistics:
  Matrix range     [1e+00, 8e+05]
  Objective range  [1e+00, 1e+00]
  Bounds range     [1e+00, 1e+00]
  RHS range        [1e+00, 1e+00]
Found heuristic solution: objective 1731757.0000
Presolve time: 0.01s
Presolved: 87 rows, 387 columns, 1165 nonzeros
Variable types: 0 continuous, 387 integer (385 binary)

Root relaxation: objective 0.000000e+00, 119 iterations, 0.00 seconds (0.00 work units)

    Nodes    |    Current Node    |     Objective Bounds      |     Work
 Expl Unexpl |  Obj  Depth IntInf | Incumbent    BestBd   Gap | It/Node Time

     0     0    0.00000    0    9 1731757.00    0.00000   100%

In [8]:
# Report the objective value of the solution Gurobi found (z - y).
print("The minimum required deviation is",m.objVal,"persons.")

# Rebuild the districts from the solution: county i is placed in district j
# when x[i,j] is set (compared against 0.5 rather than tested for equality with 1).
districts = [ [] for _ in range(k) ]
for i in G.nodes:
    for j in range(k):
        if x[i,j].x > 0.5:
            districts[j].append(i)

# County names and total population per district, in the same order as `districts`.
district_county_names = []
district_populations = []
for members in districts:
    district_county_names.append( [ G.nodes[i]['NAME20'] for i in members ] )
    district_populations.append( sum( G.nodes[i]['TOTPOP'] for i in members ) )

# Print one summary per district.
for j, (members, names, population) in enumerate( zip( districts, district_county_names, district_populations ) ):
    print("District",j,"has these nodes =",members,"and this population =",population )
    print("The corresponding county names are =",names )
    print("")

The minimum required deviation is 5527.0 persons.
District 0 has these nodes = [0, 2, 3, 4, 5, 11, 15, 19, 23, 36, 40, 44, 46, 47, 50, 54, 59, 68, 69, 73] and this population = 790765
The corresponding county names are = ['Washita', 'Major', 'Delaware', 'Custer', 'Ellis', 'Wagoner', 'Cherokee', 'Pittsburg', 'McClain', 'Kingfisher', 'Haskell', 'Garfield', 'Sequoyah', 'Canadian', 'Alfalfa', 'Ottawa', 'Carter', 'Harmon', 'Kay', 'Pottawatomie']

District 1 has these nodes = [6] and this population = 796292
The corresponding county names are = ['Oklahoma']

District 2 has these nodes = [8, 12, 14, 16, 21, 22, 27, 29, 30, 35, 38, 39, 41, 42, 49, 53, 58, 60, 62, 63, 64, 65, 66, 67, 70, 71, 74, 75] and this population = 790765
The corresponding county names are = ['Comanche', 'Beckham', 'Craig', 'Osage', 'Pawnee', 'Okfuskee', 'Choctaw', 'Bryan', 'Latimer', 'McIntosh', 'Murray', 'Nowata', 'Grady', 'Payne', 'Harper', 'Muskogee', 'Coal', 'Greer', 'McCurtain', 'Atoka', 'Okmulgee', 'Washington', 'S

In [9]:
import networkx as nx
# For each district, test whether the subgraph induced by its counties is connected.
for district in districts:
    induced = G.subgraph( district )
    connected = nx.is_connected( induced )
    print("Is district =", district, "connected?", connected )

Is district = [0, 2, 3, 4, 5, 11, 15, 19, 23, 36, 40, 44, 46, 47, 50, 54, 59, 68, 69, 73] connected? False
Is district = [6] connected? True
Is district = [8, 12, 14, 16, 21, 22, 27, 29, 30, 35, 38, 39, 41, 42, 49, 53, 58, 60, 62, 63, 64, 65, 66, 67, 70, 71, 74, 75] connected? False
Is district = [7, 9, 10, 13, 17, 18, 20, 24, 25, 28, 31, 33, 45, 51, 52, 55, 56, 61, 72] connected? False
Is district = [1, 26, 32, 34, 37, 43, 48, 57, 76] connected? False
