In [1]:
#import
from deap_wrapper import genetic_algorithm

import warnings
warnings.filterwarnings('ignore')

# Example - TSP without local search

In [2]:
# load problem data - 7 cities
import json
with open("data/HillierTSP.json", "r") as tsp_data:
    tsp = json.load(tsp_data)

distance_matrix = tsp["DistanceMatrix"]
ind_size = tsp["TourSize"] # number fo variables or dimension of the problem

def tour_distance(individual, dist_mat):
    # an individual is a tour 
    distance = dist_mat[individual[-1]][individual[0]]
    for gene1, gene2 in zip(individual[0:-1], individual[1:]):
        distance += dist_mat[gene1][gene2]
    return distance    

optimal_tour = [0,1,3,5,6,4,2]
best_val,best_x,logbook = genetic_algorithm(tour_distance,ind_type='permutation',ind_size=ind_size,pop_size=100, cx_prob = .7, mut_prob = 0.2, max_gen = 200, max_no_improve = 40,dist_mat=distance_matrix)
print(f'Length of best tour {best_val}')
print('\nBest tour found:')
print(best_x)

Length of best tour 63.0

Best tour found:
[5, 6, 4, 2, 0, 1, 3]


In [3]:
# this can take a while, reduce pop_size, max_no_improve to speed it up
# but the answer may not be as good

# load problem data - 48 cities
import json
with open("data/Caps48.json", "r") as tsp_data:
    tsp = json.load(tsp_data)

distance_matrix = tsp["DistanceMatrix"]
ind_size = tsp["TourSize"] # number of variables or dimension of the problem

def tour_distance(individual, dist_mat):
    # an individual is a tour 
    distance = dist_mat[individual[-1]][individual[0]]
    for gene1, gene2 in zip(individual[0:-1], individual[1:]):
        distance += dist_mat[gene1][gene2]
    return distance    

best_val,best_x,logbook = genetic_algorithm(tour_distance,ind_type='permutation',ind_size=ind_size,pop_size=1000, cx_prob = .7, mut_prob = 0.2, max_gen = 2000, max_no_improve = 500,dist_mat=distance_matrix)
print(f'Length of best tour {best_val/1000:.0f} km')
print('\nBest tour found:')
print(best_x)

Length of best tour 18151 km

Best tour found:
[34, 3, 25, 1, 28, 40, 15, 21, 22, 10, 46, 19, 11, 14, 32, 45, 17, 6, 27, 5, 36, 18, 26, 16, 42, 29, 35, 43, 30, 37, 8, 7, 0, 39, 2, 33, 13, 24, 12, 20, 31, 38, 47, 4, 41, 23, 9, 44]


# GA for Knapsack

In [4]:
# Solve Knapsack with DEAP

import numpy as np

num_items = 20
np.random.seed(seed=123)
values = np.random.randint(low=5, high=50, size=num_items)
weights = np.random.randint(low=1, high=10, size=num_items)
max_weight = 50
np.random.seed() # use system clock? to reset the seed

def knapsack_value(x, values, weights, max_weight):
    # x is a vector of booleans of which items to include
    tot_value = sum(values[x])
    penalty = sum(values)*min( max_weight - sum(weights[x]), 0) # 0 if underweight, neg if overweight
    return tot_value+penalty

best_val,best_x,logbook = genetic_algorithm(knapsack_value,ind_type='boolean',ind_size=num_items,pop_size=400, cx_prob = .7, mut_prob = 0.2, max_gen = 400, max_no_improve = 100, minimize=False, values = values, weights = weights, max_weight=max_weight )
print(f'Total Value of items is {best_val}')
print('\nKnapsack:')
print(best_x)

Total Value of items is 435.0

Knapsack:
[True, False, True, True, False, False, True, True, True, True, False, True, True, False, True, False, True, True, True, False]


# Rastrigin - no local search

In [5]:
import numpy as np

def rastrigin(x):
    # pass a single vector of length n (=dim) to evaluate Rastrigin
    xnp = np.array(x)
    return sum(xnp**2 + 10 - 10 * np.cos(2 * np.pi * xnp))

best_val,best_x,logbook = genetic_algorithm( rastrigin, ind_type='float', ind_size=10, pop_size=200, cx_prob = .7, mut_prob = 0.2, max_gen = 2000, max_no_improve = 200, minimize=True, lower=-5.12, upper = 5.12 )
print(f'Minimum Value {best_val}')
print('\n x:')
print(best_x)

Minimum Value 0.0019782559586172255

 x:
[-0.0004072791247833427, 4.433076303863617e-05, 2.3528377014983872e-08, 0.0004428889271084563, -2.992388456379533e-05, 2.1711156106067244e-05, -0.0007296674910242002, -0.000136469873798129, -0.0028454405748494257, -0.00097915009087516]


In [6]:
# use one point crossover instead of blended crossover, may not work as well
import numpy as np

def rastrigin(x):
    # pass a single vector of length n (=dim) to evaluate Rastrigin
    xnp = np.array(x)
    return sum(xnp**2 + 10 - 10 * np.cos(2 * np.pi * xnp))

user_config_dict = {"mate_op":"cxOnePoint", "mate_param":{}}

best_val,best_x,logbook = genetic_algorithm( rastrigin, ind_type='float', ind_size=10, pop_size=200, cx_prob = .7, mut_prob = 0.2, max_gen = 2000, max_no_improve = 200, minimize=True, lower=-5.12, upper = 5.12, user_config_dict = user_config_dict )
print(f'Minimum Value {best_val}')
print('\n x:')
print(best_x)

Minimum Value 0.0016206961043927492

 x:
[0.0006822720515603498, 0.0020450761279650585, 0.0003393459691272494, -0.0002689722459921361, -0.0009827565932101785, -0.0003583816632108431, -0.0001515465415164556, -0.00021123563385403463, -0.0009651160291551959, 0.0011138262679108654]


# Gerrymandering GA

This doesn't work well.  The formulation isn't solid since the numbering scheme leads to non-unique answers

See if a better formulation can be worked out or find a simpler integer-valued GA problem.

In [7]:
# may be slow

# set the data and define the objective function
num_districts = 10
min_voters_in_district = 150
max_voters_in_district = 350
num_cities = 18

dems = [152,81,75,34,62,38,48,74,98,66,83,86,72,28,112,45,93,72]
reps = [62,59,83,52,87,87,69,49,62,72,75,82,83,53,98,82,68,98]

import pandas as pd
cities = pd.DataFrame( data = {'dems':dems, 'reps':reps})

# same as in Lesson 4
def fitness_districts(assign, cities):
    df = cities.groupby(assign).sum()
    fitness = sum( df['reps'] > df['dems'] )
    total_voters = np.zeros(num_districts,dtype=np.int32)
    total_voters[df.index] = df.sum(axis=1)
    fitness-= np.abs(np.minimum(np.maximum(total_voters,150),350)-total_voters).sum()
    return fitness 

# local search operator to reorder district labels
def loc_search_reorder(ind,**kwargs):
    fitness = ind.fitness.values[0]
    remap = remap = dict(zip(list(dict.fromkeys(ind)),range(len(ind)+1)))
    assign = [remap[x] for x in ind]
    return assign, fitness
    

districts_won,assign,logbook = genetic_algorithm( fitness_districts, ind_type='integer', ind_size=num_cities, pop_size=1000, cx_prob = 0.2, mut_prob = 0.8, max_gen = 100, max_no_improve = 40, minimize=False, lower=0, upper = num_districts-1, loc_search_fun = loc_search_reorder, ls_num_update = 30, cities = cities)

# for printing out an assignment of cities to districts
def summarize_districts(assign, cities):
    reps = np.zeros(num_districts, dtype=np.int32)
    dems = np.zeros(num_districts, dtype=np.int32)
    df = cities.groupby(assign).sum()
    reps[df.index] = df['reps']
    dems[df.index] = df['dems']
    total = reps + dems
    delta = np.minimum(np.maximum(total, min_voters_in_district),
                       max_voters_in_district) - total
    rep_win = reps > dems
    dict = {
        'reps': reps,
        'dems': dems,
        'total': total,
        'rep_win': rep_win
    }
    return (pd.DataFrame(data=dict))

print(f'Number of districts won by republicans is {districts_won}')
summarize_districts(assign, cities)

print(logbook)
print(assign)

Number of districts won by republicans is 8.0
avg     	evals	gen	ls	max	min  	std    
-508.723	830  	0  	30	6  	-1441	224.512
-383.681	840  	1  	23	6  	-1167	189.284
-306.889	819  	2  	23	7  	-1193	175.616
-248.501	856  	3  	22	7  	-1051	170.113
-213.718	852  	4  	22	7  	-931 	167.864
-178.889	842  	5  	20	7  	-1178	157.85 
-147.988	858  	6  	24	7  	-757 	140.13 
-135.36 	849  	7  	24	8  	-882 	150.145
-126.512	815  	8  	21	7  	-937 	155.758
-105.453	865  	9  	23	7  	-900 	136.207
-100.807	826  	10 	23	7  	-858 	140.342
-100.252	841  	11 	23	7  	-1098	146.426
-92.406 	837  	12 	21	7  	-965 	134.641
-83.338 	835  	13 	23	7  	-601 	122.164
-86.633 	823  	14 	27	7  	-963 	134.263
-96.341 	830  	15 	27	7  	-701 	134.959
-82.748 	828  	16 	27	8  	-843 	129.957
-79.345 	848  	17 	30	8  	-623 	121.887
-84.076 	836  	18 	29	8  	-885 	129.653
-82.854 	827  	19 	28	8  	-703 	121.034
-86.142 	845  	20 	27	8  	-837 	130.89 
-79.5   	824  	21 	20	8  	-753 	122.361
-84.242 	827  	22 	21	8  	-820 	12

# TSP GA with Local Search

In [8]:
import numpy as np

# objective function
def tour_distance(individual, dist_mat):
    # an individual is a tour 
    distance = dist_mat[individual[-1]][individual[0]]
    for gene1, gene2 in zip(individual[0:-1], individual[1:]):
        distance += dist_mat[gene1][gene2]
    return distance

# local search functions
def sub_tour_reversal(tour,i,j):
    n = len(tour)
    return (np.concatenate((tour[0:i], tour[j:-n + i - 1:-1], tour[j + 1:n])).astype(int))

def two_opt(start_tour,dist_mat):
    num_cities = len(start_tour)
    current_dist = tour_distance(start_tour, dist_mat)
    best_tour = start_tour
    best_dist = current_dist

    improvement = True
    iterations = 0
    while improvement:
        improvement = False
        for i in range(num_cities - 1):
            for j in range(i + 1, num_cities):
                iterations += 1
                new_tour = sub_tour_reversal(best_tour, i, j)
                new_dist = tour_distance(new_tour, dist_mat)
                if new_dist < best_dist:
                    best_tour = new_tour
                    best_dist = new_dist
                    improvement = True
    return best_tour, best_dist

# load problem data
import json
with open("data/HillierTSP.json", "r") as tsp_data:
    tsp = json.load(tsp_data)

distance_matrix = tsp["DistanceMatrix"]
ind_size = tsp["TourSize"] # number fo variables or dimension of the problem

optimal_tour = [0,1,3,5,6,4,2]

# use user_config_dict to change from PartiallyMatched crossover to Ordered crossover

best_val,best_x,logbook = genetic_algorithm(tour_distance,ind_type='permutation',ind_size=ind_size,pop_size=20, cx_prob = .7, mut_prob = 0.2, max_gen = 200, max_no_improve = 10,loc_search_fun = two_opt,ls_num_update = 5,ls_update_type = 'best',dist_mat=distance_matrix)
print(f'Length of best tour {best_val}')
print('\nBest tour found:')
print(best_x)
print('\nLogbook')
print(logbook)

Length of best tour 63.0

Best tour found:
[4, 2, 0, 1, 3, 5, 6]

Logbook
avg   	evals	gen	ls	max	min	std    
169.35	15   	0  	5 	343	63 	109.046
128.2 	18   	1  	2 	250	63 	70.6637
86.25 	19   	2  	3 	249	63 	55.7583
96    	20   	3  	5 	334	63 	72.3512
72.95 	12   	4  	5 	250	63 	40.6245
90.5  	16   	5  	0 	245	63 	64.6371
81.65 	11   	6  	5 	251	63 	55.952 
72.4  	18   	7  	4 	251	63 	40.9737
100   	16   	8  	5 	251	63 	67.8609
135.45	16   	9  	5 	425	63 	105.3  
104.65	17   	10 	4 	252	63 	74.6005


In [9]:
# 48 cities
import json
with open("data/Caps48.json", "r") as tsp_data:
    tsp = json.load(tsp_data)
distance_matrix = tsp["DistanceMatrix"]
individual_size = tsp["TourSize"]

best_val,best_x,logbook = genetic_algorithm(tour_distance,ind_type='permutation',ind_size=individual_size,pop_size=100, cx_prob = .8, mut_prob = 0.2, max_gen = 400, max_no_improve = 50,loc_search_fun = two_opt,ls_num_update = 5,ls_update_type = 'best',user_config_dict = {"mate_op":"cxOrdered"},dist_mat=distance_matrix)
print(f'Length of best tour {best_val}')
print('\nBest tour found:')
print(best_x)
print('\nLogbook')
print(logbook)

Length of best tour 17778413.0

Best tour found:
[6, 17, 43, 30, 37, 8, 7, 0, 15, 21, 2, 39, 14, 11, 10, 22, 12, 24, 13, 33, 40, 1, 28, 4, 47, 41, 9, 25, 3, 34, 44, 23, 31, 38, 20, 46, 19, 32, 45, 35, 29, 42, 16, 26, 18, 36, 5, 27]

Logbook
avg        	evals	gen	ls	max        	min        	std        
7.12387e+07	80   	0  	5 	8.8606e+07 	1.79155e+07	1.71706e+07
6.34691e+07	82   	1  	0 	8.24698e+07	2.14075e+07	1.68971e+07
4.91584e+07	75   	2  	5 	7.82607e+07	1.77784e+07	1.74392e+07
3.9402e+07 	88   	3  	1 	6.57424e+07	1.8366e+07 	1.15764e+07
3.42306e+07	90   	4  	4 	5.79716e+07	1.80778e+07	9.09032e+06
3.12936e+07	78   	5  	3 	6.25613e+07	1.85027e+07	8.79728e+06
2.98068e+07	77   	6  	0 	5.45261e+07	1.85027e+07	7.54591e+06
2.91187e+07	78   	7  	1 	6.31069e+07	1.85027e+07	7.51139e+06
2.74607e+07	87   	8  	3 	5.28982e+07	1.82549e+07	6.84537e+06
2.49676e+07	82   	9  	2 	5.22736e+07	1.85027e+07	6.16665e+06
2.43939e+07	76   	10 	4 	4.92483e+07	1.85027e+07	5.95372e+06
2.3639e+07 	82   	11 	4 	5.

# Rastrigin - local search

In [10]:
from scipy.optimize import minimize

def rastrigin(x):
    # pass a single vector of length n (=dim) to evaluate Rastrigin
    xnp = np.array(x)
    return sum(xnp**2 + 10 - 10 * np.cos(2 * np.pi * xnp))

def loc_search_rast(x0):
    soln = minimize(rastrigin,x0,bounds=[[-5.12,5.12]]*len(x0))
    return soln.x, soln.fun

best_val,best_x,logbook = genetic_algorithm( rastrigin, ind_type='float', ind_size=10, pop_size=200, cx_prob = .7, mut_prob = 0.2, max_gen = 200, max_no_improve = 100, minimize=True, loc_search_fun = loc_search_rast, lower=-5.12, upper = 5.12 )
print(f'Minimum Value {best_val}')
print('\n x:')
print(best_x)

print('\nLogbook')
print(logbook)

Minimum Value 1.4210854715202004e-14

 x:
[1.0747629466500648e-09, 1.0217419906405532e-09, 3.484194210345456e-09, -1.152084734085466e-09, -3.6889386148608306e-09, 3.4240640930003736e-09, -1.2393557307787487e-09, 4.985880359590036e-09, 3.613314987452714e-09, 4.443993915709137e-09]

Logbook
avg     	evals	gen	ls	max    	min        	std    
160.997 	143  	0  	1 	230.335	30.8437    	26.329 
148.243 	143  	1  	0 	220.221	81.7944    	24.4291
132.632 	163  	2  	1 	199.771	65.476     	25.616 
114.907 	155  	3  	1 	187.52 	20.8941    	26.2439
100.342 	147  	4  	1 	163.787	20.8941    	23.2832
97.4519 	149  	5  	1 	155.6  	51.9961    	21.9013
93.3555 	148  	6  	1 	161.623	51.9546    	21.7004
86.5988 	136  	7  	1 	163.508	38.2225    	20.1105
85.4728 	153  	8  	1 	162.154	23.879     	23.1628
79.7898 	141  	9  	1 	141.998	23.879     	22.3744
79.6209 	165  	10 	1 	146.967	29.1119    	22.8915
76.2458 	156  	11 	1 	136.628	10.9445    	22.856 
73.3563 	140  	12 	0 	140.812	10.9445    	22.9693
68.5642 	1