In [1]:
# imports
import GAUtilities as ga
import numpy as np
import pandas as pd
from simanneal import Annealer
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style("darkgrid")
from scipy.optimize import minimize
import random
import warnings
warnings.filterwarnings('ignore')

# Self-Assessment: Using Maximization with GA
To maximize, we need to negate the fitness function, because our genetic algorithm only minimizes. Negating the fitness function is simple: add a negative sign in front of the value being returned. Let's set up the same population used in the lesson and compute the fitness for the maximization problem.


In [2]:
# fixed seed so the population drawn below is the same on every run
# (for everything, everything, everything... courtesy of the Violent Femmes)
np.random.seed(10101010)

# population size (keep it even: crossover works on pairs) and genes per individual
pop_size, ind_size = 6, 3

# gene limits, shared by real-valued and integer problems.
# For integer problems the upper bound is exclusive, so use one more than the largest value wanted.
bounds = [1, 7]

# population layout depends on the problem; here it is a plain integer matrix,
# one row per individual and one column per gene
pop = np.random.randint(*bounds, size=(pop_size, ind_size))

#this is our objective function for this particular problem. Each problem requires a different objective function.
def obj_sumDice(x):
    """Return the negated sum of the genes in ``x``.

    The sign is flipped so that a minimizer ends up maximizing the total.
    """
    # coerce to a numpy array so lists and other sequences are summed the same way
    dice = np.array(x)
    return -dice.sum()

#compute the fitness by passing in the function and population
# obj_sumDice returns the negated sum, so every fitness value here is negative
# and the individual with the largest dice total has the lowest fitness
fitness = ga.computeFitness(obj_sumDice, pop)

fitness

array([ -9., -13.,  -9., -15., -13.,  -7.])

Because our fitness function is so simple, and because we know that we're always using numpy arrays, we could also just pass numpy's sum function directly to our helper function like this, to get the minimization fitnesses.

In [3]:
#calling np.sum directly without our wrapper function
# no negation is applied here, so these are the plain (minimization) sums
fitness = ga.computeFitness(np.sum, pop)
fitness

array([ 9., 13.,  9., 15., 13.,  7.])

But if you try to put a negative sign in front of `np.sum` itself, you'll get an error. NumPy, though, has its own negation function, `np.negative`. We can apply it to the computed fitness values like this to turn the `np.sum` minimization into a maximization.

In [4]:
#negating the np.sum fitness values for a maximization problem.
# np.negative flips the sign of the array returned by computeFitness,
# not of np.sum itself
fitness = np.negative(ga.computeFitness(np.sum, pop))
fitness

array([ -9., -13.,  -9., -15., -13.,  -7.])

# *Self-Assessment:  Exploring Tournament Selection*

*What happens for smaller tournament sizes?*  You should notice that there is more diversity in the selected population, and more individuals with high fitness values get selected.  There are fewer repeats in the selected population.

*For larger tournament sizes?* There is less diversity in the selected population, and mostly individuals with low fitness values get selected.  There are more repeats in the selected population.

*For tournament size 1?* This yields the most diverse population with fewest repeats.

*For tournament size the same as the population size?* The selected population contains only the individual with the lowest fitness value.  This means crossover will have no effect since all the individuals are the same.  Only the mutation operator will have an effect.

*How does tournament size affect the exploration versus exploitation tradeoff?* Small tournament sizes encourage more exploration and less exploitation, while larger tournament sizes have the opposite effect.

# *Self-Assessment:  Crossover probability*

* *What happens if `cx_prob = 0`?* No mating occurs so there is no sharing of information between individuals.  This would result in a population of parallel random local searches.

* *What happens if `cx_prob=1`?*  Every pair of individuals mates. This means that there is no chance that a very good solution survives more than one generation unless it happens to mate with a copy of itself.

# *Self-Assessment: Mutation Parameters:*

* *What is the effect of `mut_prob = 1`?*  Every individual is mutated.
* *What is the effect of `mut_prob = 0`?*  No individuals are mutated so the genetic algorithm uses only mating to improve the population.
* *What is the effect of increasing `ind_prob`?*  Larger values mean more changes in the individual.
* *What would happen if you made `sigma` really large?*  The mutations could result in very large steps which could make the search behave erratically.  Mutated individuals might have very little in common with their parents.  Large exploration and small exploitation.
* *What would happen if you made `sigma` really small?*  The steps would be very small so the search remains very local.  Small exploration and large exploitation.

# *Self-Assessment:  Genetic Algorithm for the Cargo-loading Problem*

In [9]:
# build the problem data: seeding first makes the item weights identical on every run
np.random.seed(5)
num_cars = 4 # number of groups (box cars) the items are divided between
num_items=1000
# integer weights drawn from [2, 20) -- randint's upper bound is exclusive
weights = np.random.randint(2,20,size=num_items)
# seed() with no argument re-seeds the global generator from fresh entropy,
# so the random draws made after this point differ from run to run
np.random.seed()

def obj_cargo(groups, weights, showOutput=False):
    """Score a cargo assignment; lower is better.

    Items are aggregated by group label to get each group's item count and
    total weight. The score is the sum of absolute weight-total differences
    between consecutive groups plus 50 times the sum of absolute item-count
    differences between consecutive groups.

    Parameters
    ----------
    groups : array-like
        Group label of each item.
    weights : array-like
        Weight of each item, aligned with ``groups``.
    showOutput : bool, optional
        When True, display the per-group table and print the score.

    Returns
    -------
    The combined weight and count penalty.
    """
    renamed = {'sum': 'weight_sum', 'count': 'group_count'}

    # one row per item, indexed by its group, then collapsed to one row per group
    items = pd.DataFrame({'weights': weights, 'groups': groups}).set_index('groups')
    summary = items.groupby(groups).agg(['count', 'sum']).rename(columns=renamed)

    # change from each group to the next; the first group has no predecessor,
    # so its entries are NaN and drop out of the sums below
    step = summary.diff().abs()
    summary['weight_difference'] = step[('weights', 'weight_sum')]
    summary['count_difference'] = step[('weights', 'group_count')]

    weight_term = np.sum(summary['weight_difference'])
    count_term = 50 * np.sum(summary['count_difference'])
    finalVal = weight_term + count_term

    if showOutput:
        display(summary)
        print(f'The current fitness is {finalVal}')

    return finalVal

def mov_cargo(groups, debug=False):
    """Return a copy of ``groups`` with one random item moved to a different group.

    Parameters
    ----------
    groups : numpy.ndarray
        1-D array of group labels, one per item. It is not modified.
    debug : bool, optional
        When True, print the chosen item and the before/after assignments.

    Returns
    -------
    numpy.ndarray
        A new array that differs from ``groups`` in exactly one position.
        If ``groups`` holds fewer than two distinct labels there is no other
        group to move an item to, and an unchanged copy is returned.
    """
    #get the unique groups
    choices = np.unique(groups)
    #get a copy of the groups
    new_groups = groups.copy()
    # With fewer than two distinct labels the rejection loop below could never
    # find a "different" group and would spin forever, so bail out early.
    if choices.size < 2:
        return new_groups
    #select item to change
    switch = np.random.randint(0, groups.shape[0])
    #select new group value, redrawing until it differs from the item's current group
    new_group = np.random.choice(choices)
    while groups[switch] == new_group:
        new_group = np.random.choice(choices)
    new_groups[switch] = new_group
    if debug:
        print(f'The item at {switch} should change to {new_group}')
        print(f'The initial groups are: {groups} and the changed groups are {new_groups}')

    return new_groups

def cargo_search(weights, num_groups, max_no_improve, debug=False):
    """Local search for a low-cost assignment of items to groups.

    Starts from a random assignment, then repeatedly proposes a single-item
    move with ``mov_cargo`` and keeps it only when ``obj_cargo`` strictly
    decreases. Stops after ``max_no_improve`` consecutive proposals that
    bring no improvement.

    Parameters
    ----------
    weights : numpy.ndarray
        Weight of each item.
    num_groups : int
        Number of groups; labels are drawn from ``[0, num_groups)``.
    max_no_improve : int
        Consecutive non-improving moves allowed before stopping.
    debug : bool, optional
        When True, print each proposed move and the old/new fitness.

    Returns
    -------
    tuple
        ``(current_fitness, current_groups)`` for the best assignment found.
    """
    #get the total number of items
    num_items = weights.shape[0]

    #assign them to the number of groups
    current_groups = np.random.randint(low=0,
                                          high=num_groups, size=num_items)
    #get the current_fitness
    # obj_cargo is declared as (groups, weights): the assignment goes first
    current_fitness = obj_cargo(current_groups, weights)

    num_moves_no_improve = 0
    while (num_moves_no_improve < max_no_improve):
        num_moves_no_improve += 1
        new_groups = mov_cargo(current_groups, debug)
        new_fitness = obj_cargo(new_groups, weights)
        if debug:
            print(f'Old fitness: {current_fitness}, New fitness {new_fitness}')
        if new_fitness < current_fitness:
            # accept the move and restart the no-improvement counter
            current_fitness = new_fitness
            current_groups = new_groups
            num_moves_no_improve = 0
    return current_fitness, current_groups



In [12]:
pop_size = 20 # should be even due to the way we'll implement crossover
ind_size = num_items # one gene per cargo item, so each individual assigns every item to a car
tourn_size = 3 # tournament size for selection
cx_prob = 0.7 # probability a pair of parents crossover to produce two children
mut_prob = 0.3 # probability an individual mutates
ind_prob = 0.1 # probability each variable in an individual mutates
num_iter = 500 # number of genetic algorithm iterations (generations) to run
update_iter = 100 # how often to display output



#initialize population and fitness
# each row is one individual: a car label in [0, num_cars) for every item
pop = np.random.randint(low=0, high=num_cars, size = (pop_size,ind_size))

#note how we're passing named parameters into the computeFitness function
fitness = ga.computeFitness(obj_cargo, pop, weights=weights)

# initialize stats and output
stats, best_fitness, best_x = ga.initStats(fitness, pop, num_iter)

#This is where the guts of the algorithm start
# The loop variable is deliberately not named `iter`: at notebook scope that
# would shadow the builtin iter() for every cell run afterwards.
for generation in range(num_iter):
    #sort the population
    pop = ga.sortPop(pop, fitness)
    # tournament selection
    selected_pop = ga.tournamentSelection(pop, tourn_size).astype(int)
    
    # one-point crossover (mating)
    cx_pop = ga.onePointCrossover(selected_pop, cx_prob).astype(int)

    # uniform int mutation
    mut_pop = ga.uniformIntMutation(cx_pop, mut_prob, ind_prob, [0, num_cars]).astype(int) 

    # build the next generation from the sorted current population and the mutated one
    # (presumably addElitism carries over the single best individual -- confirm in GAUtilities),
    # then re-run fitness
    pop = ga.addElitism(pop, mut_pop, 1)
    fitness = ga.computeFitness(obj_cargo, pop, weights=weights)

    # collect stats and output to screen
    stats, best_fitness, best_x = ga.updateStats(stats, fitness,best_x, pop, generation, update_iter)


#####################
# Everything in the algorithm is done, and now we're just outputting the final result
#####################
print(f"The minimum difference between box car weights is {best_fitness:.0f}")
print("That happens when:")
obj_cargo(best_x, weights, showOutput=True)

Iteration | Best this iter |    Best ever
        1 |      8.690e+02 |    8.690e+02
      100 |      1.830e+02 |    1.830e+02
      200 |      1.830e+02 |    1.830e+02
      300 |      1.440e+02 |    1.440e+02
      400 |      1.440e+02 |    1.440e+02
      500 |      1.440e+02 |    1.440e+02
The minimum difference between box car weights is 144
That happens when:


Unnamed: 0_level_0,weights,weights,weight_difference,count_difference
Unnamed: 0_level_1,group_count,weight_sum,Unnamed: 3_level_1,Unnamed: 4_level_1
0,250,2588,,
1,250,2687,99.0,0.0
2,250,2655,32.0,0.0
3,250,2668,13.0,0.0


The current fitness is 144.0


144.0