# genetic_algorithm.py

# -*- coding: utf-8 -*-
"""GA code.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1CIEAP63w7YABjU0UbX5Wu3DGOEeZ7NIX

# Fitness function
"""

import os
import sys
import numpy as np
from statistics import mode, mean, median, stdev, variance, quantiles
import math
from tqdm import tqdm
import itertools
from functools import reduce
import operator

from search import aStarSearch
from searchAgents import CornersProblem, SearchAgent, FoodSearchProblem, PositionSearchProblem
import pacman
import layout

from heuristics import *

class GeneticAlgorithm:

    def __init__(self,
                 n_genes,
                 n_iterations,
                 lchrom,
                 pcross,
                 pmutation,
                 crossover_type,
                 mutation_type,
                 selection_type,
                 popsize,
                 n_elites,
                 game_to_train_on,
                 method_of_joining_heuristics,
                 heuristics_list,
                 random_state = None):
        """Store the GA configuration and build the initial random population.

        Args:
            n_genes: number of binary genes per chromosome (must be >= 1).
            n_iterations: number of generations run by ``optimize``.
            lchrom: chromosome length used when choosing crossover points.
            pcross: probability of applying crossover to a pair of parents.
            pmutation: probability of mutating each child.
            crossover_type: 'uniform', 'one_point' or 'two_point'.
            mutation_type: 'bitstring', 'inversion' or 'swap'.
            selection_type: 'ranking' or 'roulette'.
            popsize: number of individuals in the population.
            n_elites: number of top individuals copied unchanged each generation.
            game_to_train_on: object exposing ``layoutName``, ``problemClass``
                and ``agentClass`` (read by ``fitness_func``).
            method_of_joining_heuristics: callable that merges a list of
                heuristic values into a single value.
            heuristics_list: candidate heuristics; gene ``i`` switches
                ``heuristics_list[i]`` on or off.
            random_state: optional seed. When given, NumPy's global generator
                is seeded so the whole run is reproducible.

        Raises:
            ValueError: if ``n_genes`` is smaller than 1 (no non-zero
                chromosome could ever be generated).
        """
        if n_genes < 1:
            raise ValueError('n_genes should be at least 1')

        self.n_genes = n_genes
        self.lchrom = lchrom
        self.popsize = popsize
        self.pcross = pcross
        self.pmutation = pmutation
        self.crossover_type = crossover_type
        self.mutation_type = mutation_type
        self.selection_type = selection_type
        self.random_state = random_state
        self.n_iterations = n_iterations
        self.n_elites = n_elites
        self.game_to_train_on = game_to_train_on
        self.method_of_joining_heuristics = method_of_joining_heuristics
        self.heuristics_list = heuristics_list
        self.best_fitness_evolution = []

        # Every random draw in this class goes through np.random, so seeding
        # the global generator is enough to make a run repeatable.
        if random_state is not None:
            np.random.seed(random_state)

        # Draw random bit strings until exactly `popsize` usable ones exist.
        # An all-zero chromosome selects no heuristic at all and is rejected.
        population = []
        while len(population) < self.popsize:
            chromosome = np.random.randint(2, size=self.n_genes)
            if chromosome.any():
                population.append(tuple(int(gene) for gene in chromosome))

        self.population = population

    
    def fitness_func(self, solution):
        """Score one individual; larger is better.

        A fresh game state is initialised from the configured layout, the
        configured problem is built on it, and the configured agent runs its
        search with the heuristic encoded by ``solution``. The fitness is
        ``10**5 / problem._expanded``; when nothing was expanded the tiny
        value ``10**-5`` is returned instead of dividing by zero.

        Args:
            solution: chromosome (sequence of 0/1 genes).

        Returns:
            float: fitness of the individual.
        """
        exponent = 5
        game = self.game_to_train_on

        state = pacman.GameState()
        # Second argument is passed through unchanged from the original code
        # (presumably the number of ghost agents - verify against pacman.py).
        state.initialize(layout.getLayout(game.layoutName), 0)
        problem = game.problemClass(state)

        selected = self.get_heuristic_set_from_ind(individual=solution)
        combined_heuristic = self.get_new_function_from_set_of_h(selected)

        searcher = game.agentClass(combined_heuristic)
        searcher.searchFunction(problem)

        expanded = problem._expanded
        if expanded != 0:
            return (1/expanded)*(10**exponent)
        return 10**(-exponent)

    def get_heuristic_set_from_ind(self, individual):
        """Return the heuristics switched on by ``individual``.

        Gene ``i`` being truthy selects ``self.heuristics_list[i]``; the
        result keeps the order of ``heuristics_list``.
        """
        return [
            self.heuristics_list[position]
            for position, gene in enumerate(individual)
            if gene
        ]
    
    def get_new_function_from_set_of_h(self, set_of_h):
        """Build a ``heuristic(state, problem)`` function from base heuristics.

        The point-to-point estimate is obtained by evaluating every function
        in ``set_of_h`` on ``(start, goal)`` and merging the values with
        ``self.method_of_joining_heuristics`` (0 when ``set_of_h`` is empty).
        How that estimate is lifted to a full state depends on
        ``self.game_to_train_on.problemClass``:

        * ``CornersProblem``: cheapest estimated tour over all orderings of
          the unvisited corners.
        * ``FoodSearchProblem``: A* path length to the estimated closest food
          plus the estimate from that food to the estimated furthest food.

        For any other problem class the returned function yields ``None``.

        Args:
            set_of_h: list of callables ``h(start, goal)``.

        Returns:
            callable: the combined heuristic ``new_heuristic(state, problem)``.
        """
        def new_heuristic(state, problem):
            def wrapper_function(start, goal):
                # Evaluate all selected heuristics, then merge the estimates.
                estimates = [base(start, goal) for base in set_of_h]
                if len(set_of_h) == 0:
                    return 0
                return self.method_of_joining_heuristics(estimates)

            problem_class = self.game_to_train_on.problemClass

            # Same idea as cornerHeuristic, driven by the GA-selected estimate.
            if problem_class == CornersProblem:
                remaining = problem.unvistedCorners(state)
                if len(remaining) == 0:
                    return 0
                if len(remaining) == 1:
                    return wrapper_function(state.position, remaining[0])

                # Try every visiting order and keep the cheapest estimated tour.
                cheapest = float('inf')
                for ordering in itertools.permutations(remaining):
                    route = [state.position] + list(ordering)
                    route_cost = sum(
                        wrapper_function(here, there)
                        for here, there in zip(route, route[1:])
                    )
                    cheapest = min(cheapest, route_cost)
                return cheapest

            # Same idea as foodHeuristic, driven by the GA-selected estimate.
            def exactDistanceUsingAStar(start, goal, gameState):
                def h(start, problem):
                    return wrapper_function(start, problem.goal)

                sub_problem = PositionSearchProblem(
                    gameState, start=start, goal=goal, warn=False, visualize=False)
                return len(aStarSearch(sub_problem, h))

            if problem_class == FoodSearchProblem:
                position, foodGrid = state
                foods = foodGrid.asList()

                if len(foods) == 0:
                    return 0
                if len(foods) == 1:
                    return wrapper_function(position, foods[0])

                def remembered_estimate(target):
                    # Estimates from `position` are memoised in
                    # problem.heuristicInfo under the key str((position, target)).
                    key = str((position, target))
                    if key in problem.heuristicInfo:
                        return problem.heuristicInfo[key]
                    estimate = wrapper_function(position, target)
                    problem.heuristicInfo[key] = estimate
                    return estimate

                nearest = foods[0]
                farthest = foods[0]

                for candidate in foods:
                    # Does this food beat the current closest one?
                    nearest_estimate = remembered_estimate(nearest)
                    candidate_estimate = wrapper_function(position, candidate)
                    if candidate_estimate < nearest_estimate:
                        nearest = candidate
                        problem.heuristicInfo[str((position, nearest))] = candidate_estimate

                    # Does this food beat the current furthest one?
                    farthest_estimate = remembered_estimate(farthest)
                    candidate_estimate = wrapper_function(position, candidate)
                    if candidate_estimate > farthest_estimate:
                        farthest = candidate
                        problem.heuristicInfo[str((position, farthest))] = candidate_estimate

                return exactDistanceUsingAStar(position, nearest, problem.startingGameState) + wrapper_function(nearest, farthest)

        return new_heuristic

    def get_fitness_scores(self):
        """Return a NumPy array with ``fitness_func`` applied to each individual.

        Scores are in the same order as ``self.population``.
        """
        fitness_values = list(map(self.fitness_func, self.population))
        return np.array(fitness_values)

    def __append_best_score(self, scores):
        """Record the highest value of ``scores`` in ``best_fitness_evolution``.

        Returns:
            str: always the literal ``'Ok'``.
        """
        self.best_fitness_evolution.append(np.max(scores))
        return 'Ok'
    
    def __ranking_selection(self, scores):
        ind = np.argsort(scores)

        s = sum(ind)
        t = np.random.rand() * s
        partial_sum = 0
        i=0
        while(partial_sum <t and i <len(scores)):
            partial_sum += scores[i]

        selected = i
        return selected 
    
    def __roulette_selection(self, scores):
        s = sum(scores)
        t = np.random.rand() * s
        partial_sum = 0
        i=0
        while(partial_sum <t and i <len(scores)):
            partial_sum += scores[i]

        selected = i
        return selected

    def select(self, scores, selection_type):
        """Select the index of one parent using the requested strategy.

        Args:
            scores: sequence of fitness values, one per individual.
            selection_type: ``'ranking'`` or ``'roulette'``.

        Returns:
            Index (into ``scores``) of the selected individual.

        Raises:
            ValueError: if ``selection_type`` is not a supported strategy.
        """
        if selection_type == 'ranking':
            return self.__ranking_selection(scores)
        if selection_type == 'roulette':
            return self.__roulette_selection(scores)
        raise ValueError('Type should be ranking or roulette')

    def flip(self, p):
        """Biased coin toss: return 1 with probability ``p``, otherwise 0."""
        if np.random.rand() < p:
            return 1
        return 0

    def __crossover(self, 
                    parent1, 
                    parent2, 
                    crossover_type,
                    pcross,
                    lchrom):
        
        if crossover_type not in ['uniform', 'one_point', 'two_point']:
                raise ValueError('crossover_type should be one of uniform, one_point or multi_point')
            
        if crossover_type == 'one_point':
            index = np.random.choice(range(1, lchrom)) 

            parent1 = list(parent1)
            parent2 = list(parent2)

            child1 = parent1[:index] + parent2[index:]
            child2 = parent2[:index] + parent1[index:]
            children = [tuple(child1), tuple(child2)]
        elif crossover_type == 'two_point':
            point1 = np.random.choice(range(1, lchrom)) 
            point2 = np.random.choice(point1, range(lchrom))
            child1 = parent1[:point1] + parent2[point1: point2] + parent1[point2:]
            child2 = parent2[:point1] + parent1[point1: point2] + parent2[point2:]
            children = [child1, child2]
        elif crossover_type == 'uniform':
           
            t = np.random.rand()
            temp = np.random.rand(lchrom)
            child1 = [parent1[i] if temp[i] > t else parent2[i] for i in range(len(temp))]
            child2 = [parent2[i] if temp[i] > t else parent1[i] for i in range(len(temp))]
            children = [child1, child2]

        return children
    

    def __mutation(self, individual, mutation_type):

        if mutation_type not in ['bitstring', 'inversion', 'swap']:
            raise ValueError('mutation_type should be one of bitstring or inversion or swap')


        index = np.random.choice(len(individual))
        index2 = np.random.choice(len(individual))
        
        # Convert individual to list so that can be modified
        individual_mod = list(individual)
        if mutation_type == 'bitstring':
            individual_mod[index] = 1 - individual_mod[index]
        elif mutation_type == 'inversion':
            individual_mod= individual_mod[0:index] + individual_mod[index2:index-1:-1] + individual_mod[index2+1:]
        elif mutation_type == 'swap':
            individual_mod[index], individual_mod[index2] = individual_mod[index2], individual_mod[index]
        else:
            pass
        
        individual = tuple(individual_mod)

        return individual

    def optimize(self):
        """Run the genetic algorithm and return the fittest individual found.

        For each of ``n_iterations`` generations the population is scored,
        the best score is recorded in ``best_fitness_evolution``, the
        ``n_elites`` fittest individuals are copied unchanged, and the rest
        of the next population is filled with children produced by
        selection, optional crossover (probability ``pcross``) and optional
        mutation (probability ``pmutation`` per child). Children with no
        active gene are discarded. The final population is scored once more
        after the last generation.

        Returns:
            tuple: ``(best_solution, best_fitness)`` where ``best_solution``
            is the highest-scoring chromosome of the final population and
            ``best_fitness`` is its score.
        """
        for _ in tqdm(range(self.n_iterations)):
            scores = self.get_fitness_scores()

            # Keep one entry per generation so the list shows the evolution.
            self.__append_best_score(scores)

            # argsort is ascending, so the elites are the tail of the order.
            # Slicing from an explicit start keeps n_elites == 0 meaning
            # "no elites" (a bare [-0:] slice would copy everyone).
            ranked = np.argsort(scores)
            n_elites = max(0, min(self.n_elites, self.popsize, len(ranked)))
            new_population = [tuple(self.population[k])
                              for k in ranked[len(ranked) - n_elites:]]

            # Breed until the new population has exactly popsize members.
            while len(new_population) < self.popsize:
                mate1 = tuple(self.population[self.select(scores, self.selection_type)])
                mate2 = tuple(self.population[self.select(scores, self.selection_type)])

                if self.flip(self.pcross):
                    children = self.__crossover(mate1, mate2, self.crossover_type,
                                                self.pcross, self.lchrom)
                    children = [tuple(child) for child in children]
                else:
                    children = [mate1, mate2]

                # Each child gets its own independent chance to mutate.
                for position in range(len(children)):
                    if self.flip(self.pmutation):
                        children[position] = self.__mutation(children[position],
                                                             self.mutation_type)

                for child in children:
                    # An all-zero chromosome selects no heuristic: drop it.
                    # The size check stops the second child from overfilling.
                    if len(new_population) < self.popsize and sum(child) != 0:
                        new_population.append(tuple(child))

            self.population = new_population

        # Score the final population and record its best fitness.
        scores = self.get_fitness_scores()
        self.__append_best_score(scores)

        # Fitness is maximised, so the best individual has the largest score.
        best_index = int(np.argmax(scores))
        best_solution = self.population[best_index]

        return (best_solution, self.best_fitness_evolution[-1])


class GameWrapper:
    """Plain holder for the three settings that describe a training game.

    Attributes are stored exactly as passed in:
    ``layoutName``, ``problemClass`` and ``agentClass``.
    """

    def __init__(self, layoutName, problemClass, agentClass):
        # Nothing is validated or instantiated here; values are only stored.
        self.layoutName, self.problemClass, self.agentClass = (
            layoutName,
            problemClass,
            agentClass,
        )

class GaAgentCornerns(SearchAgent):
    """Search agent that runs A* with a supplied heuristic on CornersProblem.

    The base-class initializer is not called; only ``searchFunction`` and
    ``searchType`` are set.
    """

    def __init__(self, heuristic):
        def run_astar(prob):
            # Bind the given heuristic into a one-argument search function.
            return aStarSearch(prob, heuristic)

        self.searchFunction = run_astar
        self.searchType = CornersProblem

class GaAgentFood(SearchAgent):
    """Search agent that runs A* with a supplied heuristic on FoodSearchProblem.

    The base-class initializer is not called; only ``searchFunction`` and
    ``searchType`` are set.
    """

    def __init__(self, heuristic):
        def run_astar(prob):
            # Bind the given heuristic into a one-argument search function.
            return aStarSearch(prob, heuristic)

        self.searchFunction = run_astar
        self.searchType = FoodSearchProblem


def default(str):
    """Append optparse's ``%default`` placeholder note to a help string."""
    suffix = ' [Default: %default]'
    return str + suffix
def main( argv ):
    """Command-line entry point: train one GA per heuristic-joining method.

    Parses ``--problem`` and ``--layout``, checks that the layout exists,
    then for every joining method (max, min, mean, mode, median, range)
    runs a ``GeneticAlgorithm`` over ``HEURISTICS_LIST`` and prints the best
    chromosome, its fitness, the corresponding number of expanded nodes and
    the names of the heuristics it switches on.

    Args:
        argv: command-line arguments without the program name.

    Raises:
        Exception: if unknown positional arguments are given or the layout
            cannot be found.
    """
    from optparse import OptionParser
    usageStr = """
    USAGE:      python genetic_algorithm.py <options>
    EXAMPLES:   (1) python genetic_algorithm.py -p FoodSearchProblem -l trickySearch
                    - starts genetic algorithm on a tricky food search problem
                (2) python genetic_algorithm.py -p CornersProblem -l mediumCorners
    """
    parser = OptionParser(usageStr)
    parser.add_option('-p', '--problem', dest='problem',
                      help=default('the Problem to train the gentic algorithm on'),
                      metavar='TYPE', default='CornersProblem')
    parser.add_option('-l', '--layout', dest='layout',
                      help=default('the LAYOUT_FILE from which to load the map layout'),
                      metavar='LAYOUT_FILE', default='mediumCorners')
    options, otherjunk = parser.parse_args(argv)
    if len(otherjunk) != 0:
        raise Exception('Command line input not understood: ' + str(otherjunk))

    # Fail early when the layout does not exist.
    l = layout.getLayout( options.layout )
    if l is None: raise Exception("The layout " + options.layout + " cannot be found")

    HEURISTICS_LIST = [
        manhattan_distance,
        euclidean_distance,
        diagonal_distance,
        max_heuristic,
        min_heuristic,
        null_heuristic
    ]

    # Ways of merging the values of the selected heuristics into one estimate.
    method_of_joining_heuristics = {
        'max' : max,
        'min' : min,
        'mean' : lambda x: sum(x)/len(x),
        'mode' : lambda x: max(set(x), key=x.count),
        'median' : lambda x: median(x),
        'range': lambda x: max(x) - min(x),
    }

    # options.problem is a string, so it must be compared with the problem's
    # name (not the class) for both the problem and the agent. Any value
    # other than "CornersProblem" trains on FoodSearchProblem.
    if options.problem == "CornersProblem":
        problem_class, agent_class = CornersProblem, GaAgentCornerns
    else:
        problem_class, agent_class = FoodSearchProblem, GaAgentFood

    game_to_train_on = GameWrapper(options.layout, problem_class, agent_class)

    print("Using problem: " + options.problem)
    print("Using layout: " + options.layout)

    for method in method_of_joining_heuristics:
        ga = GeneticAlgorithm(
            n_genes = len(HEURISTICS_LIST),
            n_iterations = 10,
            lchrom = len(HEURISTICS_LIST),
            pcross = 0.8,
            pmutation = 0.3,
            crossover_type = 'one_point',
            mutation_type = 'bitstring',
            selection_type = 'ranking',
            popsize = 20,
            n_elites = 2,
            random_state = 11,
            game_to_train_on = game_to_train_on,
            method_of_joining_heuristics = method_of_joining_heuristics[method],
            heuristics_list = HEURISTICS_LIST
        )

        best_solution, best_fitness = ga.optimize()
        print('\nBest solution:\t', best_solution)

        print('\nBest Fitness:\t', round(best_fitness))
        # Fitness is 10**5 / expanded nodes, so invert it to recover the cost.
        print('\nBest Cost (number of nodes expanded):\t', round(1/best_fitness * (10**5)))

        print("\nBest solution is made of:\t", end="")
        print(method.upper()
         + "( ", end="")
        for index, is_included in enumerate(best_solution):
            if is_included:
                print(HEURISTICS_LIST[index].__name__ + ",  ", end="")
        print(")")
        print("\n\n----------------------------------\n\n")
# Run the command-line entry point only when this file is executed as a
# script, forwarding the arguments that follow the program name.
if __name__ == "__main__":
    main( sys.argv[1:] )