In [4]:
import gym
import numpy as np
import math
import random

env = gym.make('Marvin-v0')


In [5]:
class Marvin(object):
    
    def __init__(self, lifespan=200, genes=None):
        self.genes = [] if genes == None else genes
        self.lifespan = lifespan
        for _ in range(self.lifespan):
            gene = []
            for _ in range(4):
                gene.append(random.random())
            self.genes.append(gene)
        self.fitness = 0
        
    def setFitness(self, fitness):
        self.fitness = fitness
    
    def __str__(self):
        message = "marvin_genes: [{}]\nmarvin_fitness: {}\n"
        return message.format(len(self.genes), self.fitness)
    
    def evaluate(self):
        env.reset()
        done = False
        
        for action in self.genes:
            _, reward, done, info = env.step(action)
            self.fitness += reward
            if done == True:
                break
            
        return self.fitness
        
            
    
    

In [6]:
def sortMarvin(marvin):
    return marvin.fitness

class MarvinTown(object):
    
    def __init__(self, population = 50, lifespan = 200):
        self.marvins = []
        self.history = []
        self.mutation_rate = 0.1
        self.lifespan = lifespan
        self.population = population
        for _ in range(self.population):
            self.marvins.append(Marvin(self.lifespan))
        
    def __str__(self):
        message = "Marvin Town Population: {}"
        return message.format(len(self.marvins))
    
    def maxFitness(self):
        maxF = -1000
        for marvin in self.marvins:
            if marvin.fitness > maxF:
                maxF = marvin.fitness
        return maxF
    
    def bestMarvin(self):
        return self.marvins[0]
    
    def averageFitness(self):
        fitness_record = self.history[-1]
        totalF = 0
        for fitness_score in fitness_record:
            totalF += fitness_score
        return totalF / len(fitness_record)
    
    def evaluate(self):
        fitnesses  = []
        fitness_sum = 0
        for marvin in self.marvins:
            fitness = marvin.evaluate()
            fitnesses.append(fitness)
            fitness_sum += fitness
        for index, marvin in enumerate(self.marvins):
            fitnesses[index] = fitnesses[index] / fitness_sum 
            marvin.setFitness(fitnesses[index])
        self.history.append(fitnesses)
        self.marvins = sorted(self.marvins, key = sortMarvin, reverse = True)
#         print("marvin1 = {} and marvin2 = {}".format(self.marvins[0].fitness, self.marvins[-1].fitness))
        return fitnesses
    
    def crossover(self, parentA, parentB):
        genesA = parentA.genes
        genesB = parentB.genes
        random_threshold = random.uniform(1, len(genesB))
        newGenes = []
        for gene in range(len(genesA)):
            if gene < random_threshold:
                newGenes.append(genesA[gene])
            else:
                newGenes.append(genesB[gene])
        return newGenes
    
    def selectParents(self):
        previous = 0
        accumulatedFitness = []
        for marvin in self.marvins:
            accumulatedFitness.append(previous + marvin.fitness)
            previous += marvin.fitness
        
        parentA = None
        parentB = None
        
        dice = random.uniform(0, 1)
        for index, accF in enumerate(accumulatedFitness):
            if dice <= accF:
                if (parentA == None):
                    parentA = self.marvins[index]
                elif (parentB == None):
                    parentB = self.marvins[index]
                else:
                    break
        return (parentA, parentB)
        
    def mutation(self, genes):
        updatedGenes = []
        for gene in genes:
            dice = random.uniform(0,1)
            if (dice > self.mutation_rate):
                updatedGenes.append(env.action_space.sample())
            else:
                updatedGenes.append(gene)
        return updatedGenes
    
    def nextGeneration(self):
        new_generation = []
        fitnessess = self.history[-1]
        
        while len(new_generation) < len(self.marvins):
            parentA, parentB = self.selectParents()
            if (not parentB or not parentA):
                continue
            newGenes = self.crossover(parentA, parentB)
            newGenes = self.mutation(newGenes)
            child = Marvin(lifespan=self.lifespan, genes = newGenes)
            new_generation.append(child)
        self.marvins = new_generation
            
        
        
        
        
        
        

In [7]:
population = MarvinTown(lifespan = 200, population = 10)
# fitnessess = population.evaluate()

In [8]:
def renderSolution(genes):
    env.reset()
    for i in range(500):
        env.render()
        action = genes[i]
        _, _, done, info = env.step(action)
        if done == True:
            break
        

In [None]:
average_fitness = 0 

count = 0
print("started\n")
while average_fitness != 0.40 and count < 600:
    fitnessess = population.evaluate()
    average_fitness = population.averageFitness()
    max_fitness = population.maxFitness()
    if (count%50 == 0):
        print("{} => avF: {}, mxF: {}\n".format(count, average_fitness, max_fitness))
        
        renderSolution(population.bestMarvin().genes)
    count+=1
    population.nextGeneration()


started

0 => avF: 0.1, mxF: 0.10038926459992716

50 => avF: 0.10000000000000002, mxF: 0.11351163625810959

100 => avF: 0.09999999999999998, mxF: 0.11637476962529852

150 => avF: 0.09999999999999999, mxF: 0.11514736646519946

200 => avF: 0.10000000000000002, mxF: 0.1147784659351856

250 => avF: 0.1, mxF: 0.12152815133922579

300 => avF: 0.09999999999999999, mxF: 0.1310822564877973

350 => avF: 0.10000000000000002, mxF: 0.11875134098423781

400 => avF: 0.09999999999999999, mxF: 0.12443537765215687



In [None]:
print(population)