# In [1]:
"""
IslandMethod
Added by Davide Anghileri, Nathan Consuegra, 2017
"""
from pyspark.sql import SparkSession
from pyspark.sql.types import *
from pyspark.sql.functions import *
from pyspark.sql import Row
import numpy as np
import pandas as pd
import random
from datetime import datetime
import re

class IslandMethod:
    """Island-model driver that runs a differential evolution object on Spark.

    Individuals are stored as ``(key, (chromosome, score))`` pairs. The key
    decides, through ``partitionBy``, which island (Spark partition) an
    individual lives on. Every island evolves on its own for ``localIt``
    generations; afterwards the keys are reshuffled (migration) and the
    individuals are redistributed. Lower scores are treated as better.

    The ``diffEvo`` object must provide the members used here: ``np``,
    ``maxEval``, ``initialPopulation()``, ``func(chromosome)``,
    ``mutation(a, b, c)`` and ``crossover(target, donor)``.
    """

    # An island needs the target plus three distinct donors to build a trial.
    _MIN_ISLAND_SIZE = 4

    def __init__(self, diffEvo, workers = 1, localIt = 2):
        """Store the run configuration and reset the counters.

        Args:
            diffEvo: differential evolution object (see the class docstring).
            workers: number of islands, passed to ``partitionBy``.
            localIt: generations every island runs between two migrations.

        Raises:
            ValueError: if ``diffEvo`` is None, or ``workers`` / ``localIt``
                is smaller than 1.
        """
        # Island based input parameters check
        if diffEvo is None:
            raise ValueError("The differential evolution object is needed.")
        if workers < 1:
            raise ValueError("The number of workers must be at least 1.")
        if localIt < 1:
            raise ValueError("The number of local iterations must be at least 1.")

        # Set internal parameters
        self.workers = workers
        self.diffEvo = diffEvo
        self.localIt = localIt
        # One key per individual; shuffled in place at every migration.
        self.partitions = np.arange(diffEvo.np)
        self.evalAccum = 0
        self.migrationCount = 0

    def execute(self, sc):
        """Run the island loop until ``diffEvo.maxEval`` evaluations are reached.

        Args:
            sc: the SparkContext used to parallelize the population.

        Returns:
            The result of ``takeOrdered(1, ...)`` on the final population: a
            list holding the ``(key, (chromosome, score))`` entry with the
            lowest score.
        """
        # Build the initial population and pair every individual with a key.
        initialPop = self.diffEvo.initialPopulation()
        pop_rdd = sc.parallelize(initialPop)
        keys = sc.parallelize(self.partitions)

        # Score the initial individuals. The RDD is cached because the three
        # actions below (count, glom/collect, partitionBy) would otherwise
        # each re-run the fitness function on the whole population.
        score = self.diffEvo.func
        individuals = keys.zip(pop_rdd).map(lambda x: (x[0], (x[1], score(x[1])))).cache()
        self.evalAccum += individuals.count()
        print("\nNumber of Evaluations",self.evalAccum)
        print("\nInitial partition structure: {}".format(individuals.glom().collect()))

        stop = False
        while not stop:
            # Send every individual to the island selected by its key.
            individuals = individuals.partitionBy(self.workers)
            print("Number of partitions:", individuals.getNumPartitions())

            # Evolve every island, then bring the result back to the driver.
            evolved = individuals.mapPartitions(self.island).collect()
            individuals = sc.parallelize(evolved)

            bestChromosome = individuals.takeOrdered(1, lambda x: x[1][1])
            print("\nBest chromosome ->:", bestChromosome)
            # Sort once and reuse the result for both report lines.
            ranked = individuals.sortBy(lambda x: x[1][1]).collect()
            print("\nValues after island: {}".format(ranked))
            print("\nValues after island 2: {}".format(ranked))

            # Every individual is assumed to be evaluated once per local
            # generation. Islands that were too small to evolve are counted
            # as well, so this is an upper bound on the real evaluations.
            self.evalAccum += len(evolved)*self.localIt

            # Recompute and assign the keys for the next iteration
            individuals = self.migration(individuals)
            self.migrationCount = self.migrationCount + 1
            print("\nMigration number:",self.migrationCount)
            print("\nNumber of Evaluations",self.evalAccum)

            # Stopping criterion: evaluation budget exhausted.
            if self.evalAccum >= self.diffEvo.maxEval:
                stop = True
                print("\nThe algorithm stopped because maximum number of evaluations (",self.diffEvo.maxEval, ") is reached: ",self.evalAccum)

        best = individuals.takeOrdered(1, lambda x: x[1][1])
        print("\nBest chromosome:", best)
        return best

    def migration(self, individuals):
        """Shuffle the keys and return an RDD with the individuals re-keyed.

        The individuals are coalesced into a single partition so that
        ``migrate`` sees all of them and can hand out one key per individual.
        """
        random.shuffle(self.partitions)
        return individuals.coalesce(1).mapPartitions(self.migrate)

    def migrate(self, iterator):
        """Replace the key of the i-th individual with ``self.partitions[i]``.

        Args:
            iterator: yields ``(key, (chromosome, score))`` entries.

        Returns:
            An iterator over the same entries carrying the new keys.
        """
        update = []
        for position, element in enumerate(iterator):
            chromosome, score = element[1][0], element[1][1]
            update.append((self.partitions[position], (chromosome, score)))
        return iter(update)

    @staticmethod
    def _pickDonors(size, exclude):
        """Return three distinct random indices of ``range(size)`` other than ``exclude``.

        Requires ``size >= 4``. Sampling from ``size - 1`` slots and shifting
        the ones at or after ``exclude`` skips that index without retries.
        """
        picks = random.sample(range(size - 1), 3)
        return [j + 1 if j >= exclude else j for j in picks]

    def island(self, iterator):
        """Evolve one island for ``self.localIt`` generations.

        For every individual a trial is built from three other randomly
        chosen individuals of the same island (mutation, then crossover
        with the target). The trial replaces the target only when its
        score is strictly lower.

        Args:
            iterator: yields the island's ``(key, (chromosome, score))``
                entries.

        Returns:
            An iterator over the island's entries after the local
            generations. An island with fewer than four individuals cannot
            supply three distinct donors and is returned unchanged.
        """
        population = list(iterator)
        size = len(population)
        if size < self._MIN_ISLAND_SIZE:
            # Not enough individuals to pick three donors distinct from the target.
            return iter(population)

        generation = 0
        while True:
            generation += 1
            nextPop = []
            # For each chromosome in the population
            for i, (key, (target, currScore)) in enumerate(population):
                # Select 3 chromosomes at random
                indA, indB, indC = self._pickDonors(size, i)

                # Execute mutation and crossover
                donor = self.diffEvo.mutation(population[indA][1][0],
                                              population[indB][1][0],
                                              population[indC][1][0])
                trial = self.diffEvo.crossover(target, donor)

                # Evaluate the new chromosome and keep whichever is better
                trialScore = self.diffEvo.func(trial)
                if trialScore < currScore:
                    nextPop.append((key, (trial, trialScore)))
                else:
                    nextPop.append((key, (target, currScore)))

            # Substitute the population
            population = nextPop

            # Stopping criterion: local generation budget used up.
            if generation >= self.localIt:
                break

        return iter(population)