# In [3]:
"""
IslandMethod
Added by Davide Anghileri, Nathan Consuegra, 2017
"""
from pyspark.sql import SparkSession
from pyspark.sql.types import *
from pyspark.sql.functions import *
from pyspark.sql import Row
import numpy as np
import pandas as pd
import random
from datetime import datetime
import re

class IslandMethod:
    """Island-model driver that evolves a population in parallel Spark partitions.

    The population is repeatedly split at random into ``workers`` islands
    (one Spark partition each); every island evolves on its own for
    ``diffEvo.maxIt`` generations, then the islands are merged and reshuffled.

    The ``diffEvo`` object is expected to provide the members used below:
    ``np`` (population size), ``maxIt`` (generations per island round),
    ``initialPopulation()``, ``mutation(a, b, c)`` and
    ``crossover(target, donor)``.
    """

    def __init__(self, diffEvo, workers = 4, localIt = 2):
        """Store the configuration and compute a first island assignment.

        Args:
            diffEvo: differential evolution object (see class docstring).
            workers: number of islands / Spark partitions; must be >= 1.
            localIt: number of split-evolve-merge rounds run by ``execute``.

        Raises:
            ValueError: if ``diffEvo`` is None or ``workers`` is below 1.
        """
        # Island based input parameters check
        if diffEvo is None:
            raise ValueError("The differential evolution object is needed.")
        if workers < 1:
            raise ValueError("The number of workers must be at least 1.")

        # Set internal parameters
        self.workers = workers
        self.diffEvo = diffEvo
        self.localIt = localIt

        # Assign every chromosome to an island at random but evenly distributed
        self.partitionRDD()

    def execute(self, sc):
        """Run ``localIt`` island rounds on the given SparkContext.

        Args:
            sc: SparkContext used to parallelize the population.

        Returns:
            The RDD holding the population after the last round (the initial
            population when ``localIt`` is 0). Earlier versions returned
            nothing, so callers that ignore the result are unaffected.
        """
        # Parallelize the initial population produced by the DE object
        rdd = sc.parallelize(self.diffEvo.initialPopulation())

        for x in range(self.localIt):
            print("\nCurrent iteration:", x)

            # Recompute the island key of every chromosome for this round
            self.partitionRDD()
            index = sc.parallelize(self.indexes)
            rdd = index.zip(rdd)

            # Partition by the key previously computed at random
            rdd = rdd.partitionBy(self.workers)
            print("Indexes:", self.indexes)
            print("Number of partitions:", rdd.getNumPartitions())
            print("\nPartitions structure: {}".format(rdd.glom().collect()))

            # Execute the algorithm for each island. The island step is
            # random, so it is materialized exactly once: collecting it a
            # second time would recompute it and yield a population different
            # from the one printed here.
            islands = rdd.mapPartitionsWithIndex(self.island).glom().collect()
            print("\nValues after island: {}".format(islands))

            # Merge the islands back, dropping the partition keys
            merged = [chromosome for part in islands for _, chromosome in part]
            rdd = sc.parallelize(merged)

        return rdd

    def partitionRDD(self):
        """Fill ``self.indexes`` with one shuffled island key per chromosome.

        Keys are dealt round-robin before shuffling, so island sizes differ by
        at most one and there is exactly one key per chromosome even when
        ``diffEvo.np`` is not a multiple of ``workers`` (needed for the
        element-wise ``zip`` in ``execute``).
        """
        self.indexes = [i % self.workers for i in range(int(self.diffEvo.np))]
        random.shuffle(self.indexes)

    def island(self, index, iterator):
        """Evolve one island for ``diffEvo.maxIt`` generations.

        Args:
            index: partition index supplied by ``mapPartitionsWithIndex``
                (not used by the algorithm).
            iterator: iterator over ``(partitionKey, chromosome)`` pairs.

        Returns:
            Iterator over the ``(partitionKey, chromosome)`` pairs of the
            evolved island.
        """
        oldPop = list(iterator)
        size = len(oldPop)

        # Mutation needs three distinct chromosomes other than the target;
        # a smaller island cannot evolve, so it is passed through unchanged.
        if size < 4:
            return iter(oldPop)

        localIteration = 0
        while True:
            localIteration += 1
            newPop = []

            # For each chromosome in the population
            for i, (targetPartition, target) in enumerate(oldPop):
                # Select 3 distinct chromosomes, all different from the target:
                # sample from size - 1 slots and skip over position i.
                picks = random.sample(range(size - 1), 3)
                indA, indB, indC = [p + 1 if p >= i else p for p in picks]

                # Execute mutation and crossover
                donor = self.diffEvo.mutation(oldPop[indA][1], oldPop[indB][1], oldPop[indC][1])
                trial = self.diffEvo.crossover(target, donor)

                # Placeholder selection: the trial is accepted at random
                # instead of being compared with the target by fitness.
                if random.random() > 0.9:
                    newPop.append((targetPartition, trial))
                else:
                    newPop.append((targetPartition, target))

            # Substitute the population
            oldPop = newPop

            # Stopping criterion; ">=" also terminates when maxIt <= 0
            if localIteration >= self.diffEvo.maxIt:
                break

        return iter(oldPop)