In [1]:
from classes import *
import numpy as np, random, operator, pandas as pd, matplotlib.pyplot as plt


In [49]:
class Fitness:
    def __init__(self, route):
        self.route = route
        self.distance = 0
        self.fitness= 0.0
    
    def routeDistance(self):
        if self.distance ==0:
            pathDistance = 0
            for i in range(0, len(self.route)):
                fromCity = self.route[i]
                toCity = None
                if i + 1 < len(self.route):
                    toCity = self.route[i + 1]
                else:
                    toCity = self.route[0]
                pathDistance += fromCity.distance(toCity)
            self.distance = pathDistance
        return self.distance
    
    def routeFitness(self):
        if self.fitness == 0:
            self.fitness = - 1 / float(1 + self.routeDistance()) #+1 to avoid division by zero
        return self.fitness

In [3]:
def createRoute(cityList):
    route = random.sample(cityList, len(cityList))
    return route

In [4]:
def initialPopulation(popSize, cityList):
    population = []

    for i in range(0, popSize):
        population.append(createRoute(cityList))
    return population

In [5]:
def rankRoutes(population):
    fitnessResults = {}
    for i in range(0,len(population)):
        fitnessResults[i] = Fitness(population[i]).routeFitness()
    return sorted(fitnessResults.items(), key = operator.itemgetter(1), reverse = True)

In [6]:
def selection(popRanked, eliteSize):
    selectionResults = []
    df = pd.DataFrame(np.array(popRanked), columns=["Index","Fitness"])
    df['cum_sum'] = df.Fitness.cumsum()
    df['cum_perc'] = 100*df.cum_sum/df.Fitness.sum()
    
    for i in range(0, eliteSize):
        selectionResults.append(popRanked[i][0])
    for i in range(0, len(popRanked) - eliteSize):
        pick = 100*random.random()
        for i in range(0, len(popRanked)):
            if pick <= df.iat[i,3]:
                selectionResults.append(popRanked[i][0])
                break
    return selectionResults

In [7]:
def matingPool(population, selectionResults):
    matingpool = []
    for i in range(0, len(selectionResults)):
        index = selectionResults[i]
        matingpool.append(population[index])
    return matingpool

In [8]:
def breed(parent1, parent2):
    child = []
    childP1 = []
    childP2 = []
    
    geneA = int(random.random() * len(parent1))
    geneB = int(random.random() * len(parent1))
    
    startGene = min(geneA, geneB)
    endGene = max(geneA, geneB)

    for i in range(startGene, endGene):
        childP1.append(parent1[i])
        
    childP2 = [item for item in parent2 if item not in childP1]

    child = childP1 + childP2
    return child

In [9]:
def breedPopulation(matingpool, eliteSize):
    children = []
    length = len(matingpool) - eliteSize
    pool = random.sample(matingpool, len(matingpool))

    for i in range(0,eliteSize):
        children.append(matingpool[i])
    
    for i in range(0, length):
        child = breed(pool[i], pool[len(matingpool)-i-1])
        children.append(child)
    return children

In [10]:
def mutate(individual, mutationRate):
    for swapped in range(len(individual)):
        if(random.random() < mutationRate):
            swapWith = int(random.random() * len(individual))
            
            city1 = individual[swapped]
            city2 = individual[swapWith]
            
            individual[swapped] = city2
            individual[swapWith] = city1
    return individual

In [11]:
def mutatePopulation(population, mutationRate):
    mutatedPop = []
    
    for ind in range(0, len(population)):
        mutatedInd = mutate(population[ind], mutationRate)
        mutatedPop.append(mutatedInd)
    return mutatedPop

In [12]:
def nextGeneration(currentGen, eliteSize, mutationRate):
    popRanked = rankRoutes(currentGen)
    selectionResults = selection(popRanked, eliteSize)
    matingpool = matingPool(currentGen, selectionResults)
    children = breedPopulation(matingpool, eliteSize)
    nextGeneration = mutatePopulation(children, mutationRate)
    return nextGeneration

In [13]:
def geneticAlgorithm(population, popSize, eliteSize, mutationRate, generations):
    pop = initialPopulation(popSize, population)
    print("Initial distance: " + str(1 / rankRoutes(pop)[0][1]))
    
    for i in range(0, generations):
        if (i%10 == 0):
            print("-- iteration %d -- " % i)
        pop = nextGeneration(pop, eliteSize, mutationRate)
    
    print("Final distance: " + str(1 / rankRoutes(pop)[0][1]))
    bestRouteIndex = rankRoutes(pop)[0][0]
    bestRoute = pop[bestRouteIndex]
    return bestRoute

In [14]:
import joblib as jb

def write_Slideshow_to_file(slideshow, output_name = "sexybaby.txt"):
    with open(output_name, "w") as file:
        file.write("{}\n".format(len(slideshow.slides)))
        for slide in slideshow.slides:
            ID = slide.id
            if len(ID) == 1:
                file.write("{}\n".format(ID[0]))
            else:
                file.write("{} {}\n".format(ID[0], ID[1]))

#Create a set of slides following the rationale of the groupment by biggest tags
def get_horizontals_from_collection(collection, groupby="average", filename=""):
    
    #Create a set of vertical and horizontal pictures
    null_photo = Photo(orientation="", tags=[])
    collection_V = []
    collection_H = []

    for photo in collection:
        if photo.orientation == "V":
            collection_V.append(photo)
        else:
            collection_H.append(photo)

    ##### Sort pictures in place by number of tags
    collection_V.sort(key=lambda photo: len(photo.tags), reverse=True)
    collection_H.sort(key=lambda photo: len(photo.tags), reverse=True)
    
    ##### generate slides
    m = len(collection_V)
    slides_fromVerticals = []
    index = 0
    if groupby == "2by2":
        while index < m:
            slides_fromVerticals.append(Slide([collection_V[index], collection_V[index+1]]))
            index += 2
    else:
        if m %2 == 1:
            collection_V = collection_V[1:]
        for i in range(m//2):
            slides_fromVerticals.append(Slide([collection_V[m - 1 - i], collection_V[i]]))
    
    slides_fromHorizontals = [Slide([photo]) for photo in collection_H]
    jb.dump(collection_V, "collection_V_{}.joblib".format(filename))
    jb.dump(collection_H, "collection_H_{}.joblib".format(filename))
    jb.dump(slides_fromVerticals, "slides_fromVerticals_{}_{}.joblib".format(groupby,filename))
    jb.dump(slides_fromHorizontals, "slides_fromHorizontals_{}.joblib".format(filename))
    return slides_fromVerticals, slides_fromHorizontals

def get_horizontals_from_file(filename, groupby="average"):
    return jb.load("slides_fromVerticals_{}_{}.joblib".format(groupby, filename)), jb.load("slides_fromHorizontals_{}.joblib".format(filename))

In [41]:
from random import shuffle
from itertools import islice

def get_batches(l, n):
    """Yield successive n-sized chunks from l."""
    for i in range(0, len(l), n):
        yield l[i:i + n]

In [57]:
l = [1,2,3,4,5,6,7]
res = list(get_batches(l, 3))
res

[[1, 2, 3], [4, 5, 6], [7]]

In [15]:
Photo.id_counter = 0

In [89]:
import input_output as io
photos = io.read("c_memorable_moments.txt")
slides_fromVerticals, slides_fromHorizontals = get_horizontals_from_collection(photos, groupby="average")
slides = slides_fromVerticals + slides_fromHorizontals

In [19]:
a = geneticAlgorithm(population=slides, popSize=100, eliteSize=20, mutationRate=0.01, generations=200) 

Initial distance: -169.0
-- iteration 0 -- 
-- iteration 10 -- 
-- iteration 20 -- 
-- iteration 30 -- 
-- iteration 40 -- 
-- iteration 50 -- 
-- iteration 60 -- 
-- iteration 70 -- 
-- iteration 80 -- 
-- iteration 90 -- 
-- iteration 100 -- 
-- iteration 110 -- 
-- iteration 120 -- 
-- iteration 130 -- 
-- iteration 140 -- 
-- iteration 150 -- 
-- iteration 160 -- 
-- iteration 170 -- 
-- iteration 180 -- 
-- iteration 190 -- 
Final distance: -229.0


In [24]:
a[0]

id= (969,), tags= {'tgx1', 't7h', 'tz91', 'th02', 'thn', 't471', 'tpw', 'tf22', 't6c', 't651', 't4t1', 't622', 't361', 'tf71'}

In [19]:
sh = SlideShow(a, None)

In [20]:
write_Slideshow_to_file(sh, "output_c.txt")

In [21]:
Photo.id_counter = 0

In [98]:
import input_output as io
photos_b = io.read("b_lovely_landscapes.txt")
slides_fromVerticals_b, slides_fromHorizontals_b = get_horizontals_from_collection(photos_b, groupby="average")
slides_b = slides_fromVerticals_b + slides_fromHorizontals_b

In [99]:
type(slides_b)

list

In [101]:
slides_batches_b = list(get_batches(slides_b, 1000))

In [102]:
len(slides_batches_b[0])

1000

In [103]:
import multiprocessing
pool = multiprocessing.Pool(3)
print(multiprocessing.cpu_count())

def geneticAlgorithmMap (slides):
    return geneticAlgorithm(population=slides, popSize=100, eliteSize=20, mutationRate=0.01, generations=200) 
    
results_b = []
b_file = []
for sld in slides_batches_b:
    results_b.append(pool.apply_async(geneticAlgorithmMap, [sld]))
    
for result in results_b:
    b_file += result.get()

4
Initial distance: -7.0
-- iteration 0 -- 
Initial distance: -10.0
-- iteration 0 -- 
Initial distance: -7.0
-- iteration 0 -- 
-- iteration 10 -- 
-- iteration 10 -- 
-- iteration 10 -- 
-- iteration 20 -- 
-- iteration 20 -- 
-- iteration 20 -- 
-- iteration 30 -- 
-- iteration 30 -- 
-- iteration 30 -- 
-- iteration 40 -- 
-- iteration 40 -- 
-- iteration 40 -- 
-- iteration 50 -- 
-- iteration 50 -- 
-- iteration 50 -- 
-- iteration 60 -- 
-- iteration 60 -- 
-- iteration 60 -- 
-- iteration 70 -- 
-- iteration 70 -- 
-- iteration 70 -- 
-- iteration 80 -- 
-- iteration 80 -- 
-- iteration 80 -- 
-- iteration 90 -- 
-- iteration 90 -- 
-- iteration 90 -- 
-- iteration 100 -- 
-- iteration 100 -- 
-- iteration 100 -- 
-- iteration 110 -- 
-- iteration 110 -- 
-- iteration 110 -- 
-- iteration 120 -- 
-- iteration 120 -- 
-- iteration 120 -- 
-- iteration 130 -- 
-- iteration 130 -- 
-- iteration 130 -- 
-- iteration 140 -- 
-- iteration 140 -- 
-- iteration 140 -- 
-- iteration 150

Process ForkPoolWorker-77:
Process ForkPoolWorker-78:
Process ForkPoolWorker-79:
Traceback (most recent call last):
Traceback (most recent call last):
  File "/Users/ramzimissaoui/anaconda3/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/Users/ramzimissaoui/anaconda3/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/Users/ramzimissaoui/anaconda3/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/ramzimissaoui/anaconda3/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/ramzimissaoui/anaconda3/lib/python3.6/multiprocessing/pool.py", line 119, in worker
    result = (True, func(*args, **kwds))
  File "/Users/ramzimissaoui/anaconda3/lib/python3.6/multiprocessing/pool.py", line 119, in worker
    result = (True, func(*args, **kwds))
  File "<ipython-input-92-1358f004451f>

KeyboardInterrupt: 

Traceback (most recent call last):
  File "<ipython-input-5-cf5d024551cc>", line 4, in rankRoutes
    fitnessResults[i] = Fitness(population[i]).routeFitness()
  File "/Users/ramzimissaoui/anaconda3/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/Users/ramzimissaoui/anaconda3/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
KeyboardInterrupt
  File "/Users/ramzimissaoui/anaconda3/lib/python3.6/multiprocessing/pool.py", line 119, in worker
    result = (True, func(*args, **kwds))
  File "<ipython-input-92-1358f004451f>", line 6, in geneticAlgorithmMap
    return geneticAlgorithm(population=slides, popSize=100, eliteSize=20, mutationRate=0.01, generations=200)
  File "<ipython-input-13-877164477ff5>", line 8, in geneticAlgorithm
    pop = nextGeneration(pop, eliteSize, mutationRate)
  File "<ipython-input-12-8c79f6d090fd>", line 2, in nextGeneration
    popRanked = rankRoutes(currentGen)
  

In [77]:
import concurrent.futures

geneticAlgorithmMap = lambda slides: geneticAlgorithm(population=slides, popSize=100, eliteSize=20, mutationRate=0.01, generations=200) 

def g_map(algo, slides):
    return algo(slides)
with concurrent.futures.ProcessPoolExecutor() as executor:
    #read file and get slides
    #photos = io.read("b_lovely_landscapes.txt")
    #slides_fromVerticals, slides_fromHorizontals = get_horizontals_from_collection(photos, groupby="average")
    #slides = slides_fromVerticals + slides_fromHorizontals
    #b_file = []
    #slides_batches = list(get_batches(slides, 1000))
    
    # Process the list of files, but split the work across the process pool to use all CPUs!
    b_file = executor.map(g_map(geneticAlgorithmMap, slides), slides)
        
        
#for slides in slides_batches:
#    b_file+=geneticAlgorithm(population=slides, popSize=100, eliteSize=20, mutationRate=0.01, generations=200) 

KeyboardInterrupt: 

In [73]:
list(b_file)

TypeError: <lambda>() missing 1 required positional argument: 'slides'

In [None]:
sh_b = SlideShow(b_file, None)
write_Slideshow_to_file(sh_b, "output_b.txt")

In [None]:
Photo.id_counter = 0

In [16]:
import input_output as io
photos = io.read("d_pet_pictures.txt")
slides_fromVerticals, slides_fromHorizontals = get_horizontals_from_collection(photos, groupby="average")
slides = slides_fromVerticals + slides_fromHorizontals
d_file = geneticAlgorithm(population=slides, popSize=100, eliteSize=20, mutationRate=0.01, generations=50) 

Initial distance: 179931.0
-- iteration 0 -- 


KeyboardInterrupt: 

In [None]:
sh_d = SlideShow(d_file, None)
write_Slideshow_to_file(sh_d, "output_d.txt")

In [None]:
Photo.id_counter = 0

In [None]:
import input_output as io
photos = io.read("e_shiny_selfies.txt")
slides_fromVerticals, slides_fromHorizontals = get_horizontals_from_collection(photos, groupby="average")
slides = slides_fromVerticals + slides_fromHorizontals
e_file = geneticAlgorithm(population=slides, popSize=100, eliteSize=20, mutationRate=0.01, generations=200) 
sh_e = SlideShow(e_file, None)
write_Slideshow_to_file(sh_e, "output_e.txt")

In [None]:
Photo.id_counter = 0

In [None]:
import input_output as io
photos = io.read("a_example.txt")
slides_fromVerticals, slides_fromHorizontals = get_horizontals_from_collection(photos, groupby="average")
slides = slides_fromVerticals + slides_fromHorizontals
a_file = geneticAlgorithm(population=slides, popSize=100, eliteSize=20, mutationRate=0.01, generations=200) 
sh_a = SlideShow(a_file, None)
write_Slideshow_to_file(sh_a, "output_a.txt")