ENH: cycles replaced by numpy vector operations. #725

Open · wants to merge 1 commit into master
Conversation


@quant12345 commented Sep 13, 2023

I asked a question about the size of the arrays here.

No one answered, so after thinking about it a little more I concluded that the arrays are two-dimensional. Python lists have no vector operations, and in the line
(ind.fitness.values[i] - opt_ind[i])**2
the operands are just numbers.

Therefore, I am opening this pull request with my changes to the 'convergence' function.
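
The core idea, shown on plain data (a minimal sketch with made-up 2-D points, not actual DEAP individuals): numpy broadcasting computes all pairwise squared distances in one step, replacing the three nested loops.

import numpy

front = numpy.array([[0.0, 1.0], [0.5, 0.5]])    # stand-in for the fitness values of first_front
optimal = numpy.array([[0.0, 0.9], [0.4, 0.6]])  # stand-in for optimal_front

# (2, 1, 2) - (2, 2) broadcasts to (2, 2, 2): every front point minus every optimal point
diff = front[:, numpy.newaxis] - optimal
# sum of squares over the objective axis, minimum over the optimal front, then sqrt
dists = numpy.sqrt(numpy.min(numpy.sum(diff ** 2, axis=2), axis=1))
print(dists.mean())  # the same value the nested loops would produce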

Cycles replaced by numpy vector operations.
On average, the 'convergence' function speeds up by 20-30 times. For the tests I created separate functions, 'convergence_old' and 'convergence_new'; the time spent computing each function, together with its return value, is printed. In the loop at the bottom of the script, the parameter mu=i is passed to the 'main' function. I removed part of the original example code because waiting for the whole computation takes a long time, and I only need the 'pop' population list.

Tests:
import array
import random
import json

import numpy
import datetime
from math import sqrt

from deap import base
from deap import benchmarks
from deap import creator
from deap import tools


def convergence_old(first_front, optimal_front):
    """Given a Pareto front `first_front` and the optimal Pareto front,
    this function returns a metric of convergence
    of the front as explained in the original NSGA-II article by K. Deb.
    The smaller the value is, the closer the front is to the optimal one.
    """

    distances = []

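    # For each individual, find the nearest point on the optimal front;
    # squared distances are compared and sqrt is taken once at the end.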
    for ind in first_front:
        distances.append(float("inf"))
        for opt_ind in optimal_front:
            dist = 0.
            for i in range(len(opt_ind)):
                dist += (ind.fitness.values[i] - opt_ind[i])**2
            if dist < distances[-1]:
                distances[-1] = dist
        distances[-1] = sqrt(distances[-1])

    return sum(distances) / len(distances)


def convergence_new(first_front, optimal_front):
    """Given a Pareto front `first_front` and the optimal Pareto front,
    this function returns a metric of convergence
    of the front as explained in the original NSGA-II article by K. Deb.
    The smaller the value is, the closer the front is to the optimal one.
    """

    # Extract fitness.values into a list and convert it to a numpy array
    first_front_ = numpy.array([i.fitness.values for i in first_front])
    optimal_front_ = numpy.array(optimal_front)

    # Pairwise differences first_front - optimal_front, squared
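    # Broadcasting (n_first, 1, n_obj) against (n_opt, n_obj) gives shape (n_first, n_opt, n_obj)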
    diff_pow = (first_front_[:, numpy.newaxis] - optimal_front_) ** 2

    sum_dp = numpy.sum(diff_pow, axis=2)  # sum over the objective axis

    # Square root of the minimum of sum_dp along each row (distance to the nearest optimal point)
    min_sq = numpy.sqrt(numpy.min(sum_dp, axis=1))


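    # Average distance to the optimal front; equivalent to numpy.mean(min_sq)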
    return numpy.sum(min_sq) / len(first_front_)


creator.create("FitnessMin", base.Fitness, weights=(-1.0, -1.0))
creator.create("Individual", array.array, typecode='d', fitness=creator.FitnessMin)

toolbox = base.Toolbox()

# Problem definition
# Functions zdt1, zdt2, zdt3, zdt6 have bounds [0, 1]
BOUND_LOW, BOUND_UP = 0.0, 1.0

# Functions zdt4 has bounds x1 = [0, 1], xn = [-5, 5], with n = 2, ..., 10
# BOUND_LOW, BOUND_UP = [0.0] + [-5.0]*9, [1.0] + [5.0]*9

# Functions zdt1, zdt2, zdt3 have 30 dimensions, zdt4 and zdt6 have 10
NDIM = 30

def uniform(low, up, size=None):
    try:
        return [random.uniform(a, b) for a, b in zip(low, up)]
    except TypeError:
        return [random.uniform(a, b) for a, b in zip([low] * size, [up] * size)]

toolbox.register("attr_float", uniform, BOUND_LOW, BOUND_UP, NDIM)
toolbox.register("individual", tools.initIterate, creator.Individual, toolbox.attr_float)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)

toolbox.register("evaluate", benchmarks.zdt1)
toolbox.register("mate", tools.cxSimulatedBinaryBounded, low=BOUND_LOW, up=BOUND_UP, eta=20.0)
toolbox.register("mutate", tools.mutPolynomialBounded, low=BOUND_LOW, up=BOUND_UP, eta=20.0, indpb=1.0/NDIM)
toolbox.register("select", tools.selNSGA2)

def main(seed=None, mu=100):
    random.seed(seed)

    NGEN = 250
    MU = mu
    CXPB = 0.9

    stats = tools.Statistics(lambda ind: ind.fitness.values)
    # stats.register("avg", numpy.mean, axis=0)
    # stats.register("std", numpy.std, axis=0)
    stats.register("min", numpy.min, axis=0)
    stats.register("max", numpy.max, axis=0)

    logbook = tools.Logbook()
    logbook.header = "gen", "evals", "std", "min", "avg", "max"

    pop = toolbox.population(n=MU)

    # Evaluate the individuals with an invalid fitness
    invalid_ind = [ind for ind in pop if not ind.fitness.valid]
    fitnesses = toolbox.map(toolbox.evaluate, invalid_ind)
    for ind, fit in zip(invalid_ind, fitnesses):
        ind.fitness.values = fit

    # This is just to assign the crowding distance to the individuals
    # no actual selection is done
    pop = toolbox.select(pop, len(pop))

    return pop



with open("zdt1_front.json") as optimal_front_data:
    optimal_front = json.load(optimal_front_data)
# Use 500 of the 1000 points in the json file
optimal_front = sorted(optimal_front[i] for i in range(0, len(optimal_front), 2))

arr = [100, 300, 500, 1000, 5000, 10000]

for i in arr:
    pop = main(mu=i)
    pop.sort(key=lambda x: x.fitness.values)

    print('MU', i)

    now = datetime.datetime.now()
    print("Convergence old: ", convergence_old(pop, optimal_front))
    time_old = datetime.datetime.now() - now

    now = datetime.datetime.now()
    print("Convergence new: ", convergence_new(pop, optimal_front))
    time_new = datetime.datetime.now() - now

    print('time_old', time_old)
    print('time_new', time_new)

    print('ratio time_old/time_new', time_old / time_new)

Keep in mind that numpy performs the calculations slightly differently,
so the two results differ only around the 14th decimal place.
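
For instance, the agreement can be checked with a tolerance-based comparison (a sketch, reusing convergence_old, convergence_new, pop, and optimal_front from the script above):

import math

old_val = convergence_old(pop, optimal_front)
new_val = convergence_new(pop, optimal_front)
assert math.isclose(old_val, new_val, rel_tol=1e-9)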

@quant12345 changed the title from "Cycles replaced by numpy vector operations." to "ENH: Cycles replaced by numpy vector operations." on Dec 1, 2023
@quant12345 changed the title from "ENH: Cycles replaced by numpy vector operations." to "ENH: cycles replaced by numpy vector operations." on Dec 1, 2023