ENH: cycles replaced by numpy vector operations. #725

Open · wants to merge 1 commit into master
Conversation


@quant12345 commented Sep 13, 2023

I asked a question about the size of the arrays here.

No one answered, so after thinking about it a little more I concluded that the arrays are two-dimensional. Python lists have no vector operations, and in the line
(ind.fitness.values[i] - opt_ind[i])**2
the operands are just numbers.

Therefore, I am opening this pull request with my changes to the 'convergence' function.
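
The core idea, shown on plain data (a minimal sketch with made-up 2-D points, not actual DEAP individuals): numpy broadcasting computes all pairwise squared distances in one step, replacing the three nested loops.

import numpy

front = numpy.array([[0.0, 1.0], [0.5, 0.5]])    # stand-in for the fitness values of first_front
optimal = numpy.array([[0.0, 0.9], [0.4, 0.6]])  # stand-in for optimal_front

# (2, 1, 2) - (2, 2) broadcasts to (2, 2, 2): every front point minus every optimal point
diff = front[:, numpy.newaxis] - optimal
# sum of squares over the objective axis, minimum over the optimal front, then sqrt
dists = numpy.sqrt(numpy.min(numpy.sum(diff ** 2, axis=2), axis=1))
print(dists.mean())  # the same value the nested loops would produce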

Cycles replaced by numpy vector operations.
On average, the 'convergence' function speeds up by 20-30 times. For the tests I created separate functions, 'convergence_old' and 'convergence_new'; the time spent computing each function, together with its return value, is printed. In the loop at the bottom of the script, the parameter mu=i is passed to the 'main' function. I removed part of the original example code because waiting for the whole computation takes a long time, and I only need the 'pop' population list.

Tests:
import array
import random
import json

import numpy
import datetime
from math import sqrt

from deap import base
from deap import benchmarks
from deap import creator
from deap import tools


def convergence_old(first_front, optimal_front):
    """Given a Pareto front `first_front` and the optimal Pareto front,
    this function returns a metric of convergence
    of the front as explained in the original NSGA-II article by K. Deb.
    The smaller the value is, the closer the front is to the optimal one.
    """

    distances = []

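    # For each individual, find the nearest point on the optimal front;
    # squared distances are compared and sqrt is taken once at the end.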
    for ind in first_front:
        distances.append(float("inf"))
        for opt_ind in optimal_front:
            dist = 0.
            for i in range(len(opt_ind)):
                dist += (ind.fitness.values[i] - opt_ind[i])**2
            if dist < distances[-1]:
                distances[-1] = dist
        distances[-1] = sqrt(distances[-1])

    return sum(distances) / len(distances)


def convergence_new(first_front, optimal_front):
    """Given a Pareto front `first_front` and the optimal Pareto front,
    this function returns a metric of convergence
    of the front as explained in the original NSGA-II article by K. Deb.
    The smaller the value is, the closer the front is to the optimal one.
    """

    # Extract fitness.values into a list and convert it to a numpy array
    first_front_ = numpy.array([i.fitness.values for i in first_front])
    optimal_front_ = numpy.array(optimal_front)

    # Pairwise differences first_front - optimal_front, squared
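    # Broadcasting (n_first, 1, n_obj) against (n_opt, n_obj) gives shape (n_first, n_opt, n_obj)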
    diff_pow = (first_front_[:, numpy.newaxis] - optimal_front_) ** 2

    sum_dp = numpy.sum(diff_pow, axis=2)  # sum over the objective axis

    # Square root of the minimum of sum_dp along each row (distance to the nearest optimal point)
    min_sq = numpy.sqrt(numpy.min(sum_dp, axis=1))


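    # Average distance to the optimal front; equivalent to numpy.mean(min_sq)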
    return numpy.sum(min_sq) / len(first_front_)


creator.create("FitnessMin", base.Fitness, weights=(-1.0, -1.0))
creator.create("Individual", array.array, typecode='d', fitness=creator.FitnessMin)

toolbox = base.Toolbox()

# Problem definition
# Functions zdt1, zdt2, zdt3, zdt6 have bounds [0, 1]
BOUND_LOW, BOUND_UP = 0.0, 1.0

# Functions zdt4 has bounds x1 = [0, 1], xn = [-5, 5], with n = 2, ..., 10
# BOUND_LOW, BOUND_UP = [0.0] + [-5.0]*9, [1.0] + [5.0]*9

# Functions zdt1, zdt2, zdt3 have 30 dimensions, zdt4 and zdt6 have 10
NDIM = 30

def uniform(low, up, size=None):
    try:
        return [random.uniform(a, b) for a, b in zip(low, up)]
    except TypeError:
        return [random.uniform(a, b) for a, b in zip([low] * size, [up] * size)]

toolbox.register("attr_float", uniform, BOUND_LOW, BOUND_UP, NDIM)
toolbox.register("individual", tools.initIterate, creator.Individual, toolbox.attr_float)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)

toolbox.register("evaluate", benchmarks.zdt1)
toolbox.register("mate", tools.cxSimulatedBinaryBounded, low=BOUND_LOW, up=BOUND_UP, eta=20.0)
toolbox.register("mutate", tools.mutPolynomialBounded, low=BOUND_LOW, up=BOUND_UP, eta=20.0, indpb=1.0/NDIM)
toolbox.register("select", tools.selNSGA2)

def main(seed=None, mu=100):
    random.seed(seed)

    NGEN = 250
    MU = mu
    CXPB = 0.9

    stats = tools.Statistics(lambda ind: ind.fitness.values)
    # stats.register("avg", numpy.mean, axis=0)
    # stats.register("std", numpy.std, axis=0)
    stats.register("min", numpy.min, axis=0)
    stats.register("max", numpy.max, axis=0)

    logbook = tools.Logbook()
    logbook.header = "gen", "evals", "std", "min", "avg", "max"

    pop = toolbox.population(n=MU)

    # Evaluate the individuals with an invalid fitness
    invalid_ind = [ind for ind in pop if not ind.fitness.valid]
    fitnesses = toolbox.map(toolbox.evaluate, invalid_ind)
    for ind, fit in zip(invalid_ind, fitnesses):
        ind.fitness.values = fit

    # This is just to assign the crowding distance to the individuals
    # no actual selection is done
    pop = toolbox.select(pop, len(pop))

    return pop



with open("zdt1_front.json") as optimal_front_data:
    optimal_front = json.load(optimal_front_data)
# Use 500 of the 1000 points in the json file
optimal_front = sorted(optimal_front[i] for i in range(0, len(optimal_front), 2))

arr = [100, 300, 500, 1000, 5000, 10000]

for i in arr:
    pop = main(mu=i)
    pop.sort(key=lambda x: x.fitness.values)

    print('MU', i)

    now = datetime.datetime.now()
    print("Convergence old: ", convergence_old(pop, optimal_front))
    time_old = datetime.datetime.now() - now

    now = datetime.datetime.now()
    print("Convergence new: ", convergence_new(pop, optimal_front))
    time_new = datetime.datetime.now() - now

    print('time_old', time_old)
    print('time_new', time_new)

    print('ratio time_old/time_new', time_old / time_new)

Keep in mind that numpy performs the calculations slightly differently,
so the two results differ only around the 14th decimal place.
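
For instance, the agreement can be checked with a tolerance-based comparison (a sketch, reusing convergence_old, convergence_new, pop, and optimal_front from the script above):

import math

old_val = convergence_old(pop, optimal_front)
new_val = convergence_new(pop, optimal_front)
assert math.isclose(old_val, new_val, rel_tol=1e-9)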

@quant12345 changed the title from "Cycles replaced by numpy vector operations." to "ENH: Cycles replaced by numpy vector operations." on Dec 1, 2023
@quant12345 changed the title from "ENH: Cycles replaced by numpy vector operations." to "ENH: cycles replaced by numpy vector operations." on Dec 1, 2023