In [25]:
import numpy as np
import scipy
import scipy.linalg
from scipy.special import gamma
import tensorflow as tf

import plotly.graph_objs as go
import plotly.express as px
from tqdm import trange
import matplotlib.pyplot as plt
import cma
import time

In [26]:
class CMA_ES:
    def __init__(self, x0, sigma, maxfevals = 10000, popsize = None, weights = None, domain = None):
        N = x0.shape[0]
        self.dimension = N
        self.chiN = N**0.5 * (1 - 1. / (4 * N) + 1. / (21 * N**2))
        self.lam = 4 + int(3 * np.log(N)) if not popsize else popsize
        print(f"Popsize: {self.lam}")
        self.mu = int(self.lam / 2)
        self.shape = tf.cast((self.lam, N), tf.int32)
        
        if weights:
            self.weights = weights
        else:
            self.weights = np.array([np.log(self.lam / 2 + 0.5) - np.log(i + 1) if i < self.mu else 0
                        for i in range(self.lam)])
            self.weights /= np.sum(self.weights)
        if(self.weights.shape == (self.lam,)):
            self.weights = self.weights[:, np.newaxis]
        self.domain = domain
        self.mueff = np.sum(self.weights)**2 / np.sum(self.weights**2)
        
        self.cc = (4 + self.mueff/N) / (N+4 + 2 * self.mueff/N)
        self.cs = (self.mueff + 2) / (N + self.mueff + 5)
        self.c1 = 2 / ((N + 1.3)**2 + self.mueff) 
        # self.cmu = min([1 - self.c1, 2 * (self.mueff - 2 + 1/self.mueff) / ((N + 2)**2 + self.mueff)])
        self.cmu = 2 * (self.mueff - 2 + 1 / self.mueff) / ((N + 2)**2 + 2 * self.mueff / 2)
        # self.damps = 2 * self.mueff/self.lam + 0.3 + self.cs
        self.damps = 1 + 2 * max(0, np.sqrt((self.mueff - 1)/(N + 1)) - 1) + self.cs

        self.xmean = np.array(x0[:])
        self.sigma = sigma
        self.pc = np.zeros(N) 
        self.ps =np.zeros(N) 
        self.lazy_gap_evals = 0.5 * N * self.lam * (self.c1 + self.cmu)**-1 / N**2
        self.maxfevals = maxfevals
        self.C = np.identity(N)
        self.counteval = 0 
        self.fitvals = []   
        self.best = (x0, None)
        self.condition_number = 1
        self.eigen_values = np.ones(N)
        self.eigen_vectors = np.identity(N)
        self.updated_eval = 0
        self.inv_sqrt = np.ones(N)
        self.B = np.eye(self.dimension)
        self.D = np.eye(self.dimension)

    def _update_eigensystem(self, current_eval, lazy_gap_evals):
        if current_eval <= self.updated_eval + lazy_gap_evals:
            return self
        self.eigen_values, self.eigen_vectors = np.linalg.eig(self.C)
        self.inv_sqrt = self.eigen_vectors @ np.diag(self.eigen_values**-0.5) @ self.eigen_vectors.T
        self.condition_number = self.eigen_values.max() / self.eigen_values.min()
         
    def sample(self):
        z = tf.random.normal(self.shape, dtype=tf.float64)
        z = np.array(z)
        y = z @ (self.B @ self.D)
        x = self.xmean + self.sigma * y
        return x
    
    def update(self, x, fitvals):
        """Zaktualizuj wartoĹci uzyskanych parametrĂłw"""
        self.counteval += fitvals.shape[0] 
        #check if x in domain, if its not add 1e8 to fitval of this x
        if self.domain:
            for i, xi in enumerate(x):
                if np.any(xi < self.domain[0]) or np.any(xi > self.domain[1]):
                    fitvals[i] += 1e8

        
        #------------------------------------------------------------------------------------------------------
        idx = np.argsort(fitvals)
        x_sorted = x[idx]
        self.fitvals = fitvals[idx] 
        self.best = (x[0], self.fitvals[0])

        xdiff = x_sorted - self.xmean
        x_mean = np.sum(xdiff * self.weights, axis=0)
        m = self.xmean + x_mean
        #------------------------------------------------------------------------------------------------------
        y_mean = x_mean / self.sigma 
        pc = (1 - self.cc) * self.pc + np.sqrt(self.cc * (2 - self.cc) * self.mueff) * y_mean
        pcmatrix = pc[:, np.newaxis]

        C_m = np.array([e[:, np.newaxis] * e.T for e in (xdiff / self.sigma)])
        y_s = np.sum(C_m * self.weights[:, np.newaxis], axis=0)

        C = (1 - self.c1 - self.cmu) * self.C + self.c1 * pcmatrix * pcmatrix.T + self.cmu * y_s

        C = (C + C.T)/2.0
        
        #--------------------------------------------------------------------------------------------------------

        D_inv = np.diag(np.reciprocal(np.diag(self.D)))
        C_inv_squared = (self.B @ D_inv) @ (self.B.T)
        C_inv_squared_y = np.squeeze(C_inv_squared @ y_mean[:, np.newaxis])  
        ps = (1 - self.cs) * self.ps + np.sqrt(self.cs * (2 - self.cs) * self.mueff) * C_inv_squared_y  

        sigma = self.sigma * np.exp((self.cs / self.damps) * ((np.linalg.norm(ps) / self.chiN) - 1))

        #--------------------------------------------------------------------------------------------------------
        u, B, _ = tf.linalg.svd(C)
        u = np.array(u)
        B = np.array(B)
        diag_D = np.sqrt(u)
        D = np.diag(diag_D)

        #--------------------------------------------------------------------------------------------------------

        self.pc = pc
        self.ps = ps
        self.C = C
        self.sigma = sigma
        self.B = B
        self.D = D
        self.xmean = m
        
    def terminate(self):
        """ZakoĹcz algorytm"""
        if self.counteval <= 0:
            return False
        if self.counteval >= self.maxfevals:
            return True
        if self.condition_number > 1e13:
            return True
        if self.sigma * np.max(self.eigen_values)**0.5 < 1e-13:
            return True
        return False

In [27]:
def optimize(func, x0, sigma, maxfevals = 1000, popsize = None, weights = None, domain = None):
    cma_es = CMA_ES(x0, sigma, maxfevals, popsize, weights, domain)
    res = []
    cntr = 0
    time_now = time.time()
    while not cma_es.terminate():
        cntr+=1
        x = cma_es.sample()
        f_eval = func(x)
        cma_es.update(x, f_eval)
        res.append(cma_es.best)
        if cntr % 100 == 0:
            print(f"Iteration {cntr:5d}: {res[-1][1]}")
    print(f"Time mine: {time.time() - time_now}")
    return res

def optimize_and_plot(f, sigma = 1, d = 10, popsize = None, maxfevals = 1000, domain = None):
    x0 = np.repeat(100.0, d)
    res = optimize(f, x0, sigma, popsize = popsize, maxfevals = maxfevals, domain = domain)
    print(f"Best: {res[-1][0]}, value: {res[-1][1]}")
    y = np.array([nd for st, nd in res])
    fig = px.line(x = np.arange(y.shape[0]) + 1, y = y)
    fig.show()

In [28]:
def rastrigin_function(X):
    return 10.0 * X.shape[1] + np.sum(X**2, axis=1) - 10.0 * np.sum(np.cos(2 * np.pi * X), axis=1)

In [31]:
def run_experiments():
    dim = 10
    sigma = 50
    maxfevals = 10000
    popsize = 50
    num_runs = 5

    custom_results = []
    cma_results = []

    for _ in range(num_runs):
        x0 = np.random.uniform(-5, 5, dim)

        # Run custom CMA-ES
        start_time = time.time()
        res = optimize(rastrigin_function, x0, sigma, maxfevals, popsize)
        custom_results.append((res[-1][1], time.time() - start_time))

        # Run CMA library
        start_time = time.time()
        es = cma.CMAEvolutionStrategy(x0.tolist(), sigma, {"popsize": popsize})
        es.optimize(cma.ff.rastrigin, maxfevals=maxfevals)
        cma_results.append((es.result[1], time.time() - start_time))

    # Print comparison
    print("\nCustom CMA-ES Results:")
    for i, (val, t) in enumerate(custom_results):
        print(f"Run {i + 1}: Best Value = {val:.6f}, Time = {t:.2f}s")

    print("\nCMA Library Results:")
    for i, (val, t) in enumerate(cma_results):
        print(f"Run {i + 1}: Best Value = {val:.6f}, Time = {t:.2f}s")

run_experiments()

Popsize: 50
Iteration   100: 7.425286342921524
Iteration   200: 1.989918114244361
Time mine: 0.08324670791625977
(25_w,50)-aCMA-ES (mu_w=14.0,w_1=14%) in dimension 10 (seed=112957, Thu Dec 12 17:27:49 2024)
Iterat #Fevals   function value  axis ratio  sigma  min&max std  t[m:s]
    1     50 7.247040504175677e+03 1.0e+00 4.74e+01  4e+01  5e+01 0:00.0
    2    100 7.566809034061596e+03 1.2e+00 4.21e+01  4e+01  4e+01 0:00.0
    3    150 3.583420463134493e+03 1.3e+00 3.78e+01  3e+01  4e+01 0:00.0
  100   5000 7.128818349178403e+00 3.0e+00 4.74e-01  3e-02  6e-02 0:00.1



ignoring unkown argument {'maxfevals': 10000} in OOOptimizer.optimize



  200  10000 5.969749304745790e+00 1.5e+00 7.44e-05  8e-08  9e-08 0:00.2
  214  10700 5.969749304740773e+00 1.5e+00 1.84e-05  1e-08  1e-08 0:00.2
Popsize: 50
Iteration   100: 4.660940814099348
Iteration   200: 3.9798362283739266
Time mine: 0.0567171573638916
(25_w,50)-aCMA-ES (mu_w=14.0,w_1=14%) in dimension 10 (seed=180476, Thu Dec 12 17:27:49 2024)
Iterat #Fevals   function value  axis ratio  sigma  min&max std  t[m:s]
    1     50 8.503204951097512e+03 1.0e+00 4.66e+01  4e+01  5e+01 0:00.0
    2    100 6.928111653324453e+03 1.2e+00 4.19e+01  4e+01  4e+01 0:00.0
    3    150 5.923619930320034e+03 1.2e+00 3.73e+01  3e+01  3e+01 0:00.0
  100   5000 9.744339899108496e+00 3.1e+00 7.14e-01  5e-02  1e-01 0:00.1
  200  10000 4.974795285474798e+00 1.6e+00 9.23e-05  1e-07  1e-07 0:00.2
  215  10750 4.974795285466897e+00 1.4e+00 3.61e-05  2e-08  3e-08 0:00.2
Popsize: 50
Iteration   100: 24.91994153704681
Iteration   200: 3.9798362288733813
Time mine: 0.05524611473083496
(25_w,50)-aCMA-ES (mu_w

In [29]:
time_start = time.time()
dim = 15
sigma = 50
x, es = cma.fmin2(cma.ff.rastrigin, dim * [1], sigma)
es = cma.CMAEvolutionStrategy(dim * [1], 1).optimize(cma.ff.rastrigin)
x, es.result[0]  # best evaluated solution
# es.result[5]  # mean solution, presumably better with noise
time_end = time.time()
print(f"Time authors: {time_end - time_start}")

optimize_and_plot(rastrigin_function, d = 15, maxfevals = 150000, sigma=50, popsize=100)




(6_w,12)-aCMA-ES (mu_w=3.7,w_1=40%) in dimension 15 (seed=158701, Thu Dec 12 17:23:58 2024)
Iterat #Fevals   function value  axis ratio  sigma  min&max std  t[m:s]
    1     12 1.942507412378403e+04 1.0e+00 4.64e+01  4e+01  5e+01 0:00.0
    2     24 1.813997959052828e+04 1.1e+00 4.12e+01  4e+01  4e+01 0:00.0
    3     36 2.009865487710502e+04 1.1e+00 3.98e+01  4e+01  4e+01 0:00.0
  100   1200 1.730605029558492e+02 2.5e+00 1.11e+00  7e-01  1e+00 0:00.1
  200   2400 1.379600743962461e+02 3.3e+00 7.68e-01  4e-01  7e-01 0:00.1
  300   3600 9.610930162680543e+01 6.0e+00 7.19e-01  3e-01  7e-01 0:00.2
  400   4800 2.689472956424385e+01 2.8e+00 8.70e-03  3e-03  4e-03 0:00.2
  500   6000 2.686385503506762e+01 2.3e+00 5.20e-05  1e-05  1e-05 0:00.2
  600   7200 2.686385419352987e+01 2.0e+00 1.91e-07  3e-08  3e-08 0:00.3
  634   7608 2.686385419352814e+01 1.8e+00 2.78e-08  3e-09  4e-09 0:00.3
termination on tolfun=1e-11 (Thu Dec 12 17:23:59 2024)
final/bestever f-value = 2.686385e+01 2.686385e+01 