# Test Distances

This notebook tests and gathers statistics on our distance functions.


In [44]:
import math
import random
import numpy as np

from pto import run, rnd

# import problem
from pto.problems import onemax, sphere, symbolic_regression

In [65]:
def hamming_distance(x, y):
    """Count the positions at which two sequences hold different values.

    Elements are compared pairwise in order. If the sequences differ in
    length, the surplus tail of the longer one is ignored (``zip`` stops at
    the shorter input).

    Args:
        x: First iterable of comparable items.
        y: Second iterable of comparable items.

    Returns:
        The number of aligned positions whose elements are not equal.
    """
    mismatch_flags = (left != right for left, right in zip(x, y))
    return sum(mismatch_flags)
def euclidean_distance(x, y):
    """Return the Euclidean (L2) distance between two real-valued vectors.

    Args:
        x: Sequence or array of floats.
        y: Sequence or array of floats, compatible in shape with ``x``.

    Returns:
        The norm of ``x - y`` as computed by ``np.linalg.norm``
        (``0.0`` when both inputs are empty).

    Raises:
        AssertionError: If ``x`` is non-empty and its first element is not a
            Python or NumPy floating-point value.
    """
    # Sanity check that we were handed a real-valued vector. isinstance is
    # used rather than an exact type comparison so NumPy floating scalars
    # (e.g. np.float64 elements of an array) are accepted as well as plain
    # floats; the check is skipped for empty input, which has no first element.
    assert len(x) == 0 or isinstance(x[0], (float, np.floating)), (
        "euclidean_distance expects float-valued vectors"
    )
    x = np.asarray(x)
    y = np.asarray(y)
    return np.linalg.norm(x - y)
def tree_edit_distance(x, y):
    """Placeholder phenotype distance for tree-structured solutions.

    No tree edit distance is actually computed: both arguments are ignored
    and the constant 1 is returned for every pair.

    Args:
        x: First phenotype (unused).
        y: Second phenotype (unused).

    Returns:
        Always ``1``.
    """
    # TODO: replace this stub with a real tree edit distance.
    placeholder_distance = 1
    return placeholder_distance

In [66]:
def make_op(problem, gen_args, fit_args):
    """Obtain the 'Op' instance that ``run`` configures for ``problem``.

    ``run`` is called with a stand-in ``Solver`` class whose only job is to
    capture the ``op`` argument it is constructed with and hand it back when
    called. Presumably ``run`` instantiates the solver and returns the result
    of calling it -- confirm against ``pto.run``.

    New users: don't try to understand this or do anything like this.

    Args:
        problem: Object exposing ``generator``, ``fitness`` and ``better``.
        gen_args: Forwarded to ``run`` as ``gen_args``.
        fit_args: Forwarded to ``run`` as ``fit_args``.

    Returns:
        Whatever ``run`` returns when given the stand-in solver.
    """

    class oper:
        """Stand-in solver: remembers ``op`` and returns it when called."""

        def __init__(self, op, **_unused):
            # Any further keyword arguments supplied at construction are ignored.
            self.op = op

        def __call__(self):
            return self.op

    return run(
        problem.generator,
        problem.fitness,
        Solver=oper,
        gen_args=gen_args,
        fit_args=fit_args,
        better=problem.better,
    )

In [78]:
def report(problem, gen_args, fit_args, phenotype_distance=None, n_iters=100):
    """Collect distance statistics for one problem over repeated samples.

    An operator object is built once via ``make_op`` and then
    ``do_distances`` is called ``n_iters`` times with it.

    Args:
        problem: Problem object passed to ``make_op``.
        gen_args: Generator arguments passed to ``make_op``.
        fit_args: Fitness arguments passed to ``make_op``.
        phenotype_distance: Optional callable forwarded to ``do_distances``;
            when omitted the phenotype columns are all 0.
        n_iters: Number of independent samples to draw (default 100, the
            value that was previously hard-coded).

    Returns:
        A NumPy array with one row per sample; each row holds the eight
        distances returned by ``do_distances``.
    """
    op = make_op(problem, gen_args, fit_args)
    samples = [do_distances(op, phenotype_distance) for _ in range(n_iters)]
    return np.array(samples)

def do_distances(op, phenotype_distance=None):
    """Sample one family of individuals and measure the distances among them.

    Two parents ``x`` and ``y`` are created, ``w`` is their crossover
    offspring and ``z`` is a position-wise mutant of ``x``.

    We are interested in:
      * d(x, y): baseline distance between a random pair
      * d(x, w) + d(y, w): crossover offspring distance should be <= d(x, y)?
      * d(x, z): mutation offspring distance should be "small"

    Phenotype distance is also of interest, perhaps.
    Worry about repair: the operators are geometric pre-repair.

    Args:
        op: Operator object providing ``create_ind``, ``crossover_ind``,
            ``mutate_position_wise_ind`` and ``distance_ind``.
        phenotype_distance: Optional callable applied to the ``pheno``
            attributes of two individuals. When falsy, 0 is reported instead.

    Returns:
        A list of eight values: ``op.distance_ind`` for the pairs (x, y),
        (x, w), (y, w), (x, z), followed by the phenotype distance (or 0)
        for the same four pairs in the same order.
    """
    # Parents, then the crossover child, then a mutant of the first parent.
    parent_a = op.create_ind()
    parent_b = op.create_ind()
    child = op.crossover_ind(parent_a, parent_b)
    mutant = op.mutate_position_wise_ind(parent_a)

    pairs = (
        (parent_a, parent_b),
        (parent_a, child),
        (parent_b, child),
        (parent_a, mutant),
    )

    genotype_part = [op.distance_ind(first, second) for first, second in pairs]

    if phenotype_distance:
        phenotype_part = [
            phenotype_distance(first.pheno, second.pheno) for first, second in pairs
        ]
    else:
        phenotype_part = [0] * len(pairs)

    return genotype_part + phenotype_part

In [79]:
# The four tuples below are aligned by position: entry i of each one
# describes the same problem (its module, phenotype distance, generator
# arguments and fitness arguments).
problems = (onemax, sphere, symbolic_regression)
distances = (hamming_distance, euclidean_distance, tree_edit_distance)
gen_argss = (
    (onemax.size,),
    (sphere.size,),
    (symbolic_regression.func_set, symbolic_regression.term_set),
)
fit_argss = (None, None, (symbolic_regression.X_train, symbolic_regression.y_train))

# Keep every problem's statistics. Previously `results` was simply
# overwritten on each iteration, so only the last problem's array survived.
# `results` still ends up holding the last problem's array, as before.
all_results = {}
for p, gen_args, fit_args, d in zip(problems, gen_argss, fit_argss, distances):
    results = report(p, gen_args, fit_args, d)
    all_results[getattr(p, "__name__", repr(p))] = results

In [80]:
# Display the array produced by the final report(...) call of the loop above:
# one row per sampled iteration, eight distance columns in the order
# returned by do_distances.
results

array([[3.23854946e+01, 3.21445446e+01, 2.40949975e-01, 3.15407287e+00,
        1.00000000e+00, 1.00000000e+00, 1.00000000e+00, 1.00000000e+00],
       [6.77781780e+00, 1.80585701e-01, 6.59723210e+00, 1.00000000e+00,
        1.00000000e+00, 1.00000000e+00, 1.00000000e+00, 1.00000000e+00],
       [1.07873438e+00, 1.03240097e+00, 4.63334066e-02, 0.00000000e+00,
        1.00000000e+00, 1.00000000e+00, 1.00000000e+00, 1.00000000e+00],
       [2.87690649e+01, 2.73940650e+01, 1.37499990e+00, 1.00000000e+00,
        1.00000000e+00, 1.00000000e+00, 1.00000000e+00, 1.00000000e+00],
       [4.46503815e+00, 4.41755508e+00, 4.74830779e-02, 0.00000000e+00,
        1.00000000e+00, 1.00000000e+00, 1.00000000e+00, 1.00000000e+00],
       [4.58183795e+00, 3.79837273e-01, 4.20200068e+00, 0.00000000e+00,
        1.00000000e+00, 1.00000000e+00, 1.00000000e+00, 1.00000000e+00],
       [4.99495477e-02, 1.70998610e-02, 3.28496868e-02, 1.00000000e+00,
        1.00000000e+00, 1.00000000e+00, 1.00000000e+00, 1.