In [None]:
import logging

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sympy

import dataset
import dataset_misc1d
import dataset_misc2d
import profiling
from backprop import backprop
from backprop import gp, srgp, cmgp, crossover, selector, library, project, diversify
from gp import evaluator as gp_evaluator
from symbols import syntax_tree

# --- Dataset sampling: forwarded to S.sample(size=..., noise=...) ---
NOISE = 0.0          # `noise` argument of S.sample
SAMPLE_SIZE = 200    # `size` argument of S.sample

# --- Evolutionary run: forwarded to gp.GP and the operators it is given ---
GENERATIONS = 20     # second positional argument of gp.GP
POPSIZE = 1000       # first positional argument of gp.GP
MAX_STREE_DEPTH = 5  # depth limit given to gp.GP, the subtree mutator and the crossover
MUTATION_RATE = 0.15 # `mutrate` argument of gp.GP
GROUP_SIZE = 5       # argument of gp.TournamentSelector (tournament selector)
ELITISM = 1          # `elitism` argument of gp.GP
BACKPROP_INTV = -1   # `backprop_intv` argument of gp.GP; -1 presumably disables it -- TODO confirm

# --- Reporting ---
NBESTS = 4           # `nbests` argument of gp.GP

# Uncomment to enable timestamped INFO-level logging.
#logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')

In [None]:
# Choose the benchmark problem; the commented-out assignments are alternative datasets.
S = dataset_misc1d.MagmanDatasetScaled()
#S = dataset_misc2d.Resistance2()
#S = dataset_misc1d.ABSDataset()
# Sample SAMPLE_SIZE points with the configured NOISE (mesh disabled).
# The commented-out load() is an alternative that reads the data from a CSV file.
S.sample(size=SAMPLE_SIZE, noise=NOISE, mesh=False)
#S.load('../data/magman.csv')
# Split with train_size=0.7, then plot the dataset (knowledge plotting switched off).
S.split(train_size=0.7)
S.get_plotter().plot(width=8, height=6, plot_knowldege=False)

# Wrap S as NumpyDataset objects for the GP run.
# test=True presumably selects the held-out partition of the split -- TODO confirm.
S_train = dataset.NumpyDataset(S)
S_test  = dataset.NumpyDataset(S, test=True)

In [None]:
#print(S_train.y.size)
#project.project(S_train, S.knowledge)
#project.project_semantic(S_train.y, S_train, S.knowledge)
#S_train.get_plotter().plot(width=8, height=6)

In [None]:
# Build every component that is later handed to gp.GP. Commented-out lines are
# alternative choices kept for experimentation.

# Register the training set as the current problem on SyntaxTreeInfo.
syntax_tree.SyntaxTreeInfo.set_problem(S_train)

# Library built on the training set; it is shared by the diversifier, the crossover
# and the projector below. The meaning of the positional arguments 2000 and 3 is not
# visible here (presumably library size and maximum tree depth -- TODO confirm).
lib = library.Library(2000, 3, S_train)
#lib.find_best_similarity()

# Trunks are disabled (None); the commented-out call would generate them from the knowledge.
trunks = None #gp.generate_trunks(3, S.nvars, S.knowledge)

# Creator of random solutions; also reused by the subtree-replacement mutator.
solutionCreator = gp.RandomSolutionCreator(nvars=S.nvars, trunks=trunks)

# Composite mutator made of three mutators: subtree replacement (bounded by
# MAX_STREE_DEPTH), function-symbol mutation and numeric-parameter mutation (all=True).
multiMutator = gp.MultiMutator(
      gp.SubtreeReplacerMutator(MAX_STREE_DEPTH, solutionCreator),
      gp.FunctionSymbolMutator(),
      gp.NumericParameterMutator(all=True),
      #gp.NumericParameterMutator(all=False)
      )

# Diversifier working on the training set and the library.
#diversifier = None #gp.SemanticCrowdingDiversifier(S_train)
diversifier = diversify.SymbolicDiversifier(S_train, lib)

# Evaluator built from the training set and the dataset's knowledge object.
#evaluator = gp.R2Evaluator(S_train)
#evaluator = gp.FUEvaluator(S_train, S.knowledge)
#evaluator = gp.NumericalFUEvaluator(S_train, S.knowledge)
evaluator = gp_evaluator.FastFUEvaluator(S_train, S.knowledge)

# Tournament selection with groups of GROUP_SIZE.
# NOTE(review): this rebinds `selector`, shadowing the `selector` module imported from
# `backprop`; the module is no longer reachable under that name after this line.
# `crossover2` below avoids the same clash with the `crossover` module.
selector = gp.TournamentSelector(GROUP_SIZE)

# Crossover operator; it receives the evaluator's `know_evaluator` attribute.
#crossover2 = gp.SubTreeCrossover(MAX_STREE_DEPTH)
#crossover2 = crossover.ApproxGeometricCrossover(lib, MAX_STREE_DEPTH, diversifier)
#crossover2 = crossover.CrossNPushCrossover(lib, MAX_STREE_DEPTH)
crossover2 = crossover.ConstrainedCrossNPushCrossover(lib, MAX_STREE_DEPTH, evaluator.know_evaluator)

# Projector built from the library and the knowledge object.
projector = project.Projector(lib, S.knowledge)

# Assemble the symbolic regressor: population size, generation count and depth limit
# come first, followed by the train/test datasets and the configured operators.
symb_regressor = gp.GP(
    POPSIZE,
    GENERATIONS,
    MAX_STREE_DEPTH,
    S_train,
    S_test,
    creator=solutionCreator,
    evaluator=evaluator,
    selector=selector,
    crossover=crossover2,
    mutator=multiMutator,
    mutrate=MUTATION_RATE,
    diversifier=diversifier,
    projector=projector,
    elitism=ELITISM,
    backprop_intv=BACKPROP_INTV,
    knowledge=S.knowledge,
    trunks=trunks,
    nbests=NBESTS,
)

# Run the evolution with NumPy floating-point warnings silenced, then print the
# statistics collected by the `profiling` module (imported with the other
# dependencies at the top of the notebook rather than inside this `with` body).
with np.errstate(all='ignore'):
    bests, eval_map = symb_regressor.evolve()
    profiling.print_stats()

# The first returned tree is reported as the best; evaluations are keyed by id(tree).
best_stree = bests[0]
best_eval = eval_map[id(best_stree)]

print("--- Best syntax tree ---")
print(best_stree)
print(best_eval)

In [None]:
# Render the best tree as a factored, simplified sympy expression.
# `sympy` is imported with the other dependencies at the top of the notebook, so
# later cells that use it no longer depend on this cell having been run.
sympy.init_printing()
sympy.simplify(sympy.factor(best_stree.to_sympy()))

In [None]:
# Structural summary of the best tree: simplified form, maximum depth and node count.
print(best_stree.simplify())
print(best_stree.get_max_depth())
print(best_stree.get_nnodes())
# Disabled experiment: lpbackprop check of the best tree against the knowledge.
#from backprop import lpbackprop
#sat, stree_cost = lpbackprop.lpbackprop(S.knowledge, best_stree, None)
#print(sat)

# Differentiate the best tree for the derivatives reported by the knowledge object
# (include_zeroth=True is passed; presumably this adds the undifferentiated tree --
# TODO confirm), then print the factored, simplified sympy form of the entry keyed (0, 0).
K_derivs = S.knowledge.get_derivs()
stree_derivs = backprop.SyntaxTree.diff_all(best_stree, K_derivs, include_zeroth=True)
print(sympy.simplify(sympy.factor(stree_derivs[(0,0)].to_sympy())))

# Evaluate the best tree with NumericalFUEvaluator on the training set, with eval_deriv=True.
print(gp.NumericalFUEvaluator(S_train, S.knowledge).evaluate(best_stree, eval_deriv=True))

In [None]:
# Disabled diagnostics, kept as comments. They used to live in a triple-quoted string
# at the end of the cell; as the cell's last expression that string was echoed by
# Jupyter as the cell output.
#S.get_plotter().plot(width=8, height=6, plot_knowldege=False, model=best_stree.diff(0).simplify(), zoomout=3)
#S.knowledge.synthesize(best_stree, S_train.X).get_plotter().plot(width=8, height=6)
#
#K_evaluator = gp.FUEvaluator(S_train, S.knowledge)
#print(K_evaluator.evaluate(best_stree))
#
#print(best_stree.diff(0))
#print(best_stree.diff(0).simplify())

# Plot the dataset, passing the best tree as `model`. The trailing semicolon keeps
# the call's return value from being displayed now that this is the last expression.
S.get_plotter().plot(width=8, height=6, plot_knowldege=False, model=best_stree, zoomout=1);

In [None]:
# One curve per entry of the run's quality statistics; each series is also printed.
for label, series in symb_regressor.stats.qualities.items():
    print(label, series)
    plt.plot(series, label=label)

plt.title('Qualities')
plt.xlabel('Generation')
plt.ylabel('R2')
plt.ylim(-0.01, 1.01)  # small margin around the [0, 1] range
plt.legend()
plt.show()

In [None]:
# One curve per entry of the run's feasibility-ratio statistics.
for label, series in symb_regressor.stats.fea_ratio.items():
    plt.plot(series, label=label)

plt.title('Feasibility')
plt.xlabel('Generation')
plt.ylabel('Ratio')
plt.ylim(-0.01, 1.01)  # small margin around the [0, 1] range
plt.legend()
plt.show()

In [None]:
# Tabulate every model stored in `front_tracker_a`, front by front.
# `pd` is imported with the other dependencies at the top of the notebook.
#
# Column provenance (one row per model):
#   Score    - position of the model inside its front (enumerate index)
#   Model    - the syntax tree itself
#   Front    - index of the front
#   fpn      - data / length
#   R2       - the `data` element of the front entry
#   Length   - the `length` element of the front entry
#   Symbset, Symbdist, Crowdist - per-front values looked up by id(tree)
pd_data = []
front_tracker = symb_regressor.fea_front_tracker.front_tracker_a

for frontidx in range(front_tracker.nfronts):

    front = front_tracker.get_front(frontidx)
    symbset, symbdist = front_tracker.compute_symbdist(frontidx)
    crowdist = front_tracker.compute_crowdist(frontidx)

    for idx, (stree, data, length) in enumerate(front):
        pd_data.append(
            [idx, stree, frontidx, data/length, data, length, symbset[id(stree)], symbdist[id(stree)], crowdist[id(stree)]]
        )

# Plot front 0, then show the first 60 rows of the table as the cell output.
front_tracker.plot(0)
pd.DataFrame(pd_data, columns=['Score', 'Model', 'Front', 'fpn', 'R2', 'Length', 'Symbset', 'Symbdist', 'Crowdist']).head(n=60)
#print(front_tracker.symbfreq.freq)
#print(symb_regressor.fea_front_tracker.front_tracker_b.symbfreq.freq)
