In [1]:
import sys

# Make the project's `src` directory importable (utils, collectors).
# Guarded so that re-running this cell does not append duplicate entries.
if '../src/' not in sys.path:
    sys.path.append('../src/')

In [2]:
import pandas as pd
import utils

# Load opfunu functions

In [3]:
from opfunu.dimension_based import benchmark1d, benchmark2d, benchmark3d, benchmarknd
from opfunu.type_based import multi_modal, uni_modal
import inspect

In [4]:
# opfunu modules whose `Functions` class is inspected by build_cls_df
classes = [
    benchmark1d,
    benchmark2d,
    benchmark3d,
    benchmarknd,
    # total methods = 62
    # multi_modal, uni_modal, # total methods = 47
]

In [5]:
def cls2name(cls):
    ''' Return the last dotted component of the module that defines `cls.Functions`. '''
    dotted_path = cls.Functions.__module__
    return dotted_path.rsplit('.', 1)[-1]

def cls2methods(cls):
    ''' List the names of the plain functions found on `cls.Functions` (order as given by inspect.getmembers). '''
    members = dict(inspect.getmembers(cls.Functions, predicate=inspect.isfunction))
    return [*members]

def build_cls_df(classes):
    ''' Tabulate every function exposed by the `Functions` class of each given module.

    Parameters:
        classes: iterable of objects accepted by `cls2name` / `cls2methods`
                 (each must expose a `Functions` attribute).

    Returns:
        pandas.DataFrame with two columns, sorted by `name` and re-indexed 0..n-1:
            name -- method name with underscores turned into spaces and each word
                    capitalized (e.g. '_xin_she_yang_n2__' -> 'Xin She Yang N2')
            call -- the string '<module>.Functions().<method>'
        An empty `classes` yields an empty frame that still has both columns.
    '''
    rows = []
    for cls in classes:
        name = cls2name(cls)
        for method_name in cls2methods(cls):
            words = method_name.replace('_', ' ').strip().split(' ')
            clean_name = ' '.join(word.capitalize() for word in words)
            rows.append(dict(
                name=clean_name,
                call=f'{name}.Functions().{method_name}',
            ))
    # Explicit columns: without them an empty `rows` gives a column-less frame
    # and sort_values('name') raises KeyError.
    df = pd.DataFrame(rows, columns=['name', 'call'])
    return df.sort_values('name').reset_index(drop=True)

In [6]:
# Build the opfunu function table (one row per method) and display it.
df = build_cls_df(classes)
df

Unnamed: 0,name,call
0,Ackley,benchmarknd.Functions()._ackley__
1,Ackley N2,benchmark2d.Functions()._ackley_n2__
2,Ackley N3,benchmark2d.Functions()._ackley_n3__
3,Ackley N4,benchmarknd.Functions()._ackley_n4__
4,Adjiman,benchmark2d.Functions()._adjiman__
...,...,...
58,Xin She Yang,benchmarknd.Functions()._xin_she_yang__
59,Xin She Yang N2,benchmarknd.Functions()._xin_she_yang_n2__
60,Xin She Yang N3,benchmarknd.Functions()._xin_she_yang_n3__
61,Xin She Yang N4,benchmarknd.Functions()._xin_she_yang_n4__


# Load Scraped Data

In [7]:
from collectors import sfu, infinity77, benchmarkfcns
import json

# Source identifiers. NOTE: the surrounding spaces are part of each string; the
# ids are later used verbatim as keys of `sources` and as DataFrame column names.
_s_ = ''' sfu '''
# Download steps are disabled; presumably the pages were already fetched
# locally and only need to be crawled again -- TODO confirm.
# sfu.download_homepage()
# sfu.download_functions()
sfu.crawl_functions()

_i_ = ''' infinity77 '''
# infinity77.download_homepage()
# infinity77.download_pages()
infinity77.crawl_pages()

_b_ = ''' benchmarkfcns '''
benchmarkfcns.crawl_markdown()

In [8]:
# One DataFrame per source; each must have a `name` column (used for the
# mapping and cross-referencing below).
idf = infinity77.load_df()
sdf = sfu.load_df()
bdf = benchmarkfcns.load_df()

# Map opfunu benchmarks to sources

In [9]:
# Source id -> scraped DataFrame. The ids become column names of `df` below.
sources = {
    _s_: sdf,
    _i_: idf,
    _b_: bdf,
}
# Start from a fresh opfunu table: every loop iteration merges one new
# column (named after the source id) into `df`.
df = build_cls_df(classes)

for source_id, source_df in sources.items():
    # create mapping df (opfunu name -> name used by this source);
    # only the first value returned by utils.diff_map is needed here
    source_map, *_ = utils.diff_map(df.name, source_df.name)
    source_map_df = pd.DataFrame(source_map.items(), columns=['name', source_id])
    # merge
    df = pd.merge(left=df, right=source_map_df, on='name', how='outer')

df

Unnamed: 0,name,call,sfu,infinity77,benchmarkfcns
0,Ackley,benchmarknd.Functions()._ackley__,Ackley,Ackley,Ackley
1,Ackley N2,benchmark2d.Functions()._ackley_n2__,,,Ackley N. 2
2,Ackley N3,benchmark2d.Functions()._ackley_n3__,,,Ackley N. 3
3,Ackley N4,benchmarknd.Functions()._ackley_n4__,,,Ackley N. 4
4,Adjiman,benchmark2d.Functions()._adjiman__,,Adjiman,Adjiman
...,...,...,...,...,...
58,Xin She Yang,benchmarknd.Functions()._xin_she_yang__,,Xin-She Yang 1,Xin-She Yang
59,Xin She Yang N2,benchmarknd.Functions()._xin_she_yang_n2__,,Xin-She Yang 2,Xin-She Yang N. 2
60,Xin She Yang N3,benchmarknd.Functions()._xin_she_yang_n3__,,Xin-She Yang 3,Xin-She Yang N. 3
61,Xin She Yang N4,benchmarknd.Functions()._xin_she_yang_n4__,,Xin-She Yang 4,Xin-She Yang N. 4


# Verify Mapping

In [11]:
# df.to_csv('df.csv', index=False)

In [None]:
# open csv file and manually remove names from sources that are wrong

In [12]:
# Reload the mapping after manual verification. Requires df.csv to exist in
# the working directory (the export cell above is currently commented out).
df = pd.read_csv('df.csv')

# Select which sources are superior

In [13]:
# MAYBE TODO
# rows = []
# for source_id, source_df in sources.items():
#     print(source_id)
#     row = {'source': source_id}
#     for column in source_df.columns:
#         row[column] = True
#     rows.append(row)
# pd.DataFrame(rows)

# Build db

In [14]:
from collections import defaultdict

In [15]:
# Ids of all sources, in the insertion order of `sources`.
source_ids = [*sources]

In [16]:
def crossref(source_id, name):
    ''' Look up `name` in the DataFrame of source `source_id` and return the first matching row as a dict.

    Raises KeyError for an unknown `source_id` and IndexError when no row has that name.
    '''
    frame = sources[source_id]
    matches = frame[frame['name'] == name]
    first_match = matches.iloc[0]
    return first_match.to_dict()

In [None]:
def frow(row):
    ''' Gather, for one mapping row, the values that every matched source reports for each field.

    Returns a plain dict sorted by key: each source field maps to a list holding
    one value per source whose column in `row` is not NaN, and 'method' maps to
    row['call'].
    '''
    collected = defaultdict(list)
    for source_id in source_ids:
        source_name = row[source_id]
        if pd.isna(source_name):
            # this source has no entry for the benchmark
            continue
        # pull the full record of the benchmark from the source's own table
        record = crossref(source_id, source_name)
        for field, value in record.items():
            collected[field].append(value)
    # the opfunu call string is a single value, not a per-source list
    collected['method'] = row['call']
    return sort_dict(collected)

In [266]:
def sort_dict(adict):
    ''' Return a new plain dict holding the items of `adict` ordered by key. '''
    return {key: adict[key] for key in sorted(adict)}

def set_and_get(f2v, f, v=None):
    ''' Ensure key `f` exists in `f2v` (inserting `v` if absent) and return the stored value. '''
    return f2v.setdefault(f, v)

def latex_treatment(f2v, f):
    ''' Rewrite the list stored under `f` as raw-string source snippets, in place.

    The key is created with value None when absent. Non-list values are left
    untouched. Each list item is quoted via `wrapr` and its newlines are replaced
    by the two characters backslash + n; a one-item list collapses to that item.
    '''
    values = set_and_get(f2v, f)
    if not isinstance(values, list):
        return
    quoted = []
    for item in values:
        quoted.append(wrapr(item).replace('\n', '\\n'))
    f2v[f] = quoted[0] if len(quoted) == 1 else quoted

def wrap(s, r=''):
    ''' Quote `s` as Python string-literal source text.

    Parameters:
        s: a str, or a (possibly nested) list of str.
        r: prefix placed before the opening quote (e.g. 'r' for a raw string).

    Returns:
        `r"<s>"` for a str, a list of wrapped items for a list, and None for
        any other type. Double quotes inside `s` are not escaped.
    '''
    if isinstance(s, list):
        # forward the prefix: the recursive call previously dropped it, so
        # wrapping a list with r='r' produced non-raw strings
        return [ wrap(si, r=r) for si in s ]
    if isinstance(s, str):
        return f'{r}"{s}"'
    return None

def wrapr(s):
    ''' Same as `wrap`, but emits raw-string literals (prefix 'r'). '''
    return wrap(s, r='r')



In [None]:
# (positive, negative) tag pairs understood by tags2dict. Each pair describes
# one property; tags2dict stores it under the positive name as True / False.
KNOWN_TAGS = [
    ('separable', 'non-separable'),
    ('continuous', 'discontinuous'),
    ('differentiable', 'non-differentiable'),
    ('multimodal', 'unimodal'),
    ('convex', 'non-convex'),
    ('scalable', 'non-scalable'),
    ('random', 'non-random'),
    ('parametric', 'non-parametric'),
]
def tags2dict(*tags):
    ''' Convert free-form tag strings into a dict of boolean properties.

    Every positive name in KNOWN_TAGS becomes a key: True when the positive tag
    was given, False when the negative tag was given or neither was. A leftover
    '<x>-dimensional' tag is stored as adict['dimension'] = '<x>'.

    Raises ValueError if any tag is still unconsumed afterwards.
    '''
    remaining = list(tags)
    flags = {}
    for idx, (positive, negative) in enumerate(KNOWN_TAGS):
        if not remaining:
            # nothing left to match: all properties not yet visited default to False
            flags.update({name: False for name, _ in KNOWN_TAGS[idx:]})
            return flags
        if positive in remaining:
            remaining.remove(positive)
            flags[positive] = True
            continue
        if negative in remaining:
            remaining.remove(negative)
        flags[positive] = False
    if remaining:
        dimension, remaining = dimensional(*remaining)
        if dimension:
            flags['dimension'] = dimension
    if remaining:
        raise ValueError(f'Unrecognized tags: {remaining}')
    return flags

def dimensional(*tags):
    ''' Split off the first tag ending in '-dimensional'.

    Returns (prefix, other_tags): `prefix` is that tag without the suffix and
    `other_tags` is a tuple of all remaining tags in order. When no tag matches,
    returns (None, tags) unchanged.
    '''
    suffix = '-dimensional'
    position = next((idx for idx, tag in enumerate(tags) if tag.endswith(suffix)), None)
    if position is None:
        return None, tags
    return tags[position].removesuffix(suffix), tags[:position] + tags[position + 1:]


def build_tags(tags):
    ''' Placeholder: prints the parsed tags and returns None. '''
    # TODO
    print(tags)
    return None

In [278]:

def clean_field2value(f2v):
    ''' Turn the per-source value lists of one benchmark into source-code snippets.

    `f2v` is expected to be the mapping built by `frow`: each scraped field maps
    to a list of values (one per matching source) and 'method' maps to the call
    string. Most fields are rewritten in place; the function then returns a NEW
    dict sorted by key, with 'tags' appended last.

    Guarantees these keys in the result: dimensions, domain, domain_latex, latex,
    links, method, minima, minima_latex, name, tags.

    Raises KeyError when 'link', 'method' or 'name' is missing from `f2v`.
    '''
    # dimensions: the first source wins; digit-only values stay bare, the rest is quoted
    v = f2v.get('dimensions')
    if v is None:
        f2v['dimensions'] = wrap('TODO')
    else:
        v = v[0]
        f2v['dimensions'] = v if v.isdigit() else wrap(v)

    # domain: collapse to a single value when all sources agree
    v = set_and_get(f2v, 'domain')
    if v and len(set(v)) == 1:
        f2v['domain'] = v[0]

    # domain_latex
    latex_treatment(f2v, 'domain_latex')

    # latex
    latex_treatment(f2v, 'latex')

    # links: renamed from 'link'; stored as a tuple, which the db writer
    # renders as a bracketed list
    links = f2v.pop('link')
    f2v['links'] = tuple(wrap(links))

    # method: bare lookup, fails fast with KeyError when the field is missing
    f2v['method']

    # minima: a single source value is the repr of a list of 'key = value' strings
    v = set_and_get(f2v, 'minima')
    if v is not None:
        if len(v) == 1:
            # NOTE(security): eval() executes scraped text as Python. Only run
            # this on data you trust; consider ast.literal_eval for the list.
            entries = eval(v[0])
            pairs = []
            for entry in entries:
                vk, vv = [ part.strip() for part in entry.split('=') ]
                try:
                    # syntax probe only: values that are not valid Python
                    # expressions are emitted as raw strings instead
                    eval(vv)
                except SyntaxError:
                    vv = wrapr(vv)
                pairs.append(f'{wrap(vk)}: {vv}')
            # Dict literal: the previous `dict("key"=value)` form is not valid
            # Python because keyword names cannot be string literals.
            v = '{' + ', '.join(pairs) + '}'
        f2v['minima'] = v

    # minima_latex
    latex_treatment(f2v, 'minima_latex')

    # name: unique names across sources, single value when they all agree
    names = sorted(set(f2v['name']))
    if len(names) == 1:
        f2v['name'] = wrap(names[0])
    else:
        f2v['name'] = wrap(names)

    # references
    pass

    # sort before adding tags
    tags = f2v.get('tags')
    if tags is not None:
        del f2v['tags']
    # TODO: if there is a tag r'\d-dimensional', then add that to the dimensions
    f2v = sort_dict(f2v)

    # tags
    if tags is None:
        f2v['tags'] = "TODO"
    else:
        try:
            # NOTE(security): eval() on scraped text, see the minima note above
            parsed = eval(tags[0])
        except Exception:
            f2v['tags'] = "TODO"
        else:
            built = build_tags(parsed)
            # build_tags is still a stub that returns None; keep the placeholder
            # so the 'tags' field is never silently dropped from the result
            f2v['tags'] = "TODO" if built is None else built

    return f2v

In [279]:
# Assemble the text of db.py: a `data = [...]` list with one `dict(...)` entry
# per benchmark row.
s = 'data = [\n'
t = '    '  # one indentation level in the generated source
# FIX: this loop iterated `dff`, which no cell of the notebook defines
# (NameError after Restart & Run All). Iterate the verified mapping `df`
# reloaded from df.csv instead.
for index, row in df.iterrows():
    s += f"{t}# {row['name']}\n{t}dict(\n"
    field2value = frow(row)
    field2value = clean_field2value(field2value)
    for field, values in field2value.items():
        s += f"{t*2}{field}="
        # values
        if isinstance(values, list):
            # NOTE(review): several candidate values are written one per line
            # WITHOUT brackets, which is not valid Python; presumably they are
            # meant to be resolved by hand in db.py -- confirm.
            s += f'\n{t*3}'
            s += f',\n{t*3}'.join(values)
        elif isinstance(values, tuple):
            # tuples are genuine list values: emit a bracketed list
            s += f'[\n{t*3}'
            s += f',\n{t*3}'.join(values)
            s += f',\n{t*2}]'
        elif values is None:
            s += 'None'
        else:
            s += values
        s += ',\n'
    s += f'{t}),\n'
s += ']'
# print(s)

['n-dimensional', 'continuous', 'multimodal', 'non-convex', 'differentiable']
['2-dimensional', 'non-separable', 'unimodal', 'convex', 'differentiable']
['2-dimensional', 'non-separable', 'multimodal', 'non-convex', 'differentiable']
['n-dimensional', 'non-separable', 'multimodal', 'non-convex', 'differentiable']
['2-dimensional', 'non-separable', 'multimodal', 'non-convex', 'differentiable']
['n-dimensional', 'non-separable', 'multimodal', 'non-convex', 'differentiable']
['n-dimensional', 'non-separable', 'multimodal', 'non-convex', 'differentiable']
['2-dimensional', 'non-separable', 'multimodal', 'non-convex', 'non-differentiable']
['2-dimensional', 'continuous', 'multimodal', 'non-convex']
['2-dimensional', 'non-separable', 'multimodal', 'non-convex', 'differentiable']
['2-dimensional', 'continuous', 'convex', 'unimodal']
['2-dimensional', 'non-separable', 'multimodal', 'non-convex', 'differentiable']
['2-dimensional', 'continuous', 'convex', 'differentiable', 'non-separable', 'uni

In [269]:
# Write the generated source. Python source files are decoded as UTF-8 by
# default, so encode explicitly instead of relying on the platform default
# (the LaTeX snippets may contain non-ASCII characters).
with open('db.py', 'w', encoding='utf-8') as f:
    f.write(s)