# Frequency vs complexity

Initial setup:

In [1]:
import utils
import altair as alt
import pickle
import pandas as pd

def phenosToDataFrame(phenoslist):
    """Flatten phenotype records from several datasets into one DataFrame.

    ``phenoslist`` maps a dataset label to an iterable of phenotype dicts.
    Records whose ``count`` is not at least 1 are skipped. Every kept record
    is copied, stripped of its ``genotypes`` entry and extended with three
    fields: ``dataset`` (the label), ``nCubeTypes`` (from
    ``utils.countCubeTypes``) and ``url`` (a link built from the simplified
    rule).

    Rows are indexed by the simplified rule, so when the same simplified
    rule occurs more than once (within or across datasets) only the record
    seen last is kept.
    """
    rows = {}
    for label, entries in phenoslist.items():
        for entry in entries:
            if not (entry['count'] >= 1):
                continue
            rule = utils.simplifyHexRule(entry['rule'])
            # Work on a copy so the caller's record keeps its genotypes.
            row = dict(entry)
            del row['genotypes']
            row['dataset'] = label
            row['nCubeTypes'] = utils.countCubeTypes(rule)
            row['url'] = 'https://akodiat.github.io/polycubes?rule={}'.format(rule)
            rows[rule] = row
    # The dict is column-per-rule; transpose to get one row per rule.
    return pd.DataFrame(data=rows).transpose()

Load data and plot

In [2]:
# Load the phenotype records of every run and merge them into one DataFrame.
# The order (2d, 3d, 1d) is kept as is: phenosToDataFrame indexes rows by
# rule, so for a rule present in several datasets the later one wins.
data = phenosToDataFrame({
    label: utils.loadPhenos(path)
    for label, path in (
        ('2d', '../cpp/out/2d_31c_5t_1e7/phenos'),
        ('3d', '../cpp/out/3d_31c_5t_1e7/phenos'),
        ('1d', '../cpp/out/1d_31c_5t_1e7/phenos'),
    )
})

# Serialize chart data to disk instead of embedding all of it in the output.
alt.data_transformers.enable('json');

In [3]:
# Scatter plot of all phenotypes: number of colours on x, log-scaled
# frequency on y, coloured by dataset and sized by the 'size' column.
# Every point links to the address stored in its 'url' column.
chart = (
    alt.Chart(data)
    .mark_circle(size=60, opacity=0.7)
    .encode(
        alt.X('compl', title='Number of colours', axis=alt.Axis(tickMinStep=1)),
        alt.Y('freq', scale=alt.Scale(type='log'), title='Frequency'),
        alt.Color('dataset', scale=alt.Scale(scheme='set1')),
        href='url:N',
        size='size',
        tooltip=['rule', 'dataset', 'count', 'size'],
    )
    .properties(title='Phenotype frequency vs complexity')
    .interactive()
)

# Write a standalone HTML copy, then show the chart as the cell output.
chart.save('freq_vs_compl.html')
chart

In [4]:
# One chart per dataset, stacked vertically. Each dataset is loaded into its
# own DataFrame and the points are coloured by the number of cube types.
alt.vconcat(*(
    alt.Chart(
        phenosToDataFrame({name: utils.loadPhenos(path)})
    )
    .mark_circle(size=60, opacity=0.7)
    .encode(
        alt.X('compl', title='Number of colours', axis=alt.Axis(tickMinStep=1)),
        alt.Y('freq', scale=alt.Scale(type='log'), title='Frequency'),
        href='url:N',
        size='size',
        color='nCubeTypes',
        tooltip=['rule', 'count', 'nCubeTypes', 'size'],
    )
    .properties(title=name)
    for name, path in (
        ('1d', '../cpp/out/1d_31c_5t_1e7/phenos'),
        ('2d', '../cpp/out/2d_31c_5t_1e7/phenos'),
        ('3d', '../cpp/out/3d_31c_5t_1e7/phenos'),
    )
))

In [5]:
# Stack three copies of the combined chart vertically, each copy filtered
# down to the rows of a single dataset.
(
    chart.transform_filter(alt.datum.dataset == '1d')
    & chart.transform_filter(alt.datum.dataset == '2d')
    & chart.transform_filter(alt.datum.dataset == '3d')
)

In [6]:
# Log-scaled frequency against the number of cube types, for all datasets.
# The link target is recomputed inside the chart from each row's 'rule'
# field. The size encoding and the 'size' tooltip entry are left out here.
(
    alt.Chart(data)
    .transform_calculate(
        url='https://akodiat.github.io/polycubes?rule=' + alt.datum.rule
    )
    .mark_circle(size=60)
    .encode(
        alt.X('nCubeTypes', title='Number of cube types', axis=alt.Axis(tickMinStep=1)),
        alt.Y('freq', scale=alt.Scale(type='log'), title='Frequency'),
        color='dataset',
        href='url:N',
        tooltip=['rule', 'dataset', 'count'],
    )
    .properties(title='Phenotype frequency vs complexity')
    .interactive()
)

In [7]:
# Number of colours ('compl') against the number of cube types, for all
# datasets. The link target is recomputed inside the chart from each row's
# 'rule' field. The size encoding and the 'size' tooltip entry are left out.
(
    alt.Chart(data)
    .transform_calculate(
        url='https://akodiat.github.io/polycubes?rule=' + alt.datum.rule
    )
    .mark_circle(size=60)
    .encode(
        alt.X('nCubeTypes', title='Number of cube types', axis=alt.Axis(tickMinStep=1)),
        alt.Y('compl', title='Number of colours'),
        color='dataset',
        href='url:N',
        tooltip=['rule', 'dataset', 'count'],
    )
    # No frequency is plotted in this chart, so the title names the two
    # quantities that actually appear on the axes.
    .properties(title='Number of colours vs number of cube types')
    .interactive()
)