# Reference calculation

In [None]:
import utils
import altair as alt
import pandas as pd
import re
import math
from analysePhenotype import readAnalysed

def plotFreq(path, nSampled, nDim, method, minCount=1, nMer = None, saveHTML=False):
    """Scatter-plot phenotype frequency against complexity.

    The analysed data at `path` is loaded with `readAnalysed`, rows with a
    `count` below `minCount` are dropped, and the rest are sorted by `size`.
    If `nMer` is given, only rows whose `size` equals it are kept and the
    title mentions the n-mer. Frequency is computed in the chart as
    `count / nSampled` and drawn on a log axis against `minNc`; every point
    links to the polycubes viewer for its `minLz_r` rule.

    Args:
        path: Passed unchanged to `readAnalysed`.
        nSampled: Number of samples; frequency denominator, also in the title.
        nDim: Dimensionality, used only in the title.
        method: Method name, used only in the title.
        minCount: Smallest `count` a row needs to be plotted.
        nMer: Optional size to restrict the plot to.
        saveHTML: If true, also save the chart as 'freq_vs_compl.html'.

    Returns:
        The Altair chart.
    """
    heading = 'Frequency vs complexity, {:.1E} samples of {}D {}'.format(nSampled, nDim, method)

    data = readAnalysed(path)
    frequentEnough = data['count'] >= minCount
    data = data.loc[frequentEnough].sort_values(['size'], ascending=True)
    if nMer is not None:
        data = data.loc[data['size'] == nMer]
        heading = '{}-mer frequency vs complexity, {:.1E} samples of {}D {}'.format(nMer, nSampled, nDim, method)

    # Encoding channels, built up front to keep the chart expression short.
    complexityAxis = alt.X('minNc:Q', axis=alt.Axis(tickMinStep = 1), title="Complexity (number of colours)")
    frequencyAxis = alt.Y('freq:Q', scale=alt.Scale(type='log'), title="Frequency")
    sizeColour = alt.Color('size:Q', scale=alt.Scale(scheme="inferno"))
    hoverFields = ['url:N', 'count', 'freq:Q', 'minLz', 'minNc', 'minNt', 'rotsymms', 'reflsymms', 'invsymms']

    chart = alt.Chart(data).mark_circle(size=60).encode(
        complexityAxis,
        frequencyAxis,
        href='url:N',
        color=sizeColour,
        tooltip=hoverFields
    ).transform_calculate(
        # Derived fields are evaluated by the chart, not by pandas.
        freq='datum.count/{}'.format(nSampled),
        url='"https://akodiat.github.io/polycubes/?rule="+datum.minLz_r',
        symmetries='1+datum.rotsymms+datum.reflsymms+datum.invsymms'
    ).properties(
        width=200,
        height=200
    ).properties(title=heading)

    if saveHTML:
        chart.save('freq_vs_compl.html')
    return chart

## First, let's take a look at the raw data

In [None]:
# Results directory; statsFromPath reads its last two components as the
# assembly mode and the dimensionality.
path = '/mnt/extraspace/joakim/refcalc/seeded/2d'
# List the directory contents (IPython shell escape).
!ls -l $path
# Lift Altair's limit on the number of rows a chart's dataset may contain.
alt.data_transformers.disable_max_rows()

### How much time did it take?

In [None]:
# Print every "Total run time" line found in the .out files under `path`.
! grep "Total run time" $path/*.out

### How large is the file?

In [None]:
# Show the .h5 files under `path` with human-readable sizes.
! ls -lh $path/*.h5

## How many did we sample and how many were valid?

In [None]:
def statsFromPath(path):
    """Count valid, unbounded and non-deterministic rules for one run.

    Every `polycubes*.out` file directly inside `path` is scanned for lines
    containing "Done!". The integers on each such line are read in the order
    valid, unbounded, non-deterministic (the same order as the summary
    printed below; assumed to match the log format) and summed over all
    files. The files are read in Python rather than through a shell `grep`,
    so the function also works outside IPython.

    Args:
        path: Run directory as a string. Its last two components are used
            as the assembly mode and the dimensionality, e.g.
            '.../seeded/2d'. A trailing '/' is ignored.

    Returns:
        A DataFrame with one row per rule category and the columns `type`,
        `count`, `assemblyMode` and `nDim`.

    Raises:
        ValueError: If a "Done!" line does not contain exactly three
            integers, or if `path` has fewer than two components.
    """
    from pathlib import Path  # local import so the cell stays self-contained

    integerPattern = re.compile(r'\d+')  # compiled once, outside the loops
    nValid = nUnbounded = nNondet = 0
    for outFile in sorted(Path(path).glob('polycubes*.out')):
        with open(outFile, errors='replace') as log:
            for row in log:
                if 'Done!' not in row:
                    continue
                nP, nU, nN = [int(n) for n in integerPattern.findall(row)]
                nValid += nP
                nUnbounded += nU
                nNondet += nN

    assemblyMode, nDim = path.rstrip('/').split('/')[-2:]
    # Each label must sit at the same index as its count: the original paired
    # 'Non-deterministic' with nUnbounded and 'Unbounded' with nNondet.
    df = pd.DataFrame({
        'type': ['Valid', 'Non-deterministic', 'Unbounded'],
        'count': [nValid, nNondet, nUnbounded]
    })
    df['assemblyMode'] = assemblyMode
    df['nDim'] = nDim

    print('Loaded {} {}:'.format(assemblyMode, nDim))
    print('  Sampled {:,d} rules in total'.format(nValid + nNondet + nUnbounded))
    print('  Found {:,d} valid rules. Also found {:,d} unbounded and {:,d} nondeterministic rules'.format(nValid, nUnbounded, nNondet))

    return df

In [None]:
# Stacked bar of sample counts per rule category, one bar per dimensionality
# and one facet row per assembly mode.
alt.Chart(statsFromPath(path)).mark_bar().encode(
    alt.X('count:Q', title="Samples"),
    alt.Y('nDim:N', title=None),
    alt.Color('type:O', title="Rule validity", scale=alt.Scale(scheme='set1')),
    alt.Row('assemblyMode', title=None),
)

In [None]:
import h5py
# The list pattern requires exactly one .h5 file under `path`; otherwise the
# unpacking raises.
[h5path] = !ls $path/*.h5
# Opened read-only and left open: a later cell reads from `f`.
f = h5py.File(h5path, 'r')

In [None]:
# Show the names of the top-level entries of the HDF5 file on one line.
' '.join(f.keys())

In [None]:
# Single stacked bar of sample counts, coloured by rule category.
alt.Chart(statsFromPath(path)).mark_bar().encode(
    alt.X('count:Q', title="Samples"),
    alt.Color('type:O', title="Rule validity", scale=alt.Scale(scheme='set1')),
)

## How many phenotypes of each size did we find?

In [None]:
# Reference counts from OEIS A000988; the list index is used as the size.
oeisA000988 = [
    1, 1, 1, 2, 7, 18, 60, 196, 704, 2500, 9189, 33896, 126759, 476270,
    1802312, 6849777, 26152418,
]
# Red reference line, overlaid on the polyomino-count bar chart further down.
oeisPlot = alt.Chart(
    pd.DataFrame({'count': oeisA000988, 'size': range(len(oeisA000988))})
).mark_line(color='red').encode(
    x=alt.X("size:O"),
    y=alt.Y("count:Q"),
)
oeisPlot

In [None]:
def readAndAssign(path):
    [f] = ! ls $path/*.ftr
    df = readAnalysed(f)
    return df
# Analysed phenotype table for this run; reused by the cells below.
df = readAndAssign(path)
# Base chart bound to `df`; the cells below add marks and encodings to it.
chart = alt.Chart(df)

In [None]:
# Summed `count` per polyomino size, on a log axis.
chart.mark_bar().encode(
    x=alt.X('size:O', title="Polyomino size"),
    y=alt.Y('sum(count):Q', scale=alt.Scale(type='log'), title="Rule count"),
    tooltip=['sum(count):Q', "size"],
).properties(width=400, height=200)

In [None]:
# Number of rows (distinct polyominoes) per size on a log axis, layered with
# the OEIS reference line for comparison.
chart.mark_bar().encode(
    x=alt.X("size:O", title="Polyomino size"),
    y=alt.Y("count()", scale=alt.Scale(type='log'), title="Polyomino count"),
    tooltip=["count()", "size"],
).properties(width=400, height=200) + oeisPlot

## Complexity distributions

In [None]:
# Histogram of complexity (`minNc`) over every row in the table.
chart.mark_bar().encode(
    x=alt.X("minNc:O", title="Complexity (number of colours)"),
    y=alt.Y("count()", scale=alt.Scale(type='linear'), title="Polyomino count"),
    tooltip=["count()", "size"],
).properties(width=400, height=200, title='All structures')

In [None]:
# Same complexity histogram, restricted to rows whose size is 16.
df16 = df[df['size'] == 16]
alt.Chart(df16).mark_bar().encode(
    x=alt.X("minNc:O", title="Complexity (number of colours)"),
    y=alt.Y("count()", scale=alt.Scale(type='linear'), title="Polyomino count"),
    tooltip=["count()", "size"],
).properties(width=400, height=200, title='All 16-mers')

## Zoo plots

In [None]:
def plotZoo(path, size, sampled, dims=(400, 400), sizeScaling='log'):
    """Draw a grid ("zoo") of all phenotypes of one size, ranked by count.

    Rows of the analysed data whose `size` equals `size` are ranked by
    descending `count` (ties broken by `minLz_r`) and laid out row by row on
    a grid. Each point's size and colour encode `count / sampled`, and each
    point links to the polycubes viewer for its `minLz_r` rule.

    Args:
        path: File passed to `readAnalysed`. The two directory names directly
            above the file are used in the title as assembly mode and
            dimensionality.
        size: Phenotype size to show.
        sampled: Number of samples; denominator of the frequency.
        dims: Plot (width, height) in pixels. Any indexable pair works; it
            also sets the aspect ratio of the grid.
        sizeScaling: Scale type for the point-size encoding.

    Returns:
        The Altair chart.
    """
    df = readAnalysed(path)
    source = df.loc[df['size'] == size]
    # Column count chosen so the grid has roughly the plot's aspect ratio.
    # Clamped to 1 so the row/col expressions below never divide by zero
    # when no phenotype matches.
    width = max(1, math.ceil(math.sqrt(len(source) * dims[0]/dims[1])))
    assemblyMode, nDim = path.split('/')[-3:-1]
    return alt.Chart(source).transform_window(
        rank="rank()",
        sort=[
            alt.SortField("count", order="descending"),
            alt.SortField("minLz_r", order="descending"), # Needed if counts are equal
        ]
    ).transform_calculate(
        freq='datum.count/{}'.format(sampled),
        url='"https://akodiat.github.io/polycubes/?rule="+datum.minLz_r',
        # Convert the 1-based rank into zero-based grid coordinates.
        row="floor((datum.rank-1)/{})".format(width),
        col="(datum.rank-1) % {}".format(width)
    ).mark_point(
        filled=True, size=100
    ).encode(
        x=alt.X("col:O", axis=None),
        y=alt.Y("row:O", axis=None),
        tooltip=['count', 'rank:Q', 'minLz', 'minNc', 'minNt', 'sizeId', 'minLz_r'],
        size = alt.Size('freq:Q', title="Frequency", scale=alt.Scale(type=sizeScaling)),
        color = alt.Color('freq:Q', scale=alt.Scale(scheme="redyellowgreen")),
        href='url:N'
    ).properties(
        width=dims[0],
        height=dims[1],
        title = '{}-mer frequencies ({} {})'.format(size, assemblyMode, nDim)
    ).configure_view(
        strokeWidth=0
    )

In [None]:
# Zoo plot of the 16-mers; frequencies are relative to 1e9 samples.
plotZoo(
    path + '/out_18067_analysed.ftr',
    size=16,
    sampled=1e9,
    dims=[200, 500],
)