# Frequency vs complexity

In [None]:
import utils
import altair as alt
import pandas as pd
from analysePhenotype import readAnalysed

Check what files we have to analyse:

In [None]:
# IPython shell escape: list every .ftr file three directory levels below
# /mnt/extraspace/joakim, with human-readable sizes.
! ls -lh /mnt/extraspace/joakim/*/*/*/*.ftr

Altair refuses to plot more than 5000 rows by default; we likely have more than that, so lift the limit:

In [None]:
# Remove Altair's default cap on the number of rows a chart's data may contain.
alt.data_transformers.disable_max_rows()

In [None]:
def saveRankedRules(size, path, method=None, nDim=None):
    """Save the count-ranked rows of a given size to a CSV file.

    Reads the analysed dataset at `path` with `readAnalysed`, keeps the rows
    whose 'size' column equals `size`, and writes their 'count' and 'minNt_r'
    columns, sorted by descending 'count', to
    '<method>_<nDim>d_size_<size>.csv' in the current working directory.

    Args:
        size: Value of the 'size' column to select.
        path: Location handed to `readAnalysed`.
        method: Label used in the output filename. When omitted, a
            notebook-level global named `method` is used instead.
        nDim: Dimension label used in the output filename. When omitted, a
            notebook-level global named `nDim` is used instead.

    Raises:
        NameError: If `method` or `nDim` is neither passed nor defined as a
            global.
    """
    if method is None or nDim is None:
        # Earlier versions read these labels from notebook globals; keep that
        # working for callers that do not pass them, but fail before doing
        # any file I/O if they are missing.
        scope = globals()
        missing = [
            name
            for name, value in (('method', method), ('nDim', nDim))
            if value is None and name not in scope
        ]
        if missing:
            raise NameError(
                'saveRankedRules needs {}: pass as an argument or define it '
                'globally'.format(' and '.join(missing))
            )
        if method is None:
            method = scope['method']
        if nDim is None:
            nDim = scope['nDim']

    df = readAnalysed(path)
    ranked = df.loc[df['size'] == size, ['count', 'minNt_r']].sort_values(
        'count', ascending=False
    )
    ranked.to_csv('{}_{}d_size_{}.csv'.format(method, nDim, size))
# Example:
# saveRankedRules(8, '<path to an analysed .ftr file>', method='seeded', nDim=3)

In [None]:
def getSymmGroup(row):
    """Return the symmetry-group label for a row's symmetry counts.

    The rules are tried in a fixed priority order and the first match wins:
    reflsymms == 2 -> 'D4', rotsymms == 3 -> 'C4', reflsymms == 1 -> 'D2',
    rotsymms == 1 -> 'C2', reflsymms == 0 -> 'D1', rotsymms == 0 -> 'C1'.

    Args:
        row: Object indexable by 'reflsymms' and 'rotsymms' (for example a
            DataFrame row or a dict).

    Returns:
        The matching label, or 'Other' when no rule applies.
    """
    # (column, required value, label), highest priority first.
    priority = (
        ('reflsymms', 2, 'D4'),
        ('rotsymms', 3, 'C4'),
        ('reflsymms', 1, 'D2'),
        ('rotsymms', 1, 'C2'),
        ('reflsymms', 0, 'D1'),
        ('rotsymms', 0, 'C1'),
    )
    for column, wanted, label in priority:
        # Columns are looked up lazily, one rule at a time.
        if row[column] == wanted:
            return label
    return 'Other'

In [None]:
def plotFreq(path, nSampled, nDim, method, minCount=1, nMer=None, saveHTML=False):
    """Plot frequency against three complexity measures for one sampling run.

    Loads the analysed dataset with `readAnalysed`, drops rows whose 'count'
    is below `minCount` (and, when `nMer` is given, rows whose 'size' differs
    from it), and draws one scatter plot per complexity column ('minNt',
    'minNc', 'minLz') with frequency on a logarithmic y axis. Points are
    coloured by 'size' and link to the polycubes viewer URL built from their
    'minLz_r' value.

    Args:
        path: Either a run's root directory, in which case the single
            '*.ftr' file inside '<path>/<method>/<nDim>d' is loaded, or the
            path of an existing file, which is loaded directly.
        nSampled: Total number of samples; 'count' is divided by it to get
            the plotted frequency. Also shown in the title.
        nDim: Dimension label; used to locate the data and in the title.
        method: Sampling method label; used to locate the data and in the
            title.
        minCount: Minimum 'count' a row needs in order to be plotted.
        nMer: If not None, plot only rows whose 'size' equals this value.
        saveHTML: If true, also write the chart to 'freq_vs_compl.html'.

    Returns:
        The repeated Altair chart.

    Raises:
        FileNotFoundError: If `path` is a directory layout with no '*.ftr'
            file in the expected sub-directory.
        ValueError: If more than one '*.ftr' file is found there.
    """
    # Imported locally so the function does not depend on other cells.
    import glob
    import os

    if os.path.isfile(path):
        # The caller pointed straight at the data file.
        filename = path
    else:
        pattern = os.path.join(path, method, '{}d'.format(nDim), '*.ftr')
        matches = sorted(glob.glob(pattern))
        if not matches:
            raise FileNotFoundError('No file matches {}'.format(pattern))
        if len(matches) > 1:
            raise ValueError(
                'Expected one file matching {}, found {}: {}'.format(
                    pattern, len(matches), matches
                )
            )
        [filename] = matches
    print(filename)

    df = readAnalysed(filename)
    df = df.loc[df['count'] >= minCount]
    df = df.sort_values(['size'], ascending=True)

    if nMer is None:
        title = 'Frequency vs complexity, {:.1E} samples of {}D {}'.format(
            nSampled, nDim, method
        )
    else:
        df = df.loc[df['size'] == nMer]
        title = '{}-mer frequency vs complexity, {:.1E} samples of {}D {}'.format(
            nMer, nSampled, nDim, method
        )

    # Alternative colouring: add a 'symmetry_group' column with
    # df.apply(getSymmGroup, axis=1) and encode color as 'symmetry_group:N'.
    chart = alt.Chart(df).mark_circle(size=60).encode(
        alt.X(alt.repeat("column"), type='quantitative', axis=alt.Axis(tickMinStep=1)),
        alt.Y('freq:Q', scale=alt.Scale(type='log'), title="Frequency"),
        href='url:N',
        color=alt.Color('size:Q', scale=alt.Scale(scheme="inferno")),
        tooltip=['url:N', 'count', 'freq:Q', 'minLz', 'minNc', 'minNt', 'rotsymms', 'reflsymms', 'invsymms']
    ).transform_calculate(
        freq='datum.count/{}'.format(nSampled),
        url='"https://akodiat.github.io/polycubes/?rule="+datum.minLz_r',
        symmetries='1+datum.rotsymms+datum.reflsymms+datum.invsymms'
    ).properties(
        # Size of each individual panel; set before repeating.
        width=200,
        height=200
    ).repeat(
        column=['minNt', 'minNc', 'minLz']
    ).properties(title=title)

    if saveHTML:
        chart.save('freq_vs_compl.html')
    return chart

# 1e8

### $$n_c \leq 5$$

In [None]:
# 210924 run, 2D seeded: plot every row with count >= 1.
plotFreq(
    path='/mnt/extraspace/joakim/210924',
    nSampled=1e8,
    nDim=2,
    method='seeded',
    minCount=1,
)

In [None]:
# 210924 run, 3D seeded: plot every row with count >= 1.
plotFreq(
    path='/mnt/extraspace/joakim/210924',
    nSampled=1e8,
    nDim=3,
    method='seeded',
    minCount=1,
)

In [None]:
# 210924 run, 3D stochastic: plot every row with count >= 1.
plotFreq(
    path='/mnt/extraspace/joakim/210924',
    nSampled=1e8,
    nDim=3,
    method='stochastic',
    minCount=1,
)

In [None]:
# 210924 run, 2D stochastic: plot every row with count >= 1.
plotFreq(
    path='/mnt/extraspace/joakim/210924/',
    nSampled=1e8,
    nDim=2,
    method='stochastic',
    minCount=1,
)

### $$n_c \leq 5$$

In [None]:
# 210918 run, 2D seeded: plot only rows with count >= 10.
plotFreq(
    path='/mnt/extraspace/joakim/210918',
    nSampled=1e8,
    nDim=2,
    method='seeded',
    minCount=10,
)

In [None]:
# 210918 run, 3D seeded: plot only rows with count >= 10.
plotFreq(
    path='/mnt/extraspace/joakim/210918',
    nSampled=1e8,
    nDim=3,
    method='seeded',
    minCount=10,
)

In [None]:
# 210918 run, 2D stochastic: plot only rows with count >= 10.
plotFreq(
    path='/mnt/extraspace/joakim/210918/',
    nSampled=1e8,
    nDim=2,
    method='stochastic',
    minCount=10,
)

In [None]:
# 210918 run, 3D stochastic: plot only rows with count >= 10.
plotFreq(
    path='/mnt/extraspace/joakim/210918/',
    nSampled=1e8,
    nDim=3,
    method='stochastic',
    minCount=10,
)

## 8-mers

In [None]:
# 210924 run, 2D seeded, restricted to rows whose size equals nMer.
# NOTE(review): the section heading says 8-mers but nMer=16, and nSampled=1e7
# differs from the 1e8 used by the other 210924 plots - confirm both values.
plotFreq(
    path='/mnt/extraspace/joakim/210924',
    nSampled=1e7,
    nDim=2,
    method='seeded',
    minCount=1,
    nMer=16,
)

# OLD SAMPLINGS

## Plotting all sizes

In [None]:
# 210324 run, 3D stochastic: plot only rows with count >= 10.
# NOTE(review): `path` points at an .ftr file here, whereas the calls for the
# newer runs pass the run's root directory - confirm plotFreq handles both.
plotFreq(
    path='/mnt/extraspace/joakim/210324/stochastic/3d/out_3345_analysed.ftr',
    nSampled=1e8,
    nDim=3,
    method='stochastic',
    minCount=10,
)

In [None]:
# 210324 run, 3D seeded: plot only rows with count >= 10.
# NOTE(review): `path` points at an .ftr file here, whereas the calls for the
# newer runs pass the run's root directory - confirm plotFreq handles both.
plotFreq(
    path='/mnt/extraspace/joakim/210324/seeded/3d/out_16757_analysed.ftr',
    nSampled=1e8,
    nDim=3,
    method='seeded',
    minCount=10,
)

In [None]:
# 210324 run, 2D stochastic: plot only rows with count >= 10.
# NOTE(review): `path` points at an .ftr file here, whereas the calls for the
# newer runs pass the run's root directory - confirm plotFreq handles both.
plotFreq(
    path='/mnt/extraspace/joakim/210324/stochastic/2d/out_27175_analysed.ftr',
    nSampled=1e8,
    nDim=2,
    method='stochastic',
    minCount=10,
)

## Plotting specific sizes

In [None]:
# 210324 run, 3D seeded, only rows of size 8 with count >= 10.
# NOTE(review): `path` points at an .ftr file here, whereas the calls for the
# newer runs pass the run's root directory - confirm plotFreq handles both.
plotFreq(
    path='/mnt/extraspace/joakim/210324/seeded/3d/out_16757_analysed.ftr',
    nSampled=1e8,
    nDim=3,
    method='seeded',
    minCount=10,
    nMer=8,
)

In [None]:
# 210324 run, 2D stochastic, only rows of size 16 with count >= 10.
# NOTE(review): `path` points at an .ftr file here, whereas the calls for the
# newer runs pass the run's root directory - confirm plotFreq handles both.
plotFreq(
    path='/mnt/extraspace/joakim/210324/stochastic/2d/out_27175_analysed.ftr',
    nSampled=1e8,
    nDim=2,
    method='stochastic',
    minCount=10,
    nMer=16,
)

In [None]:
# 210324 run, 2D stochastic, only rows of size 16 with count >= 10.
# NOTE(review): the arguments are identical to the preceding cell - confirm
# whether a different dataset or size was intended here.
plotFreq(
    path='/mnt/extraspace/joakim/210324/stochastic/2d/out_27175_analysed.ftr',
    nSampled=1e8,
    nDim=2,
    method='stochastic',
    minCount=10,
    nMer=16,
)

In [None]:
# Scatter-plot matrix comparing every pair of complexity columns, with points
# coloured by frequency on a logarithmic scale and linked to the polycubes
# viewer.
# NOTE(review): relies on `df`, `nSampled`, `nDim` and `method` existing as
# notebook globals; none of them is assigned at top level in the visible cells.
alt.Chart(df).mark_circle().encode(
    x=alt.X(alt.repeat('column'), type='quantitative'),
    y=alt.Y(alt.repeat('row'), type='quantitative'),
    href='url:N',
    color=alt.Color('freq:Q', scale=alt.Scale(scheme='inferno', type='log')),
).properties(width=150, height=150).transform_calculate(
    freq='datum.count/{}'.format(nSampled),
    url='"https://akodiat.github.io/polycubes/?rule="+datum.minLz_r',
    symmetries='1+datum.rotsymms+datum.reflsymms+datum.invsymms',
).repeat(
    column=['minLz', 'minNc', 'minNt', 'size', 'symmetries'],
    row=['minLz', 'minNc', 'minNt', 'size', 'symmetries'],
).properties(
    title='Complexity comparison, {:.1E} samples of {}D {}'.format(nSampled, nDim, method),
)

In [None]:
# Heatmap of how many rows share each (minNc, minNt) combination.
# NOTE(review): relies on a notebook-global `df` that is not assigned at top
# level in the visible cells.
alt.Chart(df).mark_rect().encode(
    x='minNc:N',
    y='minNt:N',
    color=alt.Color('count()', scale=alt.Scale(scheme='inferno')),
).properties(width=200, height=200)


In [None]:
# One heatmap row per symmetry column, counting rows for each (size, symmetry
# value) pair on a logarithmic colour scale.
# NOTE(review): relies on a notebook-global `df` that is not assigned at top
# level in the visible cells; also confirm that sort='-y' on the y channel
# gives the intended ordering.
alt.Chart(df).mark_rect().encode(
    alt.X('size:N'),
    y=alt.Y(alt.repeat('row'), sort='-y', type='ordinal'),
    color=alt.Color('count()', scale=alt.Scale(scheme='inferno', type='log')),
).repeat(row=['rotsymms', 'reflsymms', 'invsymms'])