# Frequency vs modularity

The modularity index is defined as
$ \frac{size}{n_s} $
(computed in the plots below as `size / minNt`).

In [None]:
import utils
import altair as alt
import pandas as pd
from analysePhenotype import readAnalysed

Check what files we have to analyse:

In [None]:
# List, with human-readable sizes, every `.ftr` file three directory levels below the data root.
! ls -lh /mnt/extraspace/joakim/*/*/*/*.ftr

Drop phenotypes found fewer than 100 times

We will likely plot more than 5000 rows, so disable Altair's row limit:

In [None]:
# Lift Altair's row limit so the charts below can be built from larger data frames.
alt.data_transformers.disable_max_rows()

Initial setup:

In [None]:
def readAndAssign(path):
    """Read one analysed file and label its rows with the parent directory name.

    The second-to-last '/'-separated component of ``path`` (i.e. the name of
    the directory containing the file) is stored in a new ``nDim`` column.

    Args:
        path: '/'-separated path to a file readable by ``readAnalysed``.

    Returns:
        The object returned by ``readAnalysed`` with the ``nDim`` column set.
    """
    frame = readAnalysed(path)
    # rsplit with maxsplit=2 yields the same second-to-last component as a full split.
    frame['nDim'] = path.rsplit('/', 2)[-2]
    return frame

In [None]:
def plotModularity(path, nSampled, minCount=1, minN=None, maxN=None, saveHTML=False):
    """Plot phenotype frequency against modularity index.

    Loads every ``*.ftr`` file matching ``<path>/seeded/*d/*.ftr``, filters the
    rows, and builds a faceted Altair scatter chart with one row per ``nDim``
    value and one column per ``size`` value. Frequency is ``count / nSampled``
    and modularity is ``size / minNt``; both are computed inside the chart.

    Args:
        path: Root directory of a run; files are searched for in
            ``<path>/seeded/*d/``.
        nSampled: Total number of samples. Used to convert counts into
            frequencies and shown in the chart title.
        minCount: Keep only rows whose ``count`` is at least this value.
        minN: If given, keep only rows with ``size >= minN``.
        maxN: If given, keep only rows with ``size <= maxN`` and colour the
            points by ``minNt``; otherwise points are coloured by ``size`` on
            a log scale.
        saveHTML: If true, also write the chart to ``freq_vs_modularity.html``
            in the current working directory.

    Returns:
        The Altair chart object.

    Raises:
        FileNotFoundError: If no ``.ftr`` file matches the search pattern.
    """
    # Local import keeps this cell self-contained; glob replaces the
    # IPython-only `!ls` capture, so the function also works outside a
    # notebook and with paths that contain spaces.
    import glob

    title = 'Frequency vs modularity, {:.1E} samples'.format(nSampled)

    pattern = "{}/seeded/*d/*.ftr".format(path)
    paths = sorted(glob.glob(pattern))
    if not paths:
        # Fail early with a clear message instead of letting pd.concat (or
        # the reader) raise something cryptic further down.
        raise FileNotFoundError("No .ftr files match {!r}".format(pattern))

    df = pd.concat([readAndAssign(p) for p in paths])
    df = df.loc[df['count'] >= minCount]
    if minN is not None:
        df = df.loc[df['size'] >= minN]
    if maxN is not None:
        df = df.loc[df['size'] <= maxN]
        # With an upper size bound the columns already separate the sizes,
        # so the colour channel shows minNt instead.
        color = alt.Color('minNt:Q', scale=alt.Scale(scheme="viridis"))
    else:
        color = alt.Color('size:Q', scale=alt.Scale(scheme="viridis", type='log'))

    chart = alt.Chart(df).mark_circle(size=40).encode(
        alt.X("modularity:Q", scale=alt.Scale(type='linear'), title='Modularity index'),
        alt.Y('freq:Q', scale=alt.Scale(type='log'), title="Frequency"),
        row=alt.Row("nDim:O", title=None),
        column=alt.Column('size:N'),
        href='url:N',  # clicking a point opens the rule URL built below
        color=color,
        tooltip=['url:N', 'count', 'freq:Q', 'minLz', 'minNc', 'minNt', 'modularity:Q', 'size'],
    ).transform_calculate(
        freq='datum.count/{}'.format(nSampled),
        modularity='datum.size/datum.minNt',
        url='"https://akodiat.github.io/polycubes/?rule="+datum.minLz_r',
        # NOTE(review): `symmetries` is computed but not used by any encoding.
        symmetries='1+datum.rotsymms+datum.reflsymms+datum.invsymms'
    ).properties(width=100, height=100, title=title)

    if saveHTML:
        chart.save('freq_vs_modularity.html')
    return chart

In [None]:
# Run 210918, 1e8 samples: keep only phenotypes counted at least 10 times.
plotModularity(
    path='/mnt/extraspace/joakim/210918',
    nSampled=1e8,
    minCount=10,
)

# Individual sizes

In [None]:
# Run 210918, 1e8 samples: restrict to sizes 6 through 10 (inclusive).
plotModularity(
    path='/mnt/extraspace/joakim/210918',
    nSampled=1e8,
    minCount=1,
    minN=6,
    maxN=10,
)

In [None]:
# Same size range (6 through 10, inclusive) for the data under `refcalc`.
plotModularity(
    path='/mnt/extraspace/joakim/refcalc',
    nSampled=1e8,
    minCount=1,
    minN=6,
    maxN=10,
)