# Genetic correlations

In [None]:
import robustness
import altair as alt
import pandas as pd
import numpy as np
import utils
import pickle

## Sampling of data
The PLOS article uses two different polyomino datasets, $S_{2,8}$ and $S_{3,8}$, resulting in maps of size
$1.7 \times 10^7 \rightarrow 13$ and $6.9 \times 10^{10} \rightarrow 147$, respectively.

In [None]:
# Print the value returned by utils.calculateSearchSpaceSize for 4 colors,
# 2 cube types and 2 dimensions, in scientific notation with two decimals.
print(format(utils.calculateSearchSpaceSize(nColors=4, nCubeTypes=2, nDim=2), ".2e"))

In [None]:
#list(it.islice(getAllRules(nColors=1, nCubeTypes=1, nDim=2), 200))
# Dump every rule produced by utils.getAllRules (4 colors, 2 cube types,
# 2 dimensions) to a text file, one rule per line.
with open("input_2d_4c_2t.txt", "w") as f:
    f.writelines(
        rule + '\n'
        for rule in utils.getAllRules(nColors=4, nCubeTypes=2, nDim=2)
    )

In [None]:
# Call utils.loadPhenos on '../cpp/out'. The return value is not bound to a
# name in this cell, so it is only available as the cell's output.
utils.loadPhenos('../cpp/out')

In [None]:
# Build one DataFrame per dataset directory under ../cpp and tag every row
# with the name of the dataset it came from.
data = {}
for name in [
    'out',
]:
    # NOTE: unpickling can execute arbitrary code; only load files that were
    # produced locally by a trusted run.
    # The context manager closes the file handle once the pickle is read.
    with open('../cpp/{}/robustness.p'.format(name), 'rb') as f:
        data[name] = pd.DataFrame(data=pickle.load(f))
    data[name]['dataset'] = name

# Serialize data on disk to avoid embedding everything
alt.data_transformers.enable('json');

In [None]:
# Evaluate the 'out' entry of `data` (the DataFrame built by the loading cell)
# so it can be inspected.
data['out']

## Fig 2a - Phenotype robustness as a function of frequency

In [None]:
def plot2a(dfs, logMinF=-8, minCount=0):
    """Build the Fig 2a chart: phenotype robustness against phenotype frequency.

    Args:
        dfs: Iterable of DataFrames to concatenate. Each must carry the
            'genotypes' and 'robustnessVals' columns (dropped before
            plotting) plus the columns referenced by the encodings below.
        logMinF: Base-10 exponent of the smallest frequency on the
            reference line.
        minCount: Rows whose 'count' is below this value are left out.

    Returns:
        A layered Altair chart: the reference line robustness = frequency
        with the phenotype circles drawn on top of it.
    """
    phenotypes = pd.concat(dfs).drop(columns=['genotypes', 'robustnessVals'])
    phenotypes = phenotypes[phenotypes['count'] >= minCount]

    # Reference line: 50 log-spaced frequencies from 10**logMinF up to 1,
    # plotted against themselves.
    freqs = np.logspace(logMinF, 0, 50)
    diagonal = pd.DataFrame({'freq': freqs, 'robustness': freqs, 'dataset': 'ρₚ = fₚ'})
    referenceLine = alt.Chart(diagonal).mark_line().encode(
        x='freq',
        y='robustness',
        color='dataset',
    )

    # Derived fields: a link built from the rule string, and the robustness
    # divided by minNt (only shown in the tooltip).
    withDerived = alt.Chart(phenotypes).transform_calculate(
        url='https://akodiat.github.io/polycubes?rule=' + alt.datum.minNc_r,
        adjustedRobustness=alt.datum.robustness / alt.datum.minNt,
    )
    tooltipFields = [
        'minNc_r', 'count', 'minNc', 'minNt',
        'robustnessVar', 'adjustedRobustness:Q',
    ]
    points = withDerived.mark_circle(size=60).encode(
        alt.X('freq', scale=alt.Scale(type='log'), title="Phenotype frequency fₚ"),
        alt.Y('robustness:Q', title="Phenotype robustness ρₚ"),
        href='url:N',
        color='minNt',
        tooltip=tooltipFields,
    )

    return referenceLine + points

In [None]:
# Render Fig 2a for the 'out' dataset.
# NOTE(review): plot2a already derives 'adjustedRobustness' with the same
# formula; the extra 'adjRob' field added here is not referenced by any
# encoding in plot2a - confirm whether it is still needed.
plot2a([data['out']]).transform_calculate(
    adjRob='datum.robustness / datum.minNt'
)

In [None]:
# NOTE(review): the loading loop visible in this notebook only fills
# data['out']; unless `data` is populated elsewhere, this lookup raises
# KeyError - confirm the dataset list.
plot2a([data['3d_10c_5t_1e8']])

In [None]:
# Overlay two datasets in one Fig 2a chart.
# NOTE(review): the loading loop visible in this notebook only fills
# data['out']; unless `data` is populated elsewhere, these lookups raise
# KeyError - confirm the dataset list.
plot2a([data['2d_3c_2t_1e8'],data['2d_3c_3t_1e8']])

In [None]:
# Combine five datasets, keep rows with count >= 10, start the reference line
# at 1e-7, resize the chart and write it to 'robustness.html'.
# NOTE(review): the loading loop visible in this notebook only fills
# data['out']; unless `data` is populated elsewhere, these lookups raise
# KeyError - confirm the dataset list.
plot2a([
    data['3d_10c_5t_1e8'], data['3d_3c_2t_1e8'],
    data['2d_3c_3t_1e8'], data['2d_3c_2t_1e8'],
    data['1d_10c_5t_1e8']], minCount=10, logMinF=-7).properties(height=150, width=300).save('robustness.html')

# Fig 2b

In [None]:
phenos = utils.loadPhenos('../cpp/out')
import random
# Show up to 10 randomly chosen phenotypes. The sample size is clamped
# because random.sample raises ValueError when asked for more items than the
# population holds.
random.sample(phenos, min(10, len(phenos)))

# Fig 3

In [None]:
def getMutationalDistance(hexA, hexB, maxColor=3, maxCubes=3, dim=3, limit=None):
    """Count the face attributes in which two hex-encoded rules differ.

    The shorter rule string is padded with empty cubes (12 zero characters
    each) up to the length of the longer one. Both strings are then parsed
    and simplified through ``utils`` and compared cube by cube over six
    faces per cube.

    Args:
        hexA: Rule string accepted by ``utils.parseHexRule``.
        hexB: Rule string accepted by ``utils.parseHexRule``.
        maxColor: Largest face color either simplified rule may contain.
        maxCubes: Largest number of 12-character cubes a rule may have.
        dim: When 3, both 'color' and 'orientation' are compared for every
            face; otherwise only 'color'.
        limit: Optional cut-off. Counting stops as soon as the distance
            exceeds it, so the result is then only a lower bound on the
            true distance. ``None`` (the default) counts every difference.

    Returns:
        The number of differing (face, attribute) pairs, or the first count
        above ``limit`` when a limit is given and exceeded.

    Raises:
        AssertionError: If a rule is longer than ``maxCubes`` cubes, if
            padding cannot bring both rules to the same length, or if a
            face color exceeds ``maxColor``.
    """
    emptyCube = "000000000000"
    maxlen = max(len(hexA), len(hexB))
    assert maxlen <= maxCubes * len(emptyCube), '{} is more than {}'.format(maxlen/len(emptyCube), maxCubes)

    # Pad the shorter rule with whole empty cubes. A length that is not a
    # multiple of one cube overshoots and is caught by the asserts below.
    while len(hexA) < maxlen:
        hexA += emptyCube
    while len(hexB) < maxlen:
        hexB += emptyCube
    assert len(hexA) == maxlen, '{} should be {}'.format(len(hexA), maxlen)
    assert len(hexB) == maxlen, '{} should be {}'.format(len(hexB), maxlen)

    a = utils.parseHexRule(hexA)
    b = utils.parseHexRule(hexB)

    # Simplify?
    a = utils.simplifyRuleset(a)
    b = utils.simplifyRuleset(b)

    # default=0 keeps an empty simplified ruleset from crashing max().
    for ruleset in (a, b):
        highestColor = max(
            (face['color'] for cube in ruleset for face in cube),
            default=0,
        )
        assert maxColor >= highestColor, 'color {} is more than {}'.format(highestColor, maxColor)

    attrs = ['color', 'orientation'] if dim==3 else ['color']
    dist = 0
    # NOTE(review): zip stops at the shorter ruleset; if simplifyRuleset can
    # return rulesets of different lengths, the surplus cubes are never
    # compared - confirm.
    for cubeA, cubeB in zip(a,b):
        for i in range(6):
            for attr in attrs:
                if cubeA[i][attr] != cubeB[i][attr]:
                    dist += 1
                    # Compare against None explicitly so that limit=0 is
                    # honoured instead of being treated as "no limit".
                    if limit is not None and dist > limit:
                        return dist
    return dist
# NOTE(review): the string literal below is a disabled draft, not live code.
# As written it would not run: the body uses `items`, `verbose` and `test`,
# none of which are parameters of the function or defined in it.
"""
def groupNeutralComponents(genotypes, sortevery=10):
    nItems = len(items)
    nGroups = 0
    print("About to group {} rules".format(nItems), flush=True)
    groups = []
    for i, item in enumerate(items):
        if verbose:
            print("{} components, {} genotypes grouped ({:n}%)".format(nGroups, i, 100*i/nItems), end='\r', flush=True)
        foundGroup = False
        for group in groups:
            if test(item, group[0]):
                group.append(item)
                foundGroup = True
                break
        if not foundGroup:
            groups.append([item])
            nGroups += 1
        if i%sortevery == 0:
            groups.sort(key=lambda x: len(x), reverse=True) # Make sure most common is first
    groups.sort(key=lambda x: len(x), reverse=True)
    return groups
"""

In [None]:
import random
# Random walk through genotype space: starting from rule `a`, repeatedly
# replace the current rule by a random entry of
# robustness.enumerateMutations and record how far it has drifted from `a`.
r = a = '858500000000050000840000850086000000'
distData = []
for i in range(200):
    r = random.choice(robustness.enumerateMutations(r,maxColor=3, maxCubes=3, dim=3))
    distData.append({
        # i is zero-based: by this point i + 1 mutations have been applied.
        'mutations': i + 1,
        'distance': getMutationalDistance(a, r)
    })

alt.Chart(pd.DataFrame(data=distData)).mark_line().encode(
    x='mutations',
    y='distance'
)


In [None]:
# NOTE(review): the variable name suggests a 2d/3c/3t/1e6 dataset, but the
# path is the generic '../cpp/out' directory - confirm that they match.
phenos_2d_3c_3t_1e6 = utils.loadPhenos('../cpp/out')

In [None]:
import networkx as nx
def calcConnectedComponents(pheno, maxColor=31, maxCubes=5, dim=3):
    """Annotate a phenotype with its neutral-component statistics.

    A graph is built with one node per distinct genotype in
    ``pheno['genotypes']`` and an edge between every pair whose
    ``getMutationalDistance`` is at most 1.

    Args:
        pheno: Dict with a 'genotypes' entry holding hashable genotype
            strings. It is modified in place.
        maxColor: Forwarded to ``getMutationalDistance``.
        maxCubes: Forwarded to ``getMutationalDistance``.
        dim: Forwarded to ``getMutationalDistance``.

    Returns:
        The same ``pheno`` dict, with 'nNC' set to the number of connected
        components and 'largestNC' to the size of the largest one (both 0
        when there are no genotypes).
    """
    from itertools import combinations

    # De-duplicate while keeping order: equal strings are the same graph
    # node anyway, and this avoids pointless self-comparisons.
    genotypes = list(dict.fromkeys(pheno['genotypes']))

    G = nx.Graph()
    G.add_nodes_from(genotypes)
    # Each unordered pair is checked once. limit=1 lets the distance
    # computation stop as soon as a second difference is found.
    for g1, g2 in combinations(genotypes, 2):
        if getMutationalDistance(g1, g2, maxColor=maxColor, maxCubes=maxCubes, dim=dim, limit=1) <= 1:
            G.add_edge(g1, g2)
    #nx.draw(G)
    pheno['nNC'] = nx.number_connected_components(G)
    # default=() covers a phenotype without genotypes (empty graph).
    pheno['largestNC'] = len(max(nx.connected_components(G), key=len, default=()))
    return pheno

In [None]:
# Run the component analysis on the phenotype at index 6 only;
# calcConnectedComponents stores 'nNC' and 'largestNC' on that dict and
# returns it.
calcConnectedComponents(phenos_2d_3c_3t_1e6[6])

In [None]:
# Annotate every loaded phenotype in place and print one
# (index, nNC, largestNC) tuple per phenotype on a single comma-separated
# line.
for i, p in enumerate(phenos_2d_3c_3t_1e6):
    calcConnectedComponents(p)
    print((i, p['nNC'], p['largestNC']), end=',')

In [None]:
# Scatter plot: number of connected components ('nNC') of each phenotype
# against its frequency on a logarithmic axis.
alt.Chart(pd.DataFrame(phenos_2d_3c_3t_1e6)).mark_circle().encode(
    x=alt.X(
        'freq',
        scale=alt.Scale(type='log'),
        title="Phenotype frequency fₚ",
    ),
    y=alt.Y('nNC'),
)

In [None]:
# Scatter plot: size of the largest connected component ('largestNC') of each
# phenotype against its frequency on a logarithmic axis.
alt.Chart(pd.DataFrame(phenos_2d_3c_3t_1e6)).mark_circle().encode(
    x=alt.X(
        'freq',
        scale=alt.Scale(type='log'),
        title="Phenotype frequency fₚ",
    ),
    y=alt.Y('largestNC'),
)