In [2]:
import plotly.express as px
import pandas as pd
from matplotlib import rcParams
import matplotlib.pyplot as plt
import json
import collections
# Make Palatino the font family used by matplotlib.
rcParams.update({'font.family': "Palatino"})

In [3]:
def _read_upper_mapping(csv_path, key_col, value_col):
    """Read a CSV and return an upper-cased ``{key_col: value_col}`` dict.

    Only the two requested columns are upper-cased, using the vectorised
    ``.str.upper()`` accessor instead of ``DataFrame.applymap`` (deprecated
    in recent pandas releases).  Missing values pass through ``.str.upper()``
    unchanged rather than raising.  If a key occurs more than once, the last
    occurrence wins.

    Parameters
    ----------
    csv_path : str
        Path of the CSV file to read.
    key_col, value_col : str
        Names of the columns supplying the dict keys and values.

    Returns
    -------
    dict
        Mapping of upper-cased ``key_col`` entries to upper-cased
        ``value_col`` entries.
    """
    table = pd.read_csv(csv_path)
    keys = table[key_col].str.upper()
    values = table[value_col].str.upper()
    return dict(zip(keys, values))


# Kinase name -> family name, and family name -> group name (all upper-case).
kinase_to_family = _read_upper_mapping("../data/preprocessing/kin_to_vec.csv", 'Kinase', 'Family')
family_to_group = _read_upper_mapping("../data/preprocessing/fam_to_grp.csv", 'Family', 'Group')

In [4]:
# Kinase-list JSON file of each machine-learning split, paired with its label.
ml_set_files = [
    ('../data/preprocessing/tr_kins.json', 'train'),
    ('../data/preprocessing/vl_kins.json', 'val'),
    ('../data/preprocessing/te_kins.json', 'test'),
]

# Collect one record per kinase and build the frame once at the end.
# Writing through a row object obtained from `new_df.loc[...]` only updates
# the frame when pandas happens to return a view, and growing a frame row by
# row is quadratic; a list of records avoids both problems.
records = []
for fn, label in ml_set_files:
    with open(fn, "r") as f:
        set_of_kinases = json.load(f)
    for kin in set_of_kinases:
        # A kinase missing from kinase_to_family raises KeyError here.
        family = kinase_to_family[kin]
        # Families without a known group fall into a shared "<UNKNOWN>" group.
        if family in family_to_group:
            group = family_to_group[family] + "@G"
        else:
            group = "<UNKNOWN>@G"
        # "@F" / "@G" tag family and group labels; later cells treat any
        # label containing "@" as a non-leaf sector.
        records.append({'Kinase': kin, 'Family': family + "@F", 'Group': group, 'MLSet': label})

new_df = pd.DataFrame(records, columns=['Kinase', 'Family', 'Group', 'MLSet'])


In [5]:
# Per-kinase site counts; the raw file's 'lab' column is renamed to 'Kinase'.
num_sites_df = (
    pd.read_csv("../data/raw_data_13437.csv")
    .rename(columns={'lab': 'Kinase'})
    .loc[:, ['Kinase', 'num_sites']]
)

# Left-join the counts onto the kinase table, then drop the duplicate rows
# the join can produce and renumber the index.
# NOTE: this overwrites new_df, so re-running the cell merges a second time.
new_df = (
    new_df.merge(num_sites_df, how='left', on='Kinase')
    .drop_duplicates(keep='first')
    .reset_index(drop=True)
)

In [6]:
# Kinase name -> num_sites lookup built from num_sites_df.
kin_to_num_sites = num_sites_df.set_index('Kinase').to_dict(orient='dict')['num_sites']

In [7]:
# Sunburst in which every row of new_df carries the same weight of 1.
fig = px.sunburst(
    new_df,
    path=['Group', 'MLSet', 'Family', 'Kinase'],
    values=[1] * len(new_df),
)
fig.show()

In [8]:
# Sunburst weighted by num_sites. Rows whose num_sites is missing fail the
# `>= 0` comparison and are therefore left out.
fig = px.sunburst(
    new_df.loc[new_df['num_sites'].ge(0)],
    path=['Group', 'MLSet', 'Family', 'Kinase'],
    values='num_sites',
)
fig.show()

In [9]:
# One-row sunburst whose rings are labelled with the names of the hierarchy
# levels, from the innermost ring outwards.
ring_names = ['Kinase Group', 'ML Set', 'Kinase Family', 'Kinase']
explainer = px.sunburst(
    pd.DataFrame({name: [name] for name in ring_names}),
    path=ring_names,
    values=[1],
)
explainer.show()

In [10]:
def get_sectors(df):
    """Flatten a hierarchy table into unique (label, parent) sunburst sectors.

    Columns of ``df`` are read left to right as hierarchy levels: the first
    column holds the root level and each later column holds the children of
    the column to its left.

    Rows are paired by position, so ``df`` may carry any index (the previous
    implementation fed index labels to ``.iloc`` and relied on the removed
    ``Series.iteritems``).

    Parameters
    ----------
    df : pandas.DataFrame
        Table of string labels with at least one column.

    Returns
    -------
    pandas.DataFrame
        Columns ``labels``, ``parents`` and ``val``, de-duplicated and with a
        fresh 0..n-1 index.  Non-root levels come first (in column order),
        followed by the root level whose parent is the empty string.
        ``val`` is 0 for labels containing "@" and 1 otherwise.
    """
    children = []
    parents = []
    # Pair every non-root column with the column immediately to its left.
    for level in range(1, len(df.columns)):
        children.extend(df.iloc[:, level].tolist())
        parents.extend(df.iloc[:, level - 1].tolist())
    # Root-level entries have no parent.
    children.extend(df[df.columns[0]].tolist())
    parents.extend([""] * len(df))
    sectors = pd.DataFrame({
        'labels': children,
        'parents': parents,
        'val': [0 if "@" in label else 1 for label in children],
    })
    return sectors.drop_duplicates().reset_index(drop = True)

In [11]:
# Independent copy of the four hierarchy columns (num_sites is left out),
# ordered from the innermost ring outwards.
sector_df = new_df[['Group', 'MLSet', 'Family', 'Kinase']].copy()

In [12]:
# Display only: sort_values returns a new frame and sector_df itself is unchanged.
sector_df.sort_values(by=["Group", "MLSet", "Family", "Kinase"])

Unnamed: 0,Group,MLSet,Family,Kinase
317,<UNKNOWN>@G,test,ENPP3@F,ENPP3
325,<UNKNOWN>@G,test,HSPA5@F,HSPA5
366,<UNKNOWN>@G,test,PIK3C@F,PIK3C2A
367,<UNKNOWN>@G,test,PIK3C@F,PIK3CA
368,<UNKNOWN>@G,test,PIK3C@F,PIK3CB
...,...,...,...,...
227,TKL@G,val,STKR@F,ACVR1B
228,TKL@G,val,STKR@F,ACVRL1
231,TKL@G,val,STKR@F,BMPR1B
287,TKL@G,val,STKR@F,TGFBR1


In [13]:
def make_unq(row):
    """Append "@@<Group>" to the row's ``MLSet`` entry and return the row.

    The row is modified in place; the same object is returned so the function
    can be used with ``DataFrame.apply(..., axis=1)``.
    """
    row['MLSet'] = row['MLSet'] + "@@" + row["Group"]
    return row

# Apply make_unq to every row so each MLSet label is suffixed with its Group.
# NOTE: sector_df is overwritten, so re-running this cell appends the suffix again.
sector_df = sector_df.apply(make_unq, axis="columns")

In [14]:
import plotly.graph_objects as go

# Flatten the hierarchy table into (label, parent) sectors.
sectors = get_sectors(sector_df)
# Labels containing "@" get value 0; every other label is looked up in
# kin_to_num_sites (a label missing from that dict raises KeyError).
sectors['val'] = [0 if "@" in label else kin_to_num_sites[label] for label in sectors['labels']]

In [128]:
import plotly

# Full sunburst built from the flattened sector table.
fig = go.Figure(go.Sunburst(
    ids=sectors['labels'],
    # Display text: drop the "@@<group>" uniqueness suffix and the "@F"/"@G" tags.
    labels=[x.split("@@")[0].split("@")[0] for x in sectors['labels']],
    parents=sectors['parents'],
    values=sectors['val'],
    insidetextfont=plotly.graph_objects.sunburst.Insidetextfont(family = 'Palatino', size = 24),
    insidetextorientation = 'radial'
))

# Fixed 1000x1000 canvas with no margins.
fig.update_layout(
    autosize=False,
    width=1000,
    height=1000,
    margin = dict(t=0, l=0, r=0, b=0)
)

fig.show()

# Render before opening the output file so a failed export cannot leave a
# truncated SVG behind; the context manager closes the handle even on error.
svg_bytes = fig.to_image(format = 'svg', height = 1000, width = 1000)
with open("giant_sunburst.svg", "wb") as f:
    f.write(svg_bytes)

In [99]:
# One list element per character of the 20-letter alphabet string
# (presumably the one-letter amino-acid codes -- confirm).
AA = list("ACDEFGHIKLMNPQRSTVWY")
assert len(AA) == 20
import random
# Load the explainer hierarchy (columns: id, labels, parents, val).
explainer = pd.read_excel("./Onion Explainer.xlsx", sheet_name = "Main Detail (2)")

# Supply of placeholder ids "Seq-0", "Seq-1", ...; sized by the sum of the
# whole 'val' column, which is at least as many as the loop below consumes.
fake_kins = (f"Seq-{x}" for x in range(sum(explainer['val'])))

for i, r in explainer.iterrows():
    # Only rows with index above 29 are expanded
    # (presumably the outermost entries of the sheet -- confirm).
    if i <= 29:
        continue
    n_children = r['val']
    first_new = len(explainer)
    # Append one "Site" child of weight 1 per unit of the row's original value.
    for offset in range(n_children):
        explainer.loc[first_new + offset] = [next(fake_kins), "Site", r['id'], 1]
    # The children now carry the weight, so the parent's own value becomes 0.
    explainer.at[i, 'val'] = 0

In [91]:
# Show the explainer table as the cell output.
explainer

Unnamed: 0,id,labels,parents,val
0,Kinase<br>Group A,Kinase<br>Group A,,0
1,Kinase<br>Group B,Kinase<br>Group B,,0
2,Kinase<br>Group C,Kinase<br>Group C,,0
3,A_Train,Train,Kinase<br>Group A,0
4,A_Val,Val,Kinase<br>Group A,0
5,A_Test,Test,Kinase<br>Group A,0
6,B_Train,Train,Kinase<br>Group B,0
7,B_Val,Val,Kinase<br>Group B,0
8,B_Test,Test,Kinase<br>Group B,0
9,C_Train,Train,Kinase<br>Group C,0


In [125]:
import plotly

# Sunburst trace for the explainer table: ids, display labels, parent ids and
# sector values all come straight from the corresponding columns.
sb = go.Sunburst(
    values=explainer['val'],
    parents=explainer['parents'],
    labels=explainer['labels'],
    ids=explainer['id'],
    insidetextorientation='radial',
    insidetextfont=go.sunburst.Insidetextfont(family='Palatino', size=20),
)


In [127]:
# Wrap the explainer trace in a figure on a fixed 1000x1000 canvas with no margins.
fig = go.Figure(sb)

fig.update_layout(
    autosize=False,
    width=1000,
    height=1000,
    margin = dict(t=0, l=0, r=0, b=0)
    )

fig.show()

# Render before opening the output file so a failed export cannot leave a
# truncated SVG behind; the context manager closes the handle even on error.
svg_bytes = fig.to_image(format = 'svg', height = 1000, width = 1000)
with open("sunburst explainer.svg", "wb") as f:
    f.write(svg_bytes)

In [84]:
import plotly

# Companion sunburst read from the "Companion" sheet (columns: id, labels, parents, val).
companion = pd.read_excel("./Onion Explainer.xlsx", sheet_name = "Companion")
fig = go.Figure(go.Sunburst(
    ids=companion['id'],
    labels=companion['labels'],
    parents=companion['parents'],
    values=companion['val'],
    insidetextfont=plotly.graph_objects.sunburst.Insidetextfont(family = 'Palatino', size = 20),
    insidetextorientation = 'radial'
))

# Fixed 1000x1000 canvas with no margins.
fig.update_layout(
    autosize=False,
    width=1000,
    height=1000,
    margin = dict(t=0, l=0, r=0, b=0)
)

fig.show()

# Write to a file of its own: this cell previously saved to
# "giant_sunburst.svg", which overwrote the main sunburst export whenever the
# notebook was run top to bottom.
# Render before opening the file so a failed export cannot leave a truncated
# SVG behind; the context manager closes the handle even on error.
svg_bytes = fig.to_image(format = 'svg', height = 1000, width = 1000)
with open("sunburst companion.svg", "wb") as f:
    f.write(svg_bytes)

In [78]:
# Show the companion table as the cell output.
companion

Unnamed: 0,id,labels,parents,val
0,Kinase Group,Kinase Group,,0
1,Machine<br>Learning Set,Machine<br>Learning Set,Kinase Group,0
2,Kinase Family,Kinase Family,Machine<br>Learning Set,0
3,Kinase,Kinase,Kinase Family,0
4,Target Site<br>Sequence,Target Site<br>Sequence,Kinase,1


In [47]:
# Let pandas display up to 500 rows before truncating a frame.
pd.options.display.max_rows = 500

In [374]:
# Export the current figure as a standalone HTML file.
# NOTE(review): `fig` is whichever figure was assigned most recently; in
# top-to-bottom order that is the companion figure, not the large sunburst
# the file name suggests -- confirm which figure is intended.
fig.write_html("html_version_starburst_huge.html")

In [150]:
# Load the 'NonUnique' sheet; the sunburst built from it reads the
# Group, Family, MLSet, Kinase and Val columns.
df = pd.read_excel(io="./Onion Explainer.xlsx", sheet_name='NonUnique')

In [151]:
import plotly.express as px
# Sunburst of the 'NonUnique' sheet weighted by its Val column. Here Family
# sits inside Group and MLSet is the third ring, unlike the earlier figures.
fig = px.sunburst(
    df,
    values='Val',
    path=['Group', 'Family', 'MLSet', 'Kinase'],
)
fig.show()