In [None]:
import networkx as nx

# Load the edgotyping network from its GEXF export.
# NOTE(review): the path ends in ".gefx" rather than the usual ".gexf" — confirm it matches the file on disk.
edgotype = nx.read_gexf("data/y2hEdgotyping/edgotype.gefx")

In [None]:
import pandas as pd
import numpy as np
import itertools

In [None]:
# Work on the full network by default. To focus on specific genes instead, uncomment
# CENTERS and the alternative `subgraph` assignment below, which keeps only the
# listed center nodes together with their direct neighbors.
# CENTERS = ["ENSG00000004838","ENSG00000134371"]
subgraph = edgotype
# subgraph = edgotype.subgraph(CENTERS+list(itertools.chain.from_iterable([edgotype.neighbors(c) for c in CENTERS])))

In [None]:
from collections import Counter

In [None]:
def _count_undirected_edges(edge_pairs):
    """Collapse parallel edges into ``(u, v, count)`` triples, ignoring direction.

    The triples are loaded into the undirected graph ``und_g``. If (u, v) and
    (v, u) were counted separately (as happens when the source graph is
    directed), the later triple would overwrite the earlier one's weight instead
    of adding to it. Keying both orientations to the first-seen one keeps the
    total. For an undirected source the result is unchanged.

    Parameters
    ----------
    edge_pairs : iterable of (u, v)
        One pair per edge; parallel edges appear as repeated pairs.

    Returns
    -------
    list of (u, v, int)
        One triple per unordered node pair, in first-seen order.
    """
    pair_counts = Counter()
    for u, v in edge_pairs:
        # Reuse the reversed key if that orientation was already recorded.
        key = (v, u) if (v, u) in pair_counts else (u, v)
        pair_counts[key] += 1
    return [(u, v, count) for (u, v), count in pair_counts.items()]


weightedEdges = _count_undirected_edges(subgraph.edges())

In [None]:
# Simple undirected graph used for the layout and for drawing edges;
# each edge carries the weight taken from the third element of `weightedEdges`.
und_g = nx.Graph()

und_g.add_weighted_edges_from(weightedEdges)

In [None]:
# Compute node coordinates by running Graphviz's `twopi` layout program on `und_g`.
pos = nx.drawing.nx_agraph.graphviz_layout(und_g,prog='twopi')

In [None]:
# Step 2: reshape NetworkX's output into the pandas DataFrames that Altair consumes.
# First, one row per node holding its layout coordinates.
pos_records = [
    {"node_id": node, "x": x_coord, "y": y_coord}
    for node, (x_coord, y_coord) in pos.items()
]
pos_df = pd.DataFrame.from_records(pos_records)

In [None]:
# Inspect the per-node layout coordinates.
pos_df

In [None]:
# One row per node: its graph attributes plus the node id, stored under both
# `node_id` (the key used by the position lookup) and `db` (the field the
# `brush` selection is defined on).
node_df = pd.DataFrame.from_records(
    {**attrs, "node_id": node, "db": node}
    for node, attrs in subgraph.nodes.data()
)

In [None]:
# Inspect the node attribute table.
node_df

In [None]:
import itertools

In [None]:
# Each edge becomes two rows (one per endpoint) sharing the same `edge_id`,
# so that a line mark can later be drawn between the two endpoint positions.
edge_data = [
    {**attrs, "edge_id": edge_id, "end": end, "node_id": node}
    for edge_id, (src, dst, attrs) in enumerate(und_g.edges(data=True))
    for end, node in (("source", src), ("target", dst))
]
edge_df = pd.DataFrame.from_records(edge_data)

In [None]:
# Inspect the edge endpoint table (two rows per edge).
edge_df

In [None]:
# Output column -> edge attribute it is read from (order fixes the column order).
edge_attr_by_column = {
    "db": "db_ensembl_gene_id_mt",
    "ad": "ad_ensembl_gene_id_mt",
    "mt": "aa_change_mt",
    "control_wt": "LW_wt",
    "control_mt": "LW_mt",
    "level_1_wt": "LWH1_f_wt",
    "level_1_mt": "LWH1_f_mt",
    "level_2_wt": "LWH10_f_wt",
    "level_2_mt": "LWH10_f_mt",
    "level_3_wt": "LWH25_f_wt",
    "level_3_mt": "LWH25_f_mt",
    "level_4_wt": "LWA_f_wt",
    "level_4_mt": "LWA_f_mt",
    "level_5_wt": "LWAH1_f_wt",
    "level_5_mt": "LWAH1_f_mt",
}


def _edge_row(attrs):
    """Build one table row from an edge's attribute dict, adding both gene symbols."""
    row = {column: attrs[key] for column, key in edge_attr_by_column.items()}
    # Symbols live on the nodes, so look them up by the endpoint gene ids.
    row["db_sym"] = subgraph.nodes[attrs["db_ensembl_gene_id_mt"]]["symbol"]
    row["ad_sym"] = subgraph.nodes[attrs["ad_ensembl_gene_id_mt"]]["symbol"]
    return row


data = pd.DataFrame.from_records(
    [_edge_row(attrs) for _, _, attrs in subgraph.edges(data=True)]
)

# Human-readable label "<db symbol>-<mutation>-<ad symbol>" used as the tooltip.
name_parts = data[["db_sym", "mt", "ad_sym"]]
data["name"] = name_parts.agg("-".join, axis=1)

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Distinct (db, ad) pairs among the rows that have no missing value in any column.
data.dropna().loc[:, ["db", "ad"]].drop_duplicates()

In [None]:
# (rows, columns) of the full edge table.
data.shape

In [None]:
# Wild-type scores for the five levels, keeping the first row per (db, ad, mt) triple.
wtSub = (
    data.loc[:, ["db", "ad", "mt", *(f"level_{lvl}_wt" for lvl in range(1, 6))]]
    .drop_duplicates(subset=["db", "ad", "mt"])
)

In [None]:
# Rows of `wtSub` with at least one missing value.
# `axis` is passed by keyword: pandas 2.0 made it keyword-only for
# DataFrame.any, so the positional form `.any(1)` raises TypeError there.
wtSub[wtSub.isna().any(axis=1)]

In [None]:
# Rows of `data` where any column whose name contains "_mt" is missing.
# `axis` is passed by keyword: pandas 2.0 made it keyword-only for
# DataFrame.any, so the positional form `.any(1)` raises TypeError there.
data[data[[col for col in data.columns if "_mt" in col]].isna().any(axis=1)]

In [None]:
# Keep rows that are complete in every column whose name does not contain "_mt".
data.dropna(subset=[col for col in data.columns if "_mt" not in col])

In [None]:
# How often each wild-type score profile (levels 1-5) occurs, counting every
# (db, ad) pair once and ignoring profiles with missing scores.
wt_levels = [f"level_{lvl}_wt" for lvl in range(1, 6)]
counts = pd.DataFrame(
    data[["db", "ad", *wt_levels]]
    .drop_duplicates(subset=["db", "ad"])
    .loc[:, wt_levels]
    .dropna()
    .value_counts()
)

In [None]:
# Inspect the wild-type score profile counts.
counts

In [None]:
# Same counts, ordered by the level-1 wild-type score, highest first.
counts.sort_values(by="level_1_wt",ascending=False)

In [None]:
import altair as alt
from altair import expr, datum
# Lift Altair's row limit so the full tables can be passed to the charts below.
alt.data_transformers.disable_max_rows()

In [None]:
# Single-item selection projected onto the `db` field; the charts below use it
# to highlight the rows that share the selected `db` value.
brush = alt.selection_single(fields=['db'])
# point_sel = alt.selection_multi(on="[mousedown[event.ctrlKey], mouseup] > mousemove")

In [None]:
# Position encodings shared by nodes and edges; axes are hidden.
x, y = alt.X('x:Q', axis=None), alt.Y('y:Q', axis=None)

# Lookup transform that attaches x/y from pos_df to each row by matching node_id.
node_position_lookup = dict(
    lookup='node_id',
    from_=alt.LookupData(data=pos_df, key='node_id', fields=['x', 'y']),
)

# Nodes: blue when selected by `brush`, light gray otherwise.
node_color = alt.condition(brush, alt.value('blue'), alt.value('lightgray'))
nodes = alt.Chart(node_df).mark_circle(size=10, opacity=1)
nodes = nodes.encode(x=x, y=y, tooltip='symbol:N', color=node_color)
nodes = nodes.transform_lookup(**node_position_lookup).add_selection(brush)

# Edges: `detail` yields one line per edge_id; line size encodes the edge weight.
edges = alt.Chart(edge_df).mark_line(color='gray')
edges = edges.encode(x=x, y=y, detail='edge_id:N', size="weight:Q")
edges = edges.transform_lookup(**node_position_lookup)

# Draw nodes on top of edges and enable pan/zoom.
graphChart = (edges + nodes).interactive()

In [None]:
# Base scatter shared by all five panels: colour by mutation, shape by AD symbol.
# Rows outside the `brush` selection are almost transparent and have an empty tooltip.
chart = (
    alt.Chart(data)
    .mark_point(filled=True)
    .encode(
        color="mt",
        opacity=alt.condition(brush, alt.value(1.0), alt.value(0.0025)),
        tooltip=alt.condition(brush, "name:N", alt.value('')),
        shape='ad_sym',
    )
    .add_selection(brush)
)


def _level_panel(base, level):
    """Return ``base`` as a wild-type (x) vs. mutant (y) scatter for one level.

    Both score fields are overwritten with the score plus ``sampleNormal(0,.05)``
    (presumably jitter so that identical scores do not overlap exactly), both
    axes use the fixed domain [-1, 5], and the panel is made pan/zoom interactive.

    Parameters
    ----------
    base : alt.Chart
        Chart carrying the shared mark, encodings and selection.
    level : int
        Level number ``i`` selecting the ``level_{i}_wt`` / ``level_{i}_mt`` columns.
    """
    wt_field = f"level_{level}_wt"
    mt_field = f"level_{level}_mt"
    return (
        base.encode(
            x=alt.X(f"{wt_field}:Q", scale=alt.Scale(domain=[-1, 5])),
            y=alt.Y(f"{mt_field}:Q", scale=alt.Scale(domain=[-1, 5])),
        )
        .transform_calculate(
            **{
                wt_field: f"datum.{wt_field} + sampleNormal(0,.05)",
                mt_field: f"datum.{mt_field} + sampleNormal(0,.05)",
            }
        )
        .interactive()
    )


# One panel per level, built in the same order as before.
c1, c2, c3, c4, c5 = [_level_panel(chart, level) for level in range(1, 6)]

In [None]:
# Preview the first rows of the edge table that feeds the scatter panels.
data.head()

In [None]:
# (rows, columns) of the table being plotted.
data.shape

In [None]:
# Dashboard grid: three rows of two charts; the network view sits next to the
# level-5 panel. View borders are removed.
top_row = alt.hconcat(c1, c2)
middle_row = alt.hconcat(c3, c4)
bottom_row = alt.hconcat(c5, graphChart)
c = alt.vconcat(top_row, middle_row, bottom_row).configure_view(strokeWidth=0)

**level_1 : LWH1_f**
- LWH1_f : Selective media score to test for interaction, for yeast spotted on SC -LW -histidine +1 mM 3AT

**level_2 : LWH10_f**
- LWH10_f : Selective media score to test for interaction, for yeast spotted on SC -LW -histidine +10 mM 3AT

**level_3 : LWH25_f**
- LWH25_f : Selective media score to test for interaction, for yeast spotted on SC -LW -histidine +25 mM 3AT

**level_4 : LWA_f**
- LWA_f : Selective media score to test for interaction, for yeast spotted on SC -LW -adenine

**level_5 : LWAH1_f**
- LWAH1_f : Selective media score to test for interaction, for yeast spotted on SC -LW -adenine -histidine +1 mM 3AT


<!-- LW : Selective media score to control for the presence of both plasmids (AD and DB) -->

In [None]:
# Render the assembled dashboard.
c

In [None]:
# Add delta_1..delta_5: wild-type score minus mutant score at each level.
data2 = data.assign(
    **{
        f"delta_{lvl}": data[f"level_{lvl}_wt"] - data[f"level_{lvl}_mt"]
        for lvl in range(1, 6)
    }
)

In [None]:
# Matrix of the delta columns (one row per edge), dropping rows with any missing delta.
dX = data2.filter(like="delta").dropna().values

In [None]:
# Inspect the delta matrix.
dX

In [None]:
# Mean delta per level with a band of +/- 1.96 standard deviations around it.
delta_levels = range(1, 6)
delta_mean = dX.mean(0)
delta_band = 1.96 * dX.std(0)
plt.plot(delta_levels, delta_mean)
plt.fill_between(delta_levels,
                 delta_mean - delta_band,
                 delta_mean + delta_band,
                 alpha=.25)

In [None]:
# Shape of the delta matrix before negative rows are removed.
dX.shape

In [None]:
# Discard every row that contains a negative delta at any level.
has_negative_delta = (dX < 0).any(axis=1)
dX = dX[~has_negative_delta]

In [None]:
# Shape of the delta matrix after removing rows with negative deltas.
dX.shape

In [None]:
from sklearn.decomposition import PCA

In [None]:
# PCA model keeping a single component.
pca = PCA(n_components=1)

In [None]:
# Fit the component on the filtered delta matrix.
pca.fit(dX)

In [None]:
# Histogram of the rows of dX projected onto the fitted component.
plt.hist(pca.transform(dX),bins=25)

In [None]:
# Weights of the five delta columns in the fitted component.
pca.components_

In [None]:
# Inspect the filtered delta matrix used for the PCA.
dX