In [None]:
import pickle

import networkx as nx
import numpy as np

In [None]:
# Load the pickled training graph. `nx.read_gpickle` was deprecated in
# NetworkX 2.6 and removed in 3.0; a .gpickle file is an ordinary pickle, so
# the standard library reads it on every NetworkX version.
# NOTE: unpickling can execute arbitrary code -- only load files you trust.
with open("data/y2hEdgotyping/edgotype_train.gpickle", "rb") as gpickle_file:
    edgotype_train = pickle.load(gpickle_file)

In [None]:
import pandas as pd

In [None]:
# Read the tab-separated interface table into a DataFrame.
# NOTE(review): this is an absolute, machine-specific path; consider moving it
# to a configurable data directory.
intinsider = pd.read_csv(
    "/data/dzeiberg/interactomeInsider/H_sapiens_interfacesHQ.txt",
    sep="\t",
)

In [None]:
# Show the loaded interface table as the cell output.
intinsider

In [None]:
# Flatten the graph into one row per edge:
#   P1 / P2           first `Entry` value of each endpoint's `uniprotMatches`
#                     table, or NaN when that table is empty
#   ensg_P1 / ensg_P2 the node ids of the two endpoints
#   edge_attrs        the edge's attribute dict
# Records are collected under their own name so `uniprotEdges` is only ever a
# DataFrame, and the frame is built in a single pass.
edgeRecords = []
for id_x, id_y, edge in edgotype_train.edges(data=True):
    acc_x = edgotype_train.nodes[id_x]["uniprotMatches"].Entry.values
    acc_y = edgotype_train.nodes[id_y]["uniprotMatches"].Entry.values
    edgeRecords.append({
        "P1": acc_x[0] if len(acc_x) else np.nan,
        "P2": acc_y[0] if len(acc_y) else np.nan,
        "ensg_P1": id_x,
        "ensg_P2": id_y,
        "edge_attrs": edge,
    })

# Explicit columns keep the schema even when the graph has no edges;
# dtype=object keeps every column as plain Python objects.
uniprotEdges = pd.DataFrame(edgeRecords,
                            columns=["P1", "P2", "ensg_P1", "ensg_P2", "edge_attrs"],
                            dtype=object)

In [None]:
# Show the per-edge table as the cell output.
uniprotEdges

In [None]:
# An edge's (P1, P2) order need not match the order used in `intinsider`, so
# the edges are matched in both orientations.
#
# For the reversed orientation the node ids are exchanged together with the
# accessions. Otherwise `ensg_P1` would describe the protein sitting in
# `intinsider`'s P2 column and would no longer line up with `P1_IRES`, which
# the proximity computation relies on.
uniprotEdgesSwapped = uniprotEdges.rename(columns={"P1": "P2",
                                                  "P2": "P1",
                                                  "ensg_P1": "ensg_P2",
                                                  "ensg_P2": "ensg_P1"})
# A self-interaction (P1 == P2) is identical in both orientations and is
# already matched by the forward merge; drop it here to avoid duplicate rows.
uniprotEdgesSwapped = uniprotEdgesSwapped[uniprotEdgesSwapped.P1 != uniprotEdgesSwapped.P2]

edgePredictedBindingSites = pd.concat(
    [pd.merge(intinsider, edges, how="inner", on=["P1", "P2"])
     for edges in (uniprotEdges, uniprotEdgesSwapped)]
).reset_index()

In [None]:
def parseRanges(s):
    """Expand a bracketed residue list such as "[1,2,5-7]" into a flat list of ints.

    Parameters
    ----------
    s : str, list, tuple or other
        Normally text made of comma-separated integers and inclusive
        "start-end" ranges, wrapped in one leading and one trailing character
        (e.g. "[3,8-10]"). A list/tuple is treated as already parsed and
        returned as a new list, so applying this function twice is harmless.
        Any other non-string value (e.g. NaN for a missing cell) yields [].

    Returns
    -------
    list of int
        The listed positions in input order, with each range expanded
        inclusively. Empty for "[]".

    Raises
    ------
    ValueError
        If a token is neither an integer nor a "start-end" pair of integers.
    """
    if isinstance(s, (list, tuple)):
        return list(s)
    if not isinstance(s, str):
        return []

    # Drop the wrapping characters; nothing left means an empty list.
    inner = s.strip()[1:-1].strip()
    if not inner:
        return []

    values = []
    for token in inner.split(","):
        token = token.strip()
        try:
            if "-" in token:
                start, stop = (int(part) for part in token.split("-"))
                values.extend(range(start, stop + 1))
            else:
                # A lone token such as "5" is a single position.
                values.append(int(token))
        except ValueError as exc:
            raise ValueError(f"cannot parse token {token!r} in {s!r}") from exc
    return values

In [None]:
# Replace both residue columns with the lists produced by parseRanges.
# The columns are overwritten, so this cell expects the values produced by the
# merge cell as its input.
edgePredictedBindingSites = edgePredictedBindingSites.assign(
    **{column: edgePredictedBindingSites[column].apply(parseRanges)
       for column in ("P1_IRES", "P2_IRES")}
)

In [None]:
def make_y2h_target(d):
    """Return whether any wt-minus-mt difference across five score pairs is >= 2.

    Parameters
    ----------
    d : mapping
        Must provide a "<prefix>wt" and a "<prefix>mt" entry for each prefix
        listed below; the two entries are subtracted.
        (Presumably Y2H readouts for wild type and mutant -- confirm.)

    Returns
    -------
    numpy.bool_
        True when at least one difference reaches the threshold of 2.
    """
    prefixes = ("LWH1_f_", "LWH10_f_", "LWH25_f_", "LWA_f_", "LWAH1_f_")
    deltas = np.array([d[prefix + "wt"] - d[prefix + "mt"] for prefix in prefixes],
                      dtype=float)
    return np.any(deltas >= 2)

In [None]:
def proximityToSite(r):
    """Score how close the variant position lies to the relevant residue list.

    Parameters
    ----------
    r : row-like
        Needs item access for "edge_attrs" (a mapping with "aa_change_mt" and
        "db_ensembl_gene_id_mt") and attribute access for `ensg_P1`,
        `P1_IRES` and `P2_IRES`.

    Returns
    -------
    number
        Negated minimum absolute distance between the variant position and
        the selected residues (0 when the position is one of them), or -1e10
        when the selected residue list is empty.
    """
    attrs = r["edge_attrs"]
    # The position is whatever remains after removing the first and last
    # three characters of the change string.
    position = int(attrs["aa_change_mt"][3:-3])
    # Use P1's residues when the variant's gene is P1, otherwise P2's.
    residues = r.P1_IRES if attrs["db_ensembl_gene_id_mt"] == r.ensg_P1 else r.P2_IRES
    if len(residues) == 0:
        return -1e10
    return -min(abs(position - residue) for residue in residues)

In [None]:
# Score every row with proximityToSite and store it as `proximity_to_site`.
edgePredictedBindingSites = edgePredictedBindingSites.assign(
    proximity_to_site=edgePredictedBindingSites.apply(proximityToSite, axis="columns")
)

In [None]:
# y2h_target:      label computed by make_y2h_target from each edge's attributes.
# variant_at_site: 1 when the variant position is itself one of the selected
#                  residues (proximity_to_site == 0), else 0.
# NOTE(review): this column was previously built with a column-wise apply
# (axis=0) that looked up "edge_attrs" in the row index and raised KeyError,
# under the name `variantAtBindingSite`. The ROC-AUC cell reads
# `variant_at_site`, so that name is used here. Confirm that the 0/1 indicator
# is the intended score.
edgePredictedBindingSites = edgePredictedBindingSites.assign(
    y2h_target=edgePredictedBindingSites.edge_attrs.apply(make_y2h_target),
    variant_at_site=edgePredictedBindingSites.proximity_to_site.eq(0).astype(int),
)

In [None]:
from sklearn.metrics import roc_auc_score

In [None]:
# Show the annotated edge table as the cell output.
edgePredictedBindingSites

In [None]:
# ROC AUC of the `variant_at_site` score against the `y2h_target` label; both
# columns must already be present on the frame.
roc_auc_score(
    y_true=edgePredictedBindingSites["y2h_target"],
    y_score=edgePredictedBindingSites["variant_at_site"],
)