In [None]:
import pickle

import networkx as nx
import numpy as np

In [None]:
import pandas as pd

# Load HuRI

In [None]:
# The HuRI union file is parsed as tab-separated and headerless, so the
# resulting columns are labelled with the integers 0, 1, ...
huri = pd.read_csv("data/HuRI/HI-union.tsv", sep="\t", header=None)

In [None]:
# Display the HuRI table that was just loaded.
huri

# Make networkx graph for HuRI

In [None]:
# Undirected HuRI graph: every row of the table contributes one edge between
# the identifiers found in columns 0 and 1.
G = nx.Graph()
G.add_edges_from(zip(huri[0], huri[1]))

# Load Processed Edgotyping Data

In [None]:
# Processed edgotyping table; the first CSV column is used as the row index.
df = pd.read_csv(
    "data/y2hEdgotyping/y2hWithMutPred2Info.csv",
    index_col=0,
)

In [None]:
# Display the edgotyping table that was just loaded.
df

# Get Unique Nodes in Edgotyping Graph

In [None]:
# Stack the DB-side and AD-side (gene id, protein sequence) pairs under common
# column names, then keep each distinct pair once.
db_nodes = df[["db_ensembl_gene_id_mt", "p_seq_db"]].rename(
    columns={"db_ensembl_gene_id_mt": "ensembl_gene_id", "p_seq_db": "p_seq"}
)
ad_nodes = df[["ad_ensembl_gene_id_mt", "p_seq_ad"]].rename(
    columns={"ad_ensembl_gene_id_mt": "ensembl_gene_id", "p_seq_ad": "p_seq"}
)
# reset_index() (without drop) keeps the old row labels as an extra leading
# "index" column in the result.
nodes = pd.concat([db_nodes, ad_nodes]).drop_duplicates().reset_index()

# Write sequences to csv files in batches of 100 for Uniprot Peptide Search

In [None]:
# One output file per batch of sequences, one sequence per line, for upload to
# the UniProt Peptide Search. The built-in range() is used for the batch start
# offsets so this cell does not depend on numpy being imported.
BATCH_SIZE = 100
for fNum, startIdx in enumerate(range(0, len(nodes), BATCH_SIZE)):
    with open(f"data/y2hEdgotyping/sequences_{fNum}.csv", "w") as f:
        # Only the sequence column is written, so iterate it directly.
        for seq in nodes["p_seq"].iloc[startIdx:startIdx + BATCH_SIZE]:
            f.write(f"{seq}\n")

# Write Edgotyping Sequences to fasta for blastp search

In [None]:
# Emit one FASTA record per row of `nodes`: the gene id is the header line and
# the protein sequence is the body.
with open("data/y2hEdgotyping/sequences.fasta", "w") as fasta_out:
    for ensg, seq in zip(nodes["ensembl_gene_id"], nodes["p_seq"]):
        fasta_out.write(f">{ensg}\n{seq}\n")

# Make Edgotyping networkx graph

In [None]:
# Multigraph so that several edges can later connect the same pair of genes.
y2hNetwork = nx.MultiGraph()

# (gene id, ORF id, sequence, symbol) column names for the DB side and then the
# AD side. The two sides are processed in this order, so when a gene id occurs
# more than once the attributes from the last add_node call are the ones kept.
endpoint_columns = (
    ("db_ensembl_gene_id_mt", "db_orf_id", "p_seq_db", "symbol_db"),
    ("ad_ensembl_gene_id_mt", "ad_orf_id", "p_seq_ad", "symbol_ad"),
)
for gene_col, orf_col, seq_col, symbol_col in endpoint_columns:
    unique_endpoints = df[[gene_col, orf_col, seq_col, symbol_col]].drop_duplicates()
    for _, rec in unique_endpoints.iterrows():
        y2hNetwork.add_node(
            rec[gene_col],
            orf_id=rec[orf_col],
            seq=rec[seq_col],
            symbol=rec[symbol_col],
        )

In [None]:
# Show the edgotyping table again for reference before building edges from it.
df

In [None]:
# Columns copied onto every edge as attributes. The first two are the edge
# endpoints (DB gene, AD gene).
edge_columns = [
    "db_ensembl_gene_id_mt", "ad_ensembl_gene_id_mt", "db_mut_id_mt", "aa_change_mt",
    "clinical_significance_mt", "nt_change_mt", "Substitution", "MutPred2 score",
    'Molecular mechanisms with Pr >= 0.01 and P < 0.99',
    'Motif information', 'Remarks', 'featFileNum', 'fileRowNum',
    "LW_mt", "LWH1_f_mt", "LWH10_f_mt", "LWH25_f_mt", "LWA_f_mt", "LWAH1_f_mt",
    "score_mt", "score_norm_mt",
    'LW_wt', 'LWH1_f_wt', 'LWH10_f_wt', 'LWH25_f_wt', 'LWA_f_wt', 'LWAH1_f_wt',
    "score_wt", "score_norm_wt",
]

# Every table row becomes its own edge (y2hNetwork is a MultiGraph, so rows
# sharing the same gene pair are kept as parallel edges).
for _, r in df[edge_columns].iterrows():
    # Endpoints are looked up by column label: integer keys such as r[0] on a
    # string-labelled Series are deprecated positional access in pandas.
    y2hNetwork.add_edge(
        r["db_ensembl_gene_id_mt"],
        r["ad_ensembl_gene_id_mt"],
        **r.to_dict(),
    )

In [None]:
# Total number of edges in the edgotyping multigraph (parallel edges included).
y2hNetwork.number_of_edges()

In [None]:
# Weight 1 for HuRI edges whose endpoints are also connected in the edgotyping
# network, 0 for all other HuRI edges.
for u, v, attrs in G.edges(data=True):
    attrs["weight"] = int(y2hNetwork.has_edge(u, v))

In [None]:
# Sum of the edge weights, i.e. how many HuRI edges were given weight 1.
sum(attrs["weight"] for _, _, attrs in G.edges(data=True))

# pymetis example

In [None]:
# Toy adjacency list written with 1-based vertex labels ...
one_based_adj = {
    1: [2, 3, 5],
    2: [1, 3, 4],
    3: [1, 2, 4, 5],
    4: [2, 3, 6, 7],
    5: [1, 3, 6],
    6: [5, 4, 7],
    7: [4, 6],
}
# ... shifted down by one so that vertices are labelled 0..6.
metisAdj = {
    vertex - 1: [nbr - 1 for nbr in nbrs]
    for vertex, nbrs in one_based_adj.items()
}

In [None]:
# Build the toy networkx graph from the 0-based adjacency lists.
mg = nx.from_dict_of_lists(metisAdj)

In [None]:
# Draw the toy graph for visual inspection.
nx.draw_networkx(mg)

In [None]:
def toMetisFormat(MG):
    """Flatten a graph into the adjacency arrays passed to ``pymetis.part_graph``.

    Nodes are numbered 0..n-1 in the order ``MG.nodes()`` yields them.

    Parameters
    ----------
    MG : graph object
        Must provide ``nodes()``, ``neighbors(node)`` and ``edges()[(u, v)]``
        returning the attribute mapping of edge (u, v).
        NOTE(review): indexing ``edges()`` with a 2-tuple presumably requires a
        simple (non-multi) graph -- confirm before passing a MultiGraph.

    Returns
    -------
    xadj : list of int
        Offsets into ``adjncy``; the neighbours of node ``i`` are
        ``adjncy[xadj[i]:xadj[i + 1]]``. Always starts with 0.
    adjncy : list of int
        Concatenated neighbour indices of every node.
    weight : list
        One entry per element of ``adjncy``: the edge's ``"weight"`` attribute,
        or 1 when the edge has none.
    nodemap : dict
        Maps each integer index back to the original node identifier.
    """
    node_order = list(MG.nodes())
    index_of = {node: position for position, node in enumerate(node_order)}

    xadj, adjncy, weight = [0], [], []
    for node in node_order:
        nbrs = list(MG.neighbors(node))
        adjncy.extend(index_of[nbr] for nbr in nbrs)
        # Running total of adjacency entries marks where the next node starts.
        xadj.append(xadj[-1] + len(nbrs))
        for nbr in nbrs:
            # Edges without an explicit weight count as weight 1.
            weight.append(MG.edges()[(node, nbr)].get("weight", 1))

    return xadj, adjncy, weight, dict(enumerate(node_order))

In [None]:
# Convert the toy graph into the flat arrays consumed by pymetis below.
xadj,adjncy,weight,nodemap = toMetisFormat(mg)

In [None]:
import pymetis

In [None]:
# Partition the toy graph into 2 parts. `membership` holds one partition label
# per node, in the node order used by toMetisFormat.
n_cuts,membership = pymetis.part_graph(2,xadj=xadj,adjncy=adjncy,eweights=weight)

In [None]:
# First value returned by part_graph for the toy graph.
n_cuts

In [None]:
# Partition label assigned to each toy-graph node.
membership

In [None]:
# Toy-graph nodes that were assigned to partition 1.
in_partition_one = np.asarray(membership) == 1
np.asarray(list(mg.nodes()))[in_partition_one]

# Apply to HuRI

In [None]:
# Same conversion for the full HuRI graph; hu_nodemap maps array indices back
# to the HuRI node identifiers.
hu_xadj,hu_adjncy,hu_weight,hu_nodemap = toMetisFormat(G)

In [None]:
import pymetis

In [None]:
# Partition HuRI into 3 parts, using the 0/1 edge weights assigned earlier.
# The three labels are used below as the train / validation / test split.
hu_n_cuts,hu_membership = pymetis.part_graph(3,xadj=hu_xadj,
                                             adjncy=hu_adjncy,
                                             eweights=hu_weight)

In [None]:
# First value returned by part_graph for the HuRI graph.
hu_n_cuts

In [None]:
# Distinct partition labels and the number of HuRI nodes carrying each one.
np.unique(hu_membership,return_counts=True)

In [None]:
def _nodes_in_partition(part_id):
    """Return the HuRI node identifiers whose partition label equals ``part_id``.

    Nodes are returned in ascending order of their METIS index.
    """
    return [
        hu_nodemap[i]
        for i, label in enumerate(hu_membership)
        if label == part_id
    ]


# Partition 0 -> train, 1 -> validation, 2 -> test. The same node set selects
# the matching subgraph of both the HuRI graph and the edgotyping multigraph.
trainIDs = _nodes_in_partition(0)
G_train = G.subgraph(trainIDs)
y2hNetwork_train = y2hNetwork.subgraph(trainIDs)

valIDs = _nodes_in_partition(1)
G_val = G.subgraph(valIDs)
y2hNetwork_val = y2hNetwork.subgraph(valIDs)

testIDs = _nodes_in_partition(2)
G_test = G.subgraph(testIDs)
y2hNetwork_test = y2hNetwork.subgraph(testIDs)

In [None]:
# HuRI edge counts inside the train, validation and test subgraphs.
tuple(len(split.edges) for split in (G_train, G_val, G_test))

In [None]:
# Edgotyping edge counts inside the train, validation and test subgraphs.
tuple(
    len(split.edges)
    for split in (y2hNetwork_train, y2hNetwork_val, y2hNetwork_test)
)

In [None]:
# nx.write_gpickle was removed in NetworkX 3.0, so the graphs are saved with
# the standard-library pickle module (imported as `pickle` in the notebook's
# first cell). The files can be read back with pickle.load.
split_graphs = {
    "data/y2hEdgotyping/HuRI_train.gpickle": G_train,
    "data/y2hEdgotyping/HuRI_val.gpickle": G_val,
    "data/y2hEdgotyping/HuRI_test.gpickle": G_test,
    "data/y2hEdgotyping/edgotype_train.gpickle": y2hNetwork_train,
    "data/y2hEdgotyping/edgotype_val.gpickle": y2hNetwork_val,
    "data/y2hEdgotyping/edgotype_test.gpickle": y2hNetwork_test,
}
for out_path, graph in split_graphs.items():
    with open(out_path, "wb") as fh:
        # subgraph() returns a view backed by its parent graph; copy() turns it
        # into an independent graph so each file holds only its own split.
        pickle.dump(graph.copy(), fh, protocol=pickle.HIGHEST_PROTOCOL)