# ORGANISM: Mus_musculus
# Datasource: https://downloads.thebiogrid.org/BioGRID/Release-Archive/BIOGRID-3.5.182/

In [1]:
import numpy as np
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt

In [2]:
df_original = pd.read_csv("data/BIOGRID-ORGANISM-Mus_musculus-3.5.182.tab2.txt", sep='\t')

In [3]:
df_original.head()

Unnamed: 0,#BioGRID Interaction ID,Entrez Gene Interactor A,Entrez Gene Interactor B,BioGRID ID Interactor A,BioGRID ID Interactor B,Systematic Name Interactor A,Systematic Name Interactor B,Official Symbol Interactor A,Official Symbol Interactor B,Synonyms Interactor A,...,Pubmed ID,Organism Interactor A,Organism Interactor B,Throughput,Score,Modification,Phenotypes,Qualifications,Tags,Source Database
0,117015,4087,75141,110262,217250,-,-,SMAD2,Rasd2,JV18|JV18-1|MADH2|MADR2|hMAD-2|hSMAD2,...,15761153,9606,10090,High Throughput,-,-,-,-,-,BIOGRID
1,117016,4087,19376,110262,202576,-,RP23-185A18.5,SMAD2,Rab34,JV18|JV18-1|MADH2|MADR2|hMAD-2|hSMAD2,...,15761153,9606,10090,High Throughput,-,-,-,-,-,BIOGRID
2,117017,4087,69159,110262,213261,-,-,SMAD2,Rhebl1,JV18|JV18-1|MADH2|MADR2|hMAD-2|hSMAD2,...,15761153,9606,10090,High Throughput,-,-,-,-,-,BIOGRID
3,117018,4087,72433,110262,215371,-,-,SMAD2,Rab38,JV18|JV18-1|MADH2|MADR2|hMAD-2|hSMAD2,...,15761153,9606,10090,High Throughput,-,-,-,-,-,BIOGRID
4,117019,4087,69288,110262,213337,-,-,SMAD2,Rhobtb1,JV18|JV18-1|MADH2|MADR2|hMAD-2|hSMAD2,...,15761153,9606,10090,High Throughput,-,-,-,-,-,BIOGRID


In [4]:
df = df_original[["Official Symbol Interactor A", "Official Symbol Interactor B"]]

In [5]:
df.head()

Unnamed: 0,Official Symbol Interactor A,Official Symbol Interactor B
0,SMAD2,Rasd2
1,SMAD2,Rab34
2,SMAD2,Rhebl1
3,SMAD2,Rab38
4,SMAD2,Rhobtb1


In [6]:
G = nx.from_pandas_edgelist(df, 'Official Symbol Interactor A', 'Official Symbol Interactor B', create_using=nx.DiGraph())

In [7]:
print(G.is_directed(), len(G.nodes), len(G.edges()))

True 13567 46406


In [8]:
# deleting self-loop edges
for n, nbrs in G.adj.items():
    if n in nbrs:
        G.remove_edge(n,n)

In [9]:
print(G.is_directed(), len(G.nodes), len(G.edges()))

True 13567 46111


### 3 nodes chain excluding common regulator 

In [10]:
def common_regulator(G, node1, node2):
    in_node1 = [a[0] for a in G.in_edges(node1)]
    in_node2 = [b[0] for b in G.in_edges(node2)]
    for i in in_node1:
        if i in in_node2:
            return True
        return False

In [11]:
common_regulator(G,'marA','putP')

In [12]:
node_set = set() # to save the node list
for node1 in G.nodes:
    for node2 in G.neighbors(node1):
        if (G.out_degree(node2)>1 or G.out_degree(node2)>1 or node2==node1):
            continue
        else:
            for node3 in G.neighbors(node2):
                if(node3 not in [node1, node2] and (node3 not in G.neighbors(node1)) and not(common_regulator(G, node1, node3))): 
                    print(node1, node2, node3)
                    node_set.add(node1); node_set.add(node2); node_set.add(node3);

SMAD2 Map2k3 Mapk14
SMAD2 Trim35 Irf7
Wwp2 Egr2 Pias2
Lmo2 Kmt2b H3f3a
Lmo2 Pold1 Timeless
Lmo2 Psmc1 Ngly1
Lmo2 Psme3 Mafa
Lmo2 Appbp2 Crocc
Lmo2 Drg1 Zc3h15
Smad3 POLR2G Atf7ip
Smad3 PPIL2 Crnkl1
SMAD3 Map2k3 Mapk14
SMAD3 Trim35 Irf7
SMAD3 Erbb2ip Pkp4
SMAD4 Rab13 Micall2
SMAD4 Map2k3 Mapk14
SMAD4 Trim35 Irf7
SMAD4 Rnf14 Zfp236
SMAD4 Ubb Polk
SMAD4 Ap2b1 Eps15
SMAD4 Apbb2 Mapk8
Smad1 Mxi1 Smarca5
TGFBR1 Rab13 Micall2
TGFBR1 Gna13 Cdh1
TGFBR1 Arhgap31 Itsn1
TGFBR1 Bcr Rac1
TGFBR1 Fbxl12 Camk1
TGFBR1 Map2k3 Mapk14
TGFBR1 Csnk1a1 Cops2
TGFBR1 Noxo1 Cyba
TGFBR1 Fkbp1b Ryr2
TGFBR1 Ubb Polk
TGFBR1 Usp21 Nanog
Atg16l1 Rpl5 Npm1
Atg16l1 Exosc4 Ddx60
Atg16l1 Hnrnpd Cited4
Atg16l1 Cpt1a Bcl2
Atg16l1 Csnk2a1 Akt1
Atg16l1 Evl Trim9
Atg16l1 Gnai3 Gab1
Atg16l1 Trip12 Sox6
Atg16l1 Kras Zhx2
Atg16l1 Lmnb1 Map1lc3b
Atg16l1 Nap1l4 Nap1l2
Atg16l1 Plec Actg2
Atg16l1 Lcp1 Atxn2
Atg16l1 Pold1 Timeless
Atg16l1 Med1 Cebpb
Atg16l1 Prps1 Park2
Atg16l1 Psme3 Mafa
Atg16l1 Rbbp4 Dppa3
Atg16l1 Polr2c Polr2j
Atg16

Nr0b2 THRA Nsd1
Nr0b2 Nr5a2 Nr1h4
cgn Tjp1 Actn4
Hspa5 SNCAIP Pphln1
Kpna2 CBX1 Suv39h1
Kpna2 SART3 Usp15
Kpna1 CBX1 Suv39h1
Inadl Kcnj10 Il16
Inadl Kcnj15 Il16
Inadl Grin2d Il16
Grin2a Lrp1 Ranbp9
Grin1 Nos1 Dmd
Grin1 Grm1 UBC
Grin1 Akap5 Pde10a
Grin1 Akap9 Tsnax
Grin1 Map2 Kndc1
Grin1 Actn2 Ldb3
Grin1 Spta1 Cbl
Grin1 Tjp1 Actn4
Grin1 Lrp1 Ranbp9
Csnk1e CSNK1E Trp53
Csnk1e AP2A1 Eps15
Csnk1e CRY2 Rfwd2
Dapk3 Ube2d3 Tnfaip3
Dapk3 Ube2d2a Rnf25
Hsp90ab1 Irf3 Trim26
Hsp90ab1 Nod2 Socs3
Hsp90ab1 Pgr Esr1
Six6 Gtf2a1l Tbpl1
CGN Tjp1 Actn4
Nrp1 Sema4f Dlg4
Nrp2 Sema4f Dlg4
Keap1 Atg7 Atg3
Senp2 PML Per2
HTT Hap1 Htt
Crebbp Polr1b Gmeb2
Crebbp Hdac10 Zfp236
Crebbp Pou1f1 Ncor1
Crebbp Zbtb17 Irf8
Htt Sptan1 Ywhab
Htt Vdac1 Itpr3
Htt Syn1 Prnp
Htt Alpl COL2A1
Htt PRNP Dpp6
Htt STX1A Snap23
Htt RAC1 Sh3rf1
Htt Mdh2 Cs
Ptpn11 Lilrb4 Ptpn6
Id3 Cops7a Sos1
S100a4 Metap2 Eif2a
Tnfrsf14 TRAF5 Srsf1
E2f1 Ndnl2 Nsmce1
Sgcb Sgcz Dmd
Mtor RPTOR Rheb
Raf1 Kndc1 Map2
RRN3 Polr1b Gmeb2
Polr1b Gmeb2 Zfp277


Ldb1 Pitx2 Kat5
Otx2 Cdk13 MTRR
Otx2 Crx Nr2e3
Otx2 Dido1 Polr2a
Otx2 Rbbp4 Dppa3
Otx2 Sfpq Per2
Otx2 Trip12 Sox6
Otx2 Pcbp1 Il6
Otx2 U2af2 Dppa3
Mixl1 Med16 Tardbp
Ar Actn2 Ldb3
Pou2f1 Polr1a Tcf3
Runx1t1 Neurog1 TCF3
Cited4 Rai14 Ssbp4
Cited4 Rbm39 NCOA6
Bach1 CBX3 Pax3
Bach1 SMARCA4 Trp53
Bach1 GTF2I Pias2
Bach1 KDM1A Mef2d
Bach1 CNOT1 Esr1
Bach1 KIF5B Ostm1
Bach1 RCC2 Rac1
Bach1 PIN1 Myb
Pcbd1 Polr1a Tcf3
Pcbd1 Asb3 Ssbp3
Pcbd1 Zdhhc13 Zbtb39
Brca1 Etv4 Rnf4
Ewsr1 Kdm5d Zbtb9
Ewsr1 Zfp111 Gmeb1
Ewsr1 Cers5 Wwtr1
Ewsr1 Asb6 Sh2b2
Ewsr1 Rai14 Ssbp4
Ewsr1 Hdac10 Zfp236
Ewsr1 Med16 Tardbp
Hoxa2 Asb12 Myocd
Hoxa2 Rai14 Ssbp4
Hoxa2 Rbm39 NCOA6
Olig1 Gtf2a1 Snapc4
Ciita Gmeb1 Zdhhc6
Ssbp3 Isl2 Zfp446
Ssbp3 Med16 Tardbp
Ssbp4 Isl2 Zfp446
Ssbp4 Med16 Tardbp
Runx2 Men1 Rnf20
Nkx2-3 Zfp263 Zfp768
Cebpa Ddit3 Cers2
Cebpa Eif4a3 EIF4A3
Cebpa Hp1bp3 Dppa3
Cebpa Lmnb1 Map1lc3b
Cebpa Rbm39 NCOA6
Cebpa Lbr Hist4h4
Cebpa Mcm4 Fancc
Cebpa Sptan1 Ywhab
Cebpa Rps3 Npm1
Cebpa Rnps1 Foxe3
Cebpa Sfpq Per2

Fxr1 Akap9 Tsnax
Fxr1 Clip1 Clasp1
Fxr1 Dido1 Polr2a
Fxr1 Ebf3 Ebf4
Fxr1 Golga3 Tsnax
Fxr1 Grik3 Grik2
Fxr1 Mllt4 Ryk
Fxr1 Ncor2 Pin1
Fxr1 Nop2 Dppa3
Fxr1 Scrib Vangl2
Fxr1 Ttc3 Cit
Fxr1 Zfp219 Hist1h2ai
Iqgap1 Clip1 Clasp1
Use1 Ubr2 Tex19.1
Dctn1 DERL1 Vcp
Anapc13 CDC16 Ppp5c
Anapc13 CDC27 Ppp5c
Anapc13 STX3 Snap23
Anapc13 MARCH6 Ube2g2
Haus4 GNAI2 Tuba1a
Cep152 CSNK1E Trp53
Cep152 E2F2 Sp1
Cep152 ATXN3 Ubr2
Cep152 RHOBTB1 Lrrc41
Cep152 ERBB2IP Pkp4
Cep152 GADD45GIP1 Nr4a1
Poc1b GNAI2 Tuba1a
Kif1c KIF1C Ccdc64
Prpf8 SNW1 Vdr
Nhp2l1 SART3 Usp15
Nhp2l1 KPNA2 Zbtb33
Soga1 PPP2R5A Ccng1
Soga1 MARK2 Crtc2
Mki67 KPNA2 Zbtb33
Gm9174 CDC27 Ppp5c
Gm9174 CDC16 Ppp5c
Cep76 GNAI2 Tuba1a
Cep350 VCL Raver1
AI837181 CANX Edem1
Haus1 GNAI2 Tuba1a
Mis12 CBX3 Pax3
Mis12 TRIM29 H1f0
Mis12 STX3 Snap23
Mis12 CBX1 Suv39h1
Mis12 CBX5 Pax3
Sgol1 PPP2R5A Ccng1
Cep72 PPP1R2 Nek2
Cep72 RUNX1 Cbfb
Cep72 CSNK1E Trp53
Mad2l1 INSR Stat5b
Mad2l1 CDC16 Ppp5c
Mad2l1 CDC27 Ppp5c
Mad2l1 BCLAF1 Fhl1
Mad2l1 DAPK3 Atf4
Ana

Wdtc1 Pcbp1 Il6
Wdtc1 Trip13 Chd4
Wdtc1 Cops3 Sos1
Wdtc1 Cops7a Sos1
Wdtc1 Slc25a13 Dync1i1
Wdtc1 Psme3 Mafa
Wdtc1 Hsd17b7 AHSG
Wdtc1 Gas7 Was
Wdtc1 Lims1 Ilk
Wdtc1 Pdlim2 RELA
Wdtc1 Fez2 Ndn
Wdtc1 Ptges2 Cebpb
Wdtc1 Luc7l Rsrc1
Wdtc1 Arfgap1 Lrrk2
Wdtc1 Gatad2b Nanog
Wdtc1 Lbr Hist4h4
Wdtc1 Ndc1 NDC1
Wdtc1 Pdgfra Prmt5
Wdtc1 Txn1 Dtnbp1
Wdtc1 Ifitm3 Atp6v0b
Wdtc1 Ift57 Ift88
Wdtc1 Cpt1a Bcl2
Wdtc1 Tuba1b DNAJA1
Lin7c DLG1 Myo6
Lin7c GNAI2 Tuba1a
Lin7c NF2 Schip1
Pex5l Ago1 Trim32
Pex5l Ap2b1 Eps15
Pex5l Gabbr2 Atf4
Pex5l Hcn4 Cav3
Pex5l Sptan1 Ywhab
Topors Psmc1 Ngly1
Grik1 Grik2 Grik3
Myo1c ACTN1 Grm5
Myo1c AP2A1 Eps15
Myo1c DAPK3 Atf4
Myo1c CTTN Hip1r
Myo1c EPS15 Numb
Myo1c SSH2 Cfl1
Klc3 KIF5B Ostm1
Syce2 Sycp1 Syce1
Plekhg5 Mpdz F11r
Cdk5 Cdk5r1 Suds3
Cdk5 Nefh Ndel1
Cdk5 HIST1H1A Uhrf1
Chrdl2 Bmp4 Htra1
Chrdl2 Gdf5 Htra1
Itga6 Sdc4 Itgb4
Lnx2 Fermt2 Flna
Lnx2 Kcnj10 Il16
Lnx2 Ctnnd2 Gsk3b
Lnx2 Sbf2 Mtmr2
Grasp Cyth3 Frmd4b
PID1 Lrp1 Ranbp9
Bcl7c MARK3 Crtc2
Bcl7c SMARCB1 Myc
Coro

GCG Tubb4a Lrrk2
GCG Hist1h1d Huwe1
GCG Gria3 Sqstm1
GCG Map2 Kndc1
GCG Scg2 Gga1
GCG Zranb1 APC
GCG Ywhaq Lrrk2
GCG Stmn2 Pcdhgb1
GCG Pcbp1 Il6
GCG Hnrnpd Cited4
GCG Ttn Ankrd1
GCG Pkn1 Zfand6
GCG Eif4a3 EIF4A3
Dppa3 CANX Edem1
Dppa3 CDK5 Suds3
Dppa3 YY1 Hoxa11
Dppa3 FHL1 Nfatc1
Dppa3 POLR2C Atf7ip
Dppa3 SIAH1 Myb


In [13]:
# H = G.subgraph(list(node_set))
# pos = nx.circular_layout(H)
# nx.draw(H, pos, cmap = plt.get_cmap('jet'), node_size = 2000)
# nx.draw_networkx_labels(H, pos)
# plt.show()

### 4 nodes chain

In [14]:
# pattern 2 search
for node1 in G.nodes:
    for node2 in G.neighbors(node1):
        if (G.out_degree(node2)>1 or G.out_degree(node2)>1 or node2==node1):
            continue
        else:
            for node3 in G.neighbors(node2):
                if (G.out_degree(node3)>1 or G.out_degree(node3)>1 or node3 in [node1, node2]):
                    continue
                else:
                    for node4 in G.neighbors(node3):
                        if(node4 not in [node1, node2, node3] and (node4 not in G.neighbors(node1)) and not(common_regulator(G, node1, node4))):
                            print(node1, node2, node3, node4)

Atg16l1 Kras Zhx2 Hras
Atg16l1 Polr2c Polr2j Pbx4
Map3k7 Map2k4 Mapk10 Mapkbp1
Ubtf Polr1b Gmeb2 Zfp277
Trappc2 Trappc5 Trappc3 Trappc1
Kat8 KAT8 Baz2a Smarca5
Actb CGN Tjp1 Actn4
Myh9 MARK3 Crtc2 YWHAB
Myh9 CGN Tjp1 Actn4
Myh9 SSH2 Cfl1 Ipo9
Apc Esr2 Rbm39 NCOA6
Hcn1 Mpdz F11r CASK
Fancd2 Slc25a4 BCLAF1 Fhl1
Fancd2 Txn1 Dtnbp1 Cmya5
Trp53 Topors Psmc1 Ngly1
Ubqln2 Ubqln1 Bcr Rac1
Tead2 Mpdz F11r CASK
Nphs2 Slc25a4 BCLAF1 Fhl1
Ywhaz Homer3 Grm1 UBC
Crebbp Polr1b Gmeb2 Zfp277
Htt RAC1 Sh3rf1 Shroom3
RRN3 Polr1b Gmeb2 Zfp277
Cldn1 Mpdz F11r CASK
ARL2BP Slc25a4 BCLAF1 Fhl1
App Ubqln1 Bcr Rac1
ADA2 Tada3 ESR1 Nsd1
TADA2A Tada3 ESR1 Nsd1
Ywhae Homer3 Grm1 UBC
Melk Txn1 Dtnbp1 Cmya5
Pafah1b1 ATXN3 Ubr2 Tex19.1
Ndn Tada3 ESR1 Nsd1
Frs2 PRKCI Dido1 Polr2a
Map3k1 Map2k4 Mapk10 Mapkbp1
Erbb2 Erbb3 L1cam Ranbp9
Ubc Topors Psmc1 Ngly1
Ubc Slc25a4 BCLAF1 Fhl1
Mapt Slc25a4 BCLAF1 Fhl1
Ctcf Polr1b Gmeb2 Zfp277
Eed Xrcc1 REV1 Mad2l2
Eed Slc25a4 BCLAF1 Fhl1
Eed Tomm20 Vdac1 Itpr3
Eed Txn1 Dtnbp1 Cmya5


### 5 nodes chain

In [15]:
# pattern 2 search
for node1 in G.nodes:
    for node2 in G.neighbors(node1):
        if (G.out_degree(node2)>1 or G.out_degree(node2)>1 or node2==node1):
            continue
        else:
            for node3 in G.neighbors(node2):
                if (G.out_degree(node3)>1 or G.out_degree(node3)>1 or node3 in [node1, node2]):
                    continue
                else:
                    for node4 in G.neighbors(node3):
                        if (G.out_degree(node4)>1 or G.out_degree(node4)>1 or node4 in [node1, node2, node3]):
                            continue
                        else:
                            for node5 in G.neighbors(node4):
                                if(node4 not in [node1, node2, node3, node4] and (node5 not in G.neighbors(node1)) and not(common_regulator(G, node1, node5))):
                                    print(node1, node2, node3, node4, node5)                            

Comment: No chain of length 5 found