# ORGANISM: Drosophila_melanogaster
# Datasource: https://downloads.thebiogrid.org/BioGRID/Release-Archive/BIOGRID-3.5.182/

In [1]:
import numpy as np
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt

In [2]:
df_original = pd.read_csv("data/BIOGRID-ORGANISM-Drosophila_melanogaster-3.5.182.tab2.txt", sep='\t')

In [3]:
df_original.head()

Unnamed: 0,#BioGRID Interaction ID,Entrez Gene Interactor A,Entrez Gene Interactor B,BioGRID ID Interactor A,BioGRID ID Interactor B,Systematic Name Interactor A,Systematic Name Interactor B,Official Symbol Interactor A,Official Symbol Interactor B,Synonyms Interactor A,...,Pubmed ID,Organism Interactor A,Organism Interactor B,Throughput,Score,Modification,Phenotypes,Qualifications,Tags,Source Database
0,30772,43383,37006,68259,62692,Dmel_CG10002,Dmel_CG6459,fkh,P32,CG10002|DmelCG10002|Dmfkh|Sebp2|dFoxA,...,14605208,7227,7227,High Throughput,-,-,-,-,-,BIOGRID
1,30773,41450,35735,66567,61601,Dmel_CG10007,Dmel_CG11205,Tango9,phr,87A7-9/5|CG10007|DmelCG10007|anon-87Ad,...,14605208,7227,7227,High Throughput,-,-,-,-,-,BIOGRID
2,30774,43384,35808,68260,61662,Dmel_CG10009,Dmel_CG11635,Noa36,CG11635,CG10009|DmelCG10009|NOA 36,...,14605208,7227,7227,High Throughput,-,-,-,-,-,BIOGRID
3,30775,43384,31396,68260,57913,Dmel_CG10009,Dmel_CG3062,Noa36,CG3062,CG10009|DmelCG10009|NOA 36,...,14605208,7227,7227,High Throughput,-,-,-,-,-,BIOGRID
4,30776,41481,36329,66594,62106,Dmel_CG10014,Dmel_CG8472,CG10014,Cam,DmelCG10014,...,14605208,7227,7227,High Throughput,-,-,-,-,-,BIOGRID


In [4]:
df = df_original[["Official Symbol Interactor A", "Official Symbol Interactor B"]]

In [5]:
df.head()

Unnamed: 0,Official Symbol Interactor A,Official Symbol Interactor B
0,fkh,P32
1,Tango9,phr
2,Noa36,CG11635
3,Noa36,CG3062
4,CG10014,Cam


In [6]:
G = nx.from_pandas_edgelist(df, 'Official Symbol Interactor A', 'Official Symbol Interactor B', create_using=nx.DiGraph())

In [7]:
print(G.is_directed(), len(G.nodes), len(G.edges()))

True 9345 65390


In [8]:
# deleting self-loop edges
for n, nbrs in G.adj.items():
    if n in nbrs:
        G.remove_edge(n,n)

In [9]:
print(G.is_directed(), len(G.nodes), len(G.edges()))

True 9345 64755


### 3 nodes chain excluding common regulator 

In [10]:
def common_regulator(G, node1, node2):
    in_node1 = [a[0] for a in G.in_edges(node1)]
    in_node2 = [b[0] for b in G.in_edges(node2)]
    for i in in_node1:
        if i in in_node2:
            return True
        return False

In [11]:
common_regulator(G,'marA','putP')

False

In [12]:
node_set = set() # to save the node list
for node1 in G.nodes:
    for node2 in G.neighbors(node1):
        if (G.out_degree(node2)>1 or G.out_degree(node2)>1 or node2==node1):
            continue
        else:
            for node3 in G.neighbors(node2):
                if(node3 not in [node1, node2] and (node3 not in G.neighbors(node1)) and not(common_regulator(G, node1, node3))): 
                    print(node1, node2, node3)
                    node_set.add(node1); node_set.add(node2); node_set.add(node3);

fkh CR31054 mus304
P32 CG8435 CG9426
P32 CG13454 ATPCL
phr CG2889 aru
Noa36 CG3062 Traf-like
Noa36 Syt14 Cyp1
CG11635 Mst84Dd CG34449
CG11635 CG18262 AMPdeam
Cam ms(3)76Cc Ndc80
Cam RyR CG14315
Cam CanA-14F sra
drm CG7101 Jwa
Pex5 CG9527 A16
CG17666 CG4151 NimC1
CG17666 CG5011 Trs33
CG17666 CG18180 CG32036
CG17666 CG2127 NimC1
CG17666 CG2144 dpr17
CG17666 CG5866 NimC1
CG17666 ssp6 NimC1
CG17666 alc norpA
CG43773 Sans l(2)k09913
MBD-R2 CG42678 CG3651
RtGEF CG18659 CG18810
RtGEF CG31245 CG3225
Pak CG15861 CG34376
Pak CG18031 4E-T
Pak CkIIbeta2 CkIIalpha
Pak Ssl CkIIalpha
CG11876 eIF2B-beta Ada2b
CG11876 alc norpA
Galphao CG9426 Cul3
Galphao Plc21C Galphaq
sala bmm Lsd-2
CG10051 CG4461 HtrA2
CG14354 Lcp2 CG8929
rl Lpin Tor
rl CG45186 CG5009
rl CG9391 Trx-2
cm Rpt4R CG9588
CtBP disco CG9986
emc sage CG4168
prod CG2652 eIF-5A
HLH4C CG42235 Pk17E
HLH4C CG32428 Hn
HLH4C CG8414 CG9253
HLH4C Lcp2 CG8929
HLH4C CG9173 Rbp4
sc sr ap
RpL29 CG17385 CG8701
FER CG9014 LysX
CG11322 CG3919 Ak6
bsk Sln l

Fer1HCH CG32698 Ca-P60A
Fer1HCH yip7 Prosbeta5R1
Fer1HCH CG6749 krz
Fer1HCH PPO1 Jafrac1
Arr1 CG6175 CG8079
Arr1 CG8924 EloC
Crc CG6726 CG7264
CG13151 CG15916 RpL18A
Traf6 brn da
Traf6 CG8188 aop
Hip1 CG9975 Rbp4
CG14410 Lcp2 CG8929
CG14410 CG17190 Her
CG14410 CG5532 CG8292
CG14410 26-29-p Rbp4
Sirt7 ergic53 yellow-f
CG10979 Cyp6v1 RpS25
CG10979 Arpc3B glob1
Dap160 CG9527 A16
RASSF8 IntS8 Stam
RASSF8 CG8188 aop
RASSF8 CG9951 CG7168
FK506-bp2 Capa CG7191
FK506-bp2 CG10657 Arp1
FK506-bp2 CG2652 eIF-5A
CG11486 CG8924 EloC
CG11486 CG7600 MED4
CG11486 CG9426 Cul3
CG11486 SclA Ca-P60A
Cks85A CG5731 Zasp66
CG43894 rogdi Cam
Ref1 CG9775 Syp
CG11409 CG33056 CG5555
CG11409 CG34411 CG7379
Kdm2 olf413 TFAM
GM130 CG5334 CG9941
GM130 CG9426 Cul3
Cpr64Ad CG13674 LpR1
Cpr64Ad CG15211 sns
Rfabg PPO1 Jafrac1
blw CG3321 Neos
CG11076 CG9890 RpS29
PlexA CG5866 NimC1
PlexA Sema-1b Lis-1
CG15293 CG40045 RASSF8
Gadd45 Hnf4 Sir2
Gadd45 CG2144 dpr17
Nup93-1 Tps1 Sap47
Nup93-1 Nup154 CG5642
dsx CG13492 CG15370
d

Ef1alpha48D CG9902 Art6
CG12477 CG4270 Gp210
SmF snRNP-U1-C Art6
CG34355 SppL CG42369
SppL CG42369 pum
Sec23 CG8851 CG9170
Sec23 CG42678 CG3651
Arc1 CG34382 par-6
Pi3K21B CG11417 RpS11
Pi3K21B larp pAbp
Pi3K21B Psa CG12782
Pi3K21B Nopp140 Sod3
CG17829 CG7101 Jwa
CG17829 Ser7 Neto
CG17829 CG2120 CkIIalpha-i1
CG17829 chrb CG8368
Tpc1 CG7768 CG4936
CG12576 Tsp42Ek zf30C
CG12576 CG14606 Ufd1-like
CG3085 CG33095 CG33275
CG3085 mRpS30 mRpL23
CG3085 NLaz CG34404
Cka Cpr73D Nrt
CG13083 CG5913 CG31249
CG13083 Pnn CG9636
CG15370 CG9975 Rbp4
CG15771 Mst84Dd CG34449
CG15771 NimB2 CG10527
ssx NimB2 CG10527
ssx Lcp2 CG8929
Rab26 CG13531 Sxl
CG12607 Pxt CG8368
CG15422 CG17600 D1
CG15422 snRNP-U1-C Art6
CG15422 CG7381 CG8854
CG15545 Tsp97E eIF4E-5
CG2865 Ak6 maf-S
CG12672 SP555 EloC
Cyp28d2 CG6656 Poxm
DCX-EMAP nau kn
DCX-EMAP vimar Miro
CG12679 Mst84Dd CG34449
CG17377 disco CG9986
Rbp9 BHD Rpt4
Rbp9 AP-1gamma Afti
Pmp70 CG9547 CG9925
trbl CG8679 baf
eIF4AIII CG8414 CG9253
eIF4AIII Pnn CG9636
CG12721 

CG15646 Arpc3B glob1
raps Lcp2 CG8929
wisp CG42369 pum
wisp CG8180 Nedd4
wisp mir-312 sha
CG15741 ABCB7 CG11836
ABCB7 CG11836 Ndae1
hale Axs lwr
hale CG9836 MAPk-Ak2
Fbp2 CG32219 whd
l(3)03670 CG8004 CG9393
Vsx2 NimB2 CG10527
CG4835 CG9975 Rbp4
Clamp Nelf-A NimC1
Mhcl Pxt CG8368
Mhcl 26-29-p Rbp4
RpL18A CG8414 CG9253
IP3K2 dlt CycE
IP3K2 ABCB7 CG11836
IP3K2 CG8929 E5
Nrg Ak6 maf-S
Ak6 maf-S cnc
CG1635 P5cr-2 CG7379
CG1635 Rif1 Pp1-87B
CG1635 CG5608 Nup62
CG31961 CG8851 CG9170
CG7878 larp pAbp
Atg5 Cchl CG9624
CG1647 CG8569 sip2
sofe ATbp Sema-2a
CG16719 CG6688 cona
gammaTub37C swa ctp
alphaTub67C swa ctp
Srp72 CG32344 tum
Art8 CG14450 CG7135
Art8 CG18789 SdhA
CG2278 CG5537 Prp3
CG3213 CG8851 CG9170
CG16781 CG4955 CG5906
CG16826 pen c(3)G
CG16826 Pgk TER94
CG16826 PPO1 Jafrac1
CG16886 CG9975 Rbp4
Hexim CG9527 A16
E(spl)m8-HLH CG32219 whd
CG16972 CG5611 CG5844
CG16972 CG4751 CG9986
CG5611 CG5844 spz3
Cpr62Bc CG7813 CG9083
GstT3 Cchl CG9624
CG32486 Aef1 CG9986
CG32486 CG9975 Rbp4
Ada2b CG

Ote CG10214 CG7222
CG5681 CG7834 wal
CG5726 CG9426 Cul3
CG7236 Liprin-gamma Lar
CG5807 CG42575 futsch
AP-1sigma AP-1gamma Afti
Ime4 CG9014 LysX
Ime4 Pnn CG9636
Nelf-E Pol32 CG5846
Nelf-E Nelf-A NimC1
Arl1 Tom70 CG8602
E(spl)m5-HLH tap dsh
RpL9 CG42369 pum
Sec10 CG18262 AMPdeam
Sec10 gammaCOP pio
Sec10 JhI-1 CG4887
fu l(1)G0196 sip1
fu RIOK1 p53
fu CG1233 CG1458
fu CG17612 row
fu CG45186 CG5009
baf CG10214 CG7222
Rint1 CG9170 slam
CG6439 A16 sds22
CG6439 CG31344 RpS10b
endos CG9014 LysX
endos CG7168 RpL37a
endos elgi Myo10A
dik CG42678 CG3651
Nlp CG13983 CG34132
Cp16 ms(3)76Cc Ndc80
Mst33A CG9527 A16
CG7967 CG9527 A16
CG7967 hng2 sina
CG6567 mRpL45 Pxt
mRpL45 Pxt CG8368
CG6674 CG12320 ecd
CG6686 CG31712 l(2)37Cb
Osbp Rab40 EloC
ovo CG5382 l(2)01289
nAChRalpha2 CG9836 MAPk-Ak2
Mst89B Pxt CG8368
Snmp1 Or83c CG14741
Cpr66Cb vlc CG9977
cdc14 SP2637 didum
cdc14 CG5731 Zasp66
plu CG15043 Pect
plu CG15861 CG34376
plu Pino scrib
plu Np Hsp60B
CG7231 CG7903 Atx2
Trn Ran-like CG8417
Trn CG9330 Ad

mRpS24 CG9426 Cul3
Sirt2 SP2637 didum
Sirt2 CG5731 Zasp66
Arl2 CG31961 CG8851
Arl2 betaTub97EF Vhl
CG5080 CG9951 CG7168
wech Ilk parvin
TwdlN CG42369 pum
Tm2 CG9170 slam
D CG42369 pum
D ind gro
CG14062 l(3)L1231 M1BP
CG14118 pod1 Cpn
Pi3K92E Trpm CG34383
Pi3K92E RIOK1 p53
Pi3K92E srl Ets97D
Pi3K92E CG18031 4E-T
Pi3K92E iPLA2-VIA CG8235
Pi3K92E eIF3-S10 CG9769
Pi3K92E larp pAbp
EndoB Paip2 pAbp
alpha-Cat CG3476 Obp56h
ash2 Ncoa6 yki
Mef2 CG45186 CG5009
Mpc1 Cyp6v1 RpS25
Mpc1 CG6443 mts
CG15572 pod1 Cpn
CR17567 Xpd CycB
CG18787 CG18789 SdhA
CG4407 CG32698 Ca-P60A
coro mRpL23 mRpL1
ApepP CG9684 CG6283
Glt CG17765 Hil
Non1 Spn42Dc Acox57D-d
Non1 CG32344 tum
CG14881 CG45050 CG7987
CG14881 CG5793 CG7048
CycA SP2637 didum
CycA CG5731 Zasp66
CycA Np Hsp60B
Pak3 CG42369 pum
Pak3 CkIIbeta2 CkIIalpha
Pak3 Ssl CkIIalpha
KdelR Spt-I Rab1
RFeSP CG9121 betaTub56D
mtSSB CG10214 CG7222
sn qua spoon
vkg CG11395 Gbp
spoon CG7414 CG9173
spoon larp pAbp
CG15098 toe CG3880
Dad Lpin Tor
Dad Hip14 sog
tko srl

pnr svr CG9083
pnr amn rut
pnr H15 Set1
pnr tai ab
pnr RhoGEF64C Rho1
pnr chrb CG8368
pnr CG8116 Pnn
pnr Hmt-1 CG8746
pnr CG31176 CG7886
pnr glec Pk17E
pnr mir-278 Pi3K92E
Ptp99A CG11110 E5
mam Non2 CG16972
mam CG17612 row
mam Letm1 Jon65Aiv
mam Ten-a Ten-m
mam Mrp4 puc
dsh Pkc53E ik2
Sufu fdl Glycogenin
CG34340 YL-1 Mcr
nub NimB2 CG10527
nub CG32767 yellow-g
nub CG8360 CG7920
CG43729 CG13865 CG9319
CG43729 Tps1 Sap47
CG43729 CG4557 Rab6
Hmx 26-29-p Rbp4
CG11700 CG5958 Ubi-p5E
CG34353 CG13671 Cp15
Phb2 CG7979 CG34205
cv-c Plap P5CDh1
CG34347 CG4270 Gp210
Rgk3 Ten-a Ten-m
rdgA bc10 Zpr1
CG15047 maf-S cnc
RasGAP1 vlc CG9977
RasGAP1 larp pAbp
Tle4 Optix ey
sqh bbg kst
Vamp7 CG9426 Cul3
Exo70 CG6424 CG7196
Exo70 CG4447 CG9577
Exo70 CG9951 CG7168
Arf51F CG7427 CG13322
Arf51F Unc-13-4B CG5359
Arf51F CG6118 CG6912
Arf51F plx Cam
Arf51F amn rut
Exo84 CG2182 CG7264
Exo84 CG3259 Tpc1
Exo84 CG34422 bdg
Exo84 unc-13 Cam
Exo84 Sec8 exd
Snm1 CG13096 CG8545
Pms2 CG1815 OS9
Pms2 CG44774 CG7379
Pms2 CG

In [13]:
# H = G.subgraph(list(node_set))
# pos = nx.circular_layout(H)
# nx.draw(H, pos, cmap = plt.get_cmap('jet'), node_size = 2000)
# nx.draw_networkx_labels(H, pos)
# plt.show()

### 4 nodes chain

In [14]:
# pattern 2 search
for node1 in G.nodes:
    for node2 in G.neighbors(node1):
        if (G.out_degree(node2)>1 or G.out_degree(node2)>1 or node2==node1):
            continue
        else:
            for node3 in G.neighbors(node2):
                if (G.out_degree(node3)>1 or G.out_degree(node3)>1 or node3 in [node1, node2]):
                    continue
                else:
                    for node4 in G.neighbors(node3):
                        if(node4 not in [node1, node2, node3] and (node4 not in G.neighbors(node1)) and not(common_regulator(G, node1, node4))):
                            print(node1, node2, node3, node4)

P32 CG8435 CG9426 Cul3
Pex5 CG9527 A16 sds22
CG14354 Lcp2 CG8929 E5
HLH4C Lcp2 CG8929 E5
CG11322 CG3919 Ak6 maf-S
Egfr eRF1 Sap47 Rbp4
Traf-like CG8435 CG9426 Cul3
Traf-like CG8851 CG9170 slam
14-3-3zeta mRpS30 mRpL23 mRpL1
ple Plap P5CDh1 plu
Src64B mRpS30 mRpL23 mRpL1
Spred ms(3)K81 CG9426 Cul3
CG12470 Mtap CG12567 Cpr66D
Nha1 CG14957 upSET CG9796
CycG CG31961 CG8851 CG9170
CycG CG4365 YL-1 Mcr
CycG mRpS30 mRpL23 mRpL1
rdx CG8360 CG7920 kdn
Hakai CG14957 upSET CG9796
Zif Mdh1 CG9014 LysX
CG7879 CG8407 Cpr73D Nrt
su(Hw) Mtpalpha CG7407 CG9288
NC2alpha CG9951 CG7168 RpL37a
spn-F CG15549 Pnn CG9636
spn-F CG8116 Pnn CG9636
spn-F CG8141 Pnn CG9636
NC2beta kuk CG8929 E5
CG31211 Ing3 CG9527 A16
CG6425 Taz iPLA2-VIA CG8235
ATPsyn-beta Cpr78E CG3321 Neos
ref(2)P GstE8 jdp Hn
Dlc90F CG7208 lush Or67d
CG10494 CG5681 CG7834 wal
Hrb27C CG33056 CG5555 CG6333
sxc Lcp2 CG8929 E5
Chi Lcp2 CG8929 E5
CG3309 CG7156 ldbr CG13484
zen CG9527 A16 sds22
Snr1 Mcr LRP1 LanB1
Blos2 EndoB Paip2 pAbp
Blos2 CG9951

### 5 nodes chain

In [15]:
# pattern 2 search
for node1 in G.nodes:
    for node2 in G.neighbors(node1):
        if (G.out_degree(node2)>1 or G.out_degree(node2)>1 or node2==node1):
            continue
        else:
            for node3 in G.neighbors(node2):
                if (G.out_degree(node3)>1 or G.out_degree(node3)>1 or node3 in [node1, node2]):
                    continue
                else:
                    for node4 in G.neighbors(node3):
                        if (G.out_degree(node4)>1 or G.out_degree(node4)>1 or node4 in [node1, node2, node3]):
                            continue
                        else:
                            for node5 in G.neighbors(node4):
                                if(node4 not in [node1, node2, node3, node4] and (node5 not in G.neighbors(node1)) and not(common_regulator(G, node1, node5))):
                                    print(node1, node2, node3, node4, node5)                            

Comment: No chain of length 5 found