## The code below generates the real and random networks using all interaction information from hi_union_lit17_merged and saves the interaction files in the folder Protein_networks. For the DomainEnrichment features of the PRS and RRS, however, the interactions found in the PRS have to be removed from the real network, so a separate set of interaction files has to be generated specifically for the PRS and RRS.

In [457]:
import pandas as pd
import numpy as np
import igraph, json
import matplotlib.pyplot as plt
from igraph import *

In [458]:
# Load the interaction table (tab-separated, first column used as the index)
# and display its dimensions.
edges = pd.read_csv(
    'hi_union_lit17_merged_only_human_reviewed_20210414.tsv',
    sep='\t',
    index_col=0,
)
edges.shape

(101219, 2)

In [459]:
# Show every edge in which P08151 appears as either interaction partner.
query_protein = 'P08151'
is_partner_a = edges['uniprot_id_a'] == query_protein
is_partner_b = edges['uniprot_id_b'] == query_protein
edges.loc[is_partner_a | is_partner_b]

Unnamed: 0,uniprot_id_a,uniprot_id_b
22122,Q9UMX1,P08151
23914,P08151,Q13526
23915,P08151,Q8IWZ5
23916,P08151,P54646
23917,P08151,Q8NEC5
23918,P08151,Q96CN9
23919,P08151,P29972
75702,P08151,P17861
75703,P08151,Q7Z5J4
75704,P08151,Q9NRP7


In [460]:
# Build the undirected interaction graph from the edge table; the two columns
# of `edges` hold the UniProt accessions of the interaction partners.
# NOTE(review): newer python-igraph releases appear to default to
# `use_vids=True` in Graph.DataFrame, which expects integer vertex ids rather
# than names - confirm against the installed version before re-running.
real_network= Graph.DataFrame(edges, directed= False)
# Optional: copy the vertex names into a 'label' attribute (currently disabled).
#real_network.vs['label']= real_network.vs['name']

In [461]:
# Print the names of all neighbours of P08151 in the real network.
for neighbor_id in real_network.neighbors('P08151'):
    neighbor = real_network.vs[neighbor_id]
    print(neighbor['name'])

P17861
P29972
P54646
Q13526
Q7Z5J4
Q8IWZ5
Q8NEC5
Q96CN9
Q9NRP7
Q9UMX1


In [462]:
def make_random_graph(real_network):
    """Return a random graph that mirrors the degree sequence of real_network.

    The graph is produced by igraph's Degree_Sequence generator with
    method 'vl', fed with the per-vertex degrees of `real_network`. Vertex
    names are copied over in vertex order, so vertex i of the result carries
    the same name (and the same degree) as vertex i of `real_network`.
    """
    randomised = Graph.Degree_Sequence(real_network.degree(), method='vl')
    randomised.vs['name'] = real_network.vs['name']
    return randomised

In [463]:
# Generate 1000 degree-preserving randomisations of the real network.
rand_graphs = [make_random_graph(real_network) for _ in range(1000)]
len(rand_graphs)

1000

In [464]:
# Write one file per protein. Row 0 lists the protein's partners in the real
# network; rows 1..N list its partners in each of the N random networks.
network_dir = '../protein_sequences_and_features/human_protein_sequences_features/Protein_networks/'
for v in real_network.vs:
    prot = v['name']
    # The `with` block closes the file on exit, so no explicit close() is needed.
    with open(network_dir + prot + '_real_rand_networks.txt', 'w') as f:
        f.write('network_id\tinteraction_partners\n')
        real_partners = [real_network.vs[i]['name'] for i in real_network.neighbors(v)]
        f.write('0\t' + '|'.join(real_partners) + '\n')
        for network_id, g in enumerate(rand_graphs, start=1):
            # make_random_graph copies the names in vertex order, so the same
            # vertex index addresses the same protein in every random graph.
            # Query by index rather than handing `g` a Vertex owned by
            # `real_network`.
            rand_partners = [g.vs[i]['name'] for i in g.neighbors(v.index)]
            f.write(str(network_id) + '\t' + '|'.join(rand_partners) + '\n')

In [113]:
# Print the edge list (pairs of vertex indices) of the first random graph only.
first_graphs = rand_graphs[:1]
for graph in first_graphs:
    edge_list = graph.get_edgelist()
    print(edge_list)

[(0, 13361), (0, 12435), (0, 6389), (0, 9804), (0, 1661), (0, 6149), (0, 3077), (0, 9270), (0, 10343), (0, 11563), (0, 1421), (0, 3567), (0, 1666), (0, 7908), (0, 8202), (0, 1181), (0, 5522), (0, 1475), (0, 6840), (0, 4988), (0, 3964), (0, 3038), (0, 5576), (0, 8746), (0, 4944), (0, 8065), (0, 4018), (0, 8359), (0, 5434), (0, 3484), (0, 10701), (0, 608), (0, 8898), (0, 755), (0, 2852), (0, 6022), (0, 1926), (0, 9143), (0, 8168), (0, 2416), (0, 6463), (0, 10608), (0, 2612), (0, 7879), (0, 12171), (0, 6174), (0, 2127), (0, 4812), (0, 13004), (0, 8129), (0, 7350), (0, 280), (0, 3548), (0, 4670), (0, 5988), (0, 5552), (0, 6674), (0, 8820), (0, 677), (0, 775), (0, 11554), (0, 2436), (0, 3950), (0, 8095), (0, 7169), (0, 6733), (0, 12926), (0, 1711), (0, 6929), (0, 11074), (0, 4102), (0, 5077), (0, 11270), (0, 3078), (0, 1177), (0, 3274), (0, 251), (0, 3470), (0, 1422), (0, 496), (0, 11079), (0, 4984), (0, 5376), (0, 5425), (0, 7522), (0, 1819), (0, 11084), (0, 8208), (0, 12745), (0, 7674), (

In [130]:
# Inspect the neighbour vertex indices of vertex 0 in the real network.
real_network.neighbors(0)

[33,
 67,
 80,
 167,
 322,
 330,
 407,
 445,
 464,
 487,
 580,
 587,
 703,
 797,
 812,
 837,
 923,
 946,
 991,
 1058,
 1183,
 1207,
 1259,
 1328,
 1351,
 1352,
 1512,
 1636,
 1637,
 1646,
 1661,
 1682,
 1712,
 1720,
 1774,
 1808,
 1939,
 1958,
 1979,
 2040,
 2081,
 2136,
 2148,
 2153,
 2159,
 2189,
 2191,
 2192,
 2217,
 2226,
 2238,
 2253,
 2260,
 2272,
 2372,
 2435,
 2451,
 2491,
 2550,
 2555,
 2592,
 2647,
 2677,
 2697,
 2713,
 2747,
 2749,
 2828,
 2893,
 2918,
 2933,
 2979,
 3013,
 3075,
 3079,
 3095,
 3133,
 3230,
 3239,
 3299,
 3319,
 3340,
 3484,
 3530,
 3680,
 3697,
 3707,
 3807,
 3817,
 3852,
 3913,
 3964,
 4002,
 4019,
 4148,
 4154,
 4223,
 4256,
 4262,
 4382,
 4424,
 4425,
 4429,
 4438,
 4439,
 4440,
 4441,
 4444,
 4445,
 4446,
 4447,
 4590,
 4605,
 4690,
 4746,
 4925,
 4932,
 4985,
 5012,
 5073,
 5080,
 5142,
 5183,
 5308,
 5339,
 5349,
 5626,
 5693,
 5699,
 5734,
 5747,
 5773,
 5792,
 5793,
 5827,
 5937,
 5981,
 6190,
 6232,
 6240,
 6247,
 6266,
 6277,
 6278,
 6285,
 6336,


In [125]:
# List the vertex objects to see which protein name each vertex index carries.
list(real_network.vs)

[igraph.Vertex(<igraph.Graph object at 0x1342df8b0>, 0, {'name': 'A0A087WUL8'}),
 igraph.Vertex(<igraph.Graph object at 0x1342df8b0>, 1, {'name': 'A0A087X1G2'}),
 igraph.Vertex(<igraph.Graph object at 0x1342df8b0>, 2, {'name': 'A0A096LP49'}),
 igraph.Vertex(<igraph.Graph object at 0x1342df8b0>, 3, {'name': 'A0A0B4J2D5'}),
 igraph.Vertex(<igraph.Graph object at 0x1342df8b0>, 4, {'name': 'A0A0B4J2F2'}),
 igraph.Vertex(<igraph.Graph object at 0x1342df8b0>, 5, {'name': 'A0A0U1RQF7'}),
 igraph.Vertex(<igraph.Graph object at 0x1342df8b0>, 6, {'name': 'A0A140G945'}),
 igraph.Vertex(<igraph.Graph object at 0x1342df8b0>, 7, {'name': 'A0A1B0GTR3'}),
 igraph.Vertex(<igraph.Graph object at 0x1342df8b0>, 8, {'name': 'A0A1B0GTZ2'}),
 igraph.Vertex(<igraph.Graph object at 0x1342df8b0>, 9, {'name': 'A0A1B0GUV7'}),
 igraph.Vertex(<igraph.Graph object at 0x1342df8b0>, 10, {'name': 'A0AUZ9'}),
 igraph.Vertex(<igraph.Graph object at 0x1342df8b0>, 11, {'name': 'A0AV02'}),
 igraph.Vertex(<igraph.Graph objec

In [129]:
# Print, one per line, the vertex index of every neighbour of vertex 0.
neighbor_ids = real_network.neighbors(0)
for neighbor_id in neighbor_ids:
    print(neighbor_id)

33
67
80
167
322
330
407
445
464
487
580
587
703
797
812
837
923
946
991
1058
1183
1207
1259
1328
1351
1352
1512
1636
1637
1646
1661
1682
1712
1720
1774
1808
1939
1958
1979
2040
2081
2136
2148
2153
2159
2189
2191
2192
2217
2226
2238
2253
2260
2272
2372
2435
2451
2491
2550
2555
2592
2647
2677
2697
2713
2747
2749
2828
2893
2918
2933
2979
3013
3075
3079
3095
3133
3230
3239
3299
3319
3340
3484
3530
3680
3697
3707
3807
3817
3852
3913
3964
4002
4019
4148
4154
4223
4256
4262
4382
4424
4425
4429
4438
4439
4440
4441
4444
4445
4446
4447
4590
4605
4690
4746
4925
4932
4985
5012
5073
5080
5142
5183
5308
5339
5349
5626
5693
5699
5734
5747
5773
5792
5793
5827
5937
5981
6190
6232
6240
6247
6266
6277
6278
6285
6336
6420
6597
6598
6605
6616
6618
6619
6620
6626
6627
6645
6647
6649
6650
6661
6668
6782
6784
6873
6892
6929
6931
6932
6955
7059
7099
7230
7262
7285
7293
7434
7499
7510
7677
7709
7751
7783
7815
7879
7936
7957
8004
8049
8088
8129
8178
8251
8322
8327
8405
8418
8473
8590
8618
8644
8680
8693
8696
87

In [145]:
# Show the edges that list A0PJX0 in the first partner column.
edges.loc[edges['uniprot_id_a'].eq('A0PJX0')]

Unnamed: 0,uniprot_id_a,uniprot_id_b
47774,A0PJX0,Q6EMK4
47775,A0PJX0,Q86VE0
47776,A0PJX0,Q7Z3C6
47777,A0PJX0,Q96C00


## The code below generates the protein network files specific to the PRS and RRS DomainEnrichment calculation by removing the interactions found in the PRS from the real network of each protein. This avoids circularity in the PRS network.

In [494]:
import pandas as pd
import numpy as np
import igraph, json
import matplotlib.pyplot as plt
from igraph import *

In [495]:
# Load the interaction table and the PRS table (both tab-separated, first
# column used as the index) and print the dimensions of each after loading.
edges = pd.read_csv(
    'hi_union_lit17_merged_only_human_reviewed_20210414.tsv',
    sep='\t',
    index_col=0,
)
print(edges.shape)
PRS = pd.read_csv(
    '/Users/chopyanlee/Coding/Python/DMI/PRS/PRS_v3_only_human_with_pattern_alt_iso_swapped_removed_20210413.tsv',
    sep='\t',
    index_col=0,
)
print(PRS.shape)

(101219, 2)
(898, 15)


In [496]:
# Preview the first rows of the edge table.
edges.head()

Unnamed: 0,uniprot_id_a,uniprot_id_b
0,Q9H2S6,Q9NPE6
1,Q9H2S6,Q9BXK5
2,Q9H2S6,O60238
3,Q9H2S6,P20138
4,Q9H2S6,Q9UM44


In [497]:
# Normalise every PRS pair to a sorted tuple so that (A, B) and (B, A)
# compare equal. PRS_intx stays a list because it is reused further down.
PRS_intx = [
    tuple(sorted(pair))
    for pair in zip(PRS['interactorElm'], PRS['interactorDomain'])
]
# Set copy for constant-time membership tests; testing against the list would
# rescan all PRS pairs for every one of the edges.
PRS_intx_set = set(PRS_intx)
# Index labels of the edges whose unordered protein pair is a PRS interaction.
PRS_intx_ind = [
    idx
    for idx, prot_a, prot_b in zip(edges.index, edges['uniprot_id_a'], edges['uniprot_id_b'])
    if tuple(sorted((prot_a, prot_b))) in PRS_intx_set
]
len(PRS_intx_ind)

275

In [498]:
# Show the edge rows whose index labels were collected in PRS_intx_ind.
edges.loc[PRS_intx_ind, :]

Unnamed: 0,uniprot_id_a,uniprot_id_b
78,Q92934,Q07817
2656,Q16611,Q07817
2769,P60520,O60238
2783,P60520,Q13501
5869,P56524,Q9H9E1
...,...,...
103110,Q9H2K2,Q9NWV8
103112,Q9H2K2,Q9UIQ6
103782,Q9UBT6,Q9UBZ9
103796,Q9UBZ9,Q9UNA4


In [499]:
# Remove the edges that coincide with PRS interactions and display the
# dimensions of the remaining edge table.
filtered_edges = edges.drop(labels=PRS_intx_ind, axis='index')
filtered_edges.shape

(100944, 2)

In [500]:
# Build the undirected interaction graph from the PRS-filtered edge table.
filtered_real_network = Graph.DataFrame(filtered_edges, directed=False)
# Copy the vertex names into a 'label' attribute on the filtered network
# itself. Targeting `real_network` here would fail with a NameError when this
# section is run without the unfiltered section, and would leave the filtered
# graph unlabelled.
filtered_real_network.vs['label'] = filtered_real_network.vs['name']

In [501]:
# Print the names of all neighbours of Q07817 in the PRS-filtered network.
for neighbor_id in filtered_real_network.neighbors('Q07817'):
    neighbor = filtered_real_network.vs[neighbor_id]
    print(neighbor['name'])

O43889
P04049
P04637
P09467
P0C671
P10415
P10909
P13667
P21246
P22749
P27469
P36873
P45983
P49286
P49768
P49810
P52756
P55211
P55957
P56557
P60709
P62136
P63279
P78317
P99999
Q04724
Q07817
Q07817
Q08AM6
Q12983
Q13323
Q13625
Q13794
Q3B7T1
Q5T7V8
Q6NTF9
Q6PKG0
Q7Z465
Q7Z5B4
Q86XA0
Q8IV08
Q8IWV7
Q8IYJ2
Q8NBJ4
Q8NEY8
Q96LC9
Q96N67
Q96PG8
Q96PU4
Q99638
Q9GZR5
Q9H2K0
Q9H6H4
Q9HBF4
Q9HC38
Q9NQS1
Q9P2Y4
Q9UKT9
Q9UKY1
Q9Y282


In [502]:
def make_random_graph(real_network):
    """Return a random graph that mirrors the degree sequence of real_network.

    The graph is produced by igraph's Degree_Sequence generator with
    method 'vl', fed with the per-vertex degrees of `real_network`. Vertex
    names are copied over in vertex order, so vertex i of the result carries
    the same name (and the same degree) as vertex i of `real_network`.
    """
    randomised = Graph.Degree_Sequence(real_network.degree(), method='vl')
    randomised.vs['name'] = real_network.vs['name']
    return randomised

In [505]:
# Generate 1000 degree-preserving randomisations of the PRS-filtered network.
rand_graphs = [make_random_graph(filtered_real_network) for _ in range(1000)]
len(rand_graphs)

1000

In [510]:
# TODO: revisit the known-PPI file to make sure that these edges, as well as
# the PRS PPIs, are added to it.
# Read the known PPIs and store each pair as a sorted tuple built from the
# first two tab-separated fields of the line.
with open('/Users/chopyanlee/Coding/Python/DMI/protein_interaction_prediction/PRS_IntAct_union_known_PPIs.txt', 'r') as f:
    lines = [raw_line.strip() for raw_line in f]
IntAct = [
    tuple(sorted((fields[0], fields[1])))
    for fields in (line.split('\t') for line in lines)
]

In [517]:
# Express every edge as a sorted tuple so it is comparable with the PRS pairs,
# then take the union of both sets of interactions.
hi_union_lit = [
    tuple(sorted((prot_a, prot_b)))
    for prot_a, prot_b in zip(edges['uniprot_id_a'], edges['uniprot_id_b'])
]
PRS_union_hi_union_lit = set(PRS_intx) | set(hi_union_lit)

In [518]:
# Count the distinct interactions across the PRS, hi_union_lit and IntAct sets.
len(PRS_union_hi_union_lit | set(IntAct))

175622

In [520]:
# Write the combined set of known PPIs, one tab-separated pair per line.
with open('/Users/chopyanlee/Coding/Python/DMI/protein_interaction_prediction/PRS_hi_lit17_IntAct_known_PPIs_20210427.txt', 'w') as f:
    known_ppis = PRS_union_hi_union_lit.union(set(IntAct))
    f.writelines('\t'.join(pair) + '\n' for pair in known_ppis)

In [507]:
# Write one file per protein for the PRS-filtered network. Row 0 lists the
# protein's partners in the filtered real network; rows 1..N list its partners
# in each of the N random networks.
filtered_network_dir = '../protein_sequences_and_features/human_protein_sequences_features/Protein_networks_PRS_filtered/'
for v in filtered_real_network.vs:
    prot = v['name']
    # The `with` block closes the file on exit, so no explicit close() is needed.
    with open(filtered_network_dir + prot + '_real_rand_networks.txt', 'w') as f:
        f.write('network_id\tinteraction_partners\n')
        real_partners = [
            filtered_real_network.vs[i]['name']
            for i in filtered_real_network.neighbors(v)
        ]
        f.write('0\t' + '|'.join(real_partners) + '\n')
        for network_id, g in enumerate(rand_graphs, start=1):
            # make_random_graph copies the names in vertex order, so the same
            # vertex index addresses the same protein in every random graph.
            # Query by index rather than handing `g` a Vertex owned by
            # `filtered_real_network`.
            rand_partners = [g.vs[i]['name'] for i in g.neighbors(v.index)]
            f.write(str(network_id) + '\t' + '|'.join(rand_partners) + '\n')