#Mounting Google Drive Files

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/drive


#Importing Python Packages

In [0]:
import networkx as nx
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from statistics import mean,mode,median 
import csv
%matplotlib inline

#Loading the Signor 2.0 Database

In [0]:
dataset=pd.read_csv("/content/drive/My Drive/BioInformatics/Protein-Protein Interaction/SignorDataBase.tsv",delimiter="\t")

In [7]:
dataset.head()

Unnamed: 0,ENTITYA,TYPEA,IDA,DATABASEA,ENTITYB,TYPEB,IDB,DATABASEB,EFFECT,MECHANISM,RESIDUE,SEQUENCE,TAX_ID,CELL_DATA,TISSUE_DATA,MODULATOR_COMPLEX,TARGET_COMPLEX,MODIFICATIONA,MODASEQ,MODIFICATIONB,MODBSEQ,PMID,DIRECT,NOTES,ANNOTATOR,SENTENCE,SIGNOR_ID
0,PLCB2,protein,Q00722,UNIPROT,superoxide,smallmolecule,CHEBI:18421,ChEBI,up-regulates quantity,,,,,,,,,,,,,23994464,NO,,apalma,The PI3Kγ pathway (but not PLCβ2/3) is require...,SIGNOR-255013
1,PLCB3,protein,Q01970,UNIPROT,"1D-myo-inositol 1,4,5-trisphosphate",smallmolecule,CHEBI:16595,ChEBI,up-regulates quantity,small molecule catalysis,,,9606.0,,,,,,,,,23994464,NO,,apalma,The first phase of this signal is likely media...,SIGNOR-255018
2,RORC,protein,P51449,UNIPROT,IL17A,protein,Q16552,UNIPROT,up-regulates,transcriptional regulation,,,9606.0,,,,,,,,,16990136,YES,,mrosina,We found that RORgt is required for the consti...,SIGNOR-255029
3,RORC,protein,P51449,UNIPROT,Th17,phenotype,SIGNOR-PH94,SIGNOR,up-regulates,,,,9606.0,,,,,,,,,16990136,YES,,MARCO ROSINA,Our results demonstrate that RORgt is the tran...,SIGNOR-255028
4,SRC,proteinfamily,SIGNOR-PF32,SIGNOR,Chemotaxis,phenotype,SIGNOR-PH93,SIGNOR,up-regulates,,,,9606.0,BTO:0000830,,,,,,,,15526160,NO,,apalma,The SFK family of tyrosine kinases is named af...,SIGNOR-254996


In [0]:
source = list(dataset["ENTITYA"])
type_A = list(dataset["TYPEA"])
target = list(dataset["ENTITYB"])
type_B = list(dataset["TYPEB"])
interaction = list(dataset["MECHANISM"])
regulation = list(dataset["EFFECT"])
direct = list(dataset["DIRECT"])

##Save specific columns to a file I

In [0]:
saved_proteins = []
with open('signor_pairs.txt', 'w') as filehandle:
  for i in range(len(source)):
    first= source[i]
    first = first.replace(" ","_")
    second = target[i]
    second = second.replace(" ","_")
    third = regulation[i]
    fourth = interaction[i]
    save = tuple([first,second,third,fourth])
    if save in saved_proteins:
      continue
    else:
      saved_proteins.append(save)
    if (type_A[i] or type_B[i]) != ("protein" or "complex" or "proteinfamily"):
      continue
    elif direct[i] == "NO":
      continue
    else:
      filehandle.writelines("%s\t%s\t%s\t%s\n" % (first,second,third,fourth))

In [0]:
data_signor=pd.read_csv("signor_pairs.txt",delimiter="\t", names = ["source","target","regulation","interaction"])

##Save specific columns to a file II

In [0]:
source2 = list(data_signor["source"])
target2 = list(data_signor["target"])
regulation2 = list(data_signor["regulation"])
interaction2 = list(data_signor["interaction"])

with open('signor_edges.txt', 'w') as filehandle:
  for i in range(len(source2)):
    first = source2[i]
    second = target2[i]
    filehandle.writelines("%s\t%s\n" % (first,second))

In [31]:
len(source2)

10698

#Load network into a directed graph

In [0]:
P = nx.read_edgelist('signor_edges.txt', create_using = nx.DiGraph)

In [0]:
protein_pairs = []
for i in range(len(source2)):
  a = source2[i]
  b = target2[i]
  ab = tuple([a,b])
  protein_pairs.append(ab)

In [15]:
len(P.edges)

9939

##Dictionary for PPI and its regulation

In [0]:
inter_dict = dict(zip(protein_pairs,regulation2))

##Dictionary for PPI and the mechanism

In [0]:
mech_dict = dict(zip(protein_pairs,interaction2))

#Trimming the Network

In [0]:
up ='up-regulates'
down = 'down-regulates'
com = 'complex'

### Pathways checked
**Pathway 1 (MAPK pathway):** ERBB2, SHC1, GRB2, SOS1, HRAS, RAF1, MAP2K1, MAPK1, MAPK3, ERBB3, MYC, ELK1, FOS, JUN \
**Pathway 2 (PI3K pathway):** PIK3CA, PIK3R1, PTEN, AKT1, MTOR, RPS6KB1, RPS6KB2, ERBB2, ERBB3, VEGFA, TP53,FOXO



###NOTE: Since the MCDS was large and majority of the members are already included. Only ERBB2 was used for this test

In [0]:
MAPK_nodes = set(['ERBB2', 'SHC1', 'GRB2', 'SOS1', 'HRAS', 'RAF1', 'MAP2K1', 'MAPK1', 'MAPK3'])
PI3K_nodes = set(['PIK3CA', 'PIK3R1', 'PTEN', 'AKT1', 'MTOR', 'RPS6KB1', 'RPS6KB2'])

In [0]:
MAPK_path= set()
PI3K_path = set()

#Network Trimming

##Determining immediate neighbors of starting node

In [0]:
def state_convert(x):
  if up in x:
    return up
  elif down in x:
    return down
  elif com in x:
    return com
  else:
    return x

##Trimming network with the ff. rules:
* The **succeeding/preceeding node** must have the **same type of regulation** as the node of interest, else trim it off **unless** \
* The connection is a **binding mechanism** \
* The **succeeding/preceeding node** is a **complex** \
* **Parameters are easily configured**

In [20]:
trim_net = []
direction = []
regul = []
  
start_node = 'FOXM1'
in_edge = set(P.in_edges(start_node))
out_edge = set(P.out_edges(start_node))
edge_tot = in_edge.union(out_edge)    

for edge in edge_tot:
  try:
    reg = inter_dict[edge]
    state = state_convert(reg)
    #print(state)
    regul.append(state)
    trim_net.append(edge)
  except KeyError:
    edge_inv = tuple([edge[1], edge[0]])
    reg_inv = inter_dict[edge_inv]
    state = state_convert(reg_inv)
    print(state)
    regul.append(state)
    trim_net.append(edge_inv)

level2 = dict(zip(trim_net,regul))
len(level2)

5

In [0]:
prev_level = set(level2.keys())
trimNET = {}
trimNET.update(level2)
continue_check = True 
level = 3

In [22]:
while continue_check:
  nodules = {}
  for neighbor in prev_level:
    basis_regul = trimNET[neighbor]
    if neighbor in P.in_edges():
      inward = set(P.in_edges(neighbor[0]))
      outward = set(P.in_edges(neighbor[0]))
      ed = inward.union(outward)
    elif neighbor in P.out_edges():
      inward = set(P.in_edges(neighbor[1]))
      outward = set(P.out_edges(neighbor[1]))
      ed = inward.union(outward)
    for member in ed:
      try:
        subnet = []
        regulation = []
        reg_member = inter_dict[member]
        state = state_convert(reg_member)
        if (state == basis_regul) or (state == com) or (mech_dict[member] == 'binding'):
          if member not in subnet:
            subnet.append(member)
            regulation.append(state)
            updt = dict(zip(subnet,regulation))
            nodules.update(updt)
      except KeyError:
        subnet = []
        regulation = []
        a = tuple([member[1],member[0]])
        rr = inter_dict[a]
        state = state_convert(rr)
        if (state == basis_regul) or (state == com) or (mech_dict[member] == 'binding'):
          if a not in subnet:
            subnet.append(a)
            regulation.append(state)
            updt = dict(zip(subnet,regulation))
            nodules.update(updt)
            
  if level == 3:
    print("Level",level)
    print(len(nodules))
    A = set(level2.keys())
    B = set(nodules.keys())
    A_B = set(A & B)
    for rem in A_B:
      nodules.pop(rem)
    print(len(nodules))
    prev_level = set(nodules.keys())
    onion = A.union(B)
    trimNET.update(nodules)
    level +=1
    
  else:
    print("Level",level)
    print(len(nodules))
    B = set(nodules.keys())
    intersection = set(onion & B)
    for rem in intersection:
      nodules.pop(rem)
    print(len(nodules))
    prev_level = set(nodules.keys())
    trimNET.update(nodules)
    onion = onion.union(B)
    level +=1
    if len(nodules) == 0:
      continue_check = False
     

Level 3
41
41
Level 4
163
136
Level 5
515
411
Level 6
1319
923
Level 7
1835
975
Level 8
2139
651
Level 9
1479
434
Level 10
1450
193
Level 11
687
99
Level 12
457
74
Level 13
401
35
Level 14
87
1
Level 15
3
0


#Appending to a text file

In [0]:
trimNETKeys = set(trimNET.keys())

with open('trimmedNet.txt', 'w') as filehandle:
  for x in trimNETKeys:
    first= x[0]
    second = x[1]
    filehandle.writelines("%s   %s \n" % (first,second))

In [0]:
Pp = nx.read_edgelist('trimmedNet.txt', create_using = nx.DiGraph)

In [0]:
relevant_edges = []
ed1 = set(Pp.nodes())
for x in ed1:
  a0 = set(P.in_edges(x))
  b0 = set(P.out_edges(x))
  c0 = a0.union(b0)
  for elem in c0:
    elem0 = elem[0]
    elem1 = elem[1]
    if (elem0 and elem1) in ed1:
      relevant_edges.append(tuple([elem0,elem1]))
    else:
      #print(elem,' is not a member of MCDS pairs!.')
      continue

#MCDS Calculation

In [26]:
# INITIALIZATION OF CODE SNIPET
V = set(Pp.nodes()) #Sets V as all nodes
# CHOOSE A NODE V WITH HIGHEST DEGREE
v = start_node 
MCDS = set([v]) #Outputs Node with most connections
inward = set(Pp.in_edges(v))
outward = set(Pp.out_edges(v))
W = set()
for x in inward:
  W.add(x[0])
for y in outward:
  W.add(y[1])
#W = set(Pp[v]) #Saves Nodes which are connected to node with most connections
U = set([v]) | W
while U != V:
  w = None
  w_length = 0
  for node in W:
    inward2 = set(Pp.in_edges(node))
    outward2 = set(Pp.out_edges(node))
    Gnode = set()
    for x in inward2:
      Gnode.add(x[0])
    for y in outward2:
      Gnode.add(y[1])
    neighbours = set(Gnode) - U
    #print(node,":",neighbours)
    if (len(neighbours) > w_length):
      w_length = len(neighbours)
      w = node
  MCDS = MCDS | set([w])
  print(MCDS)
  inward3 = set(Pp.in_edges(w))
  outward3 = set(Pp.out_edges(w))
  edi = set()
  for x in inward3:
    edi.add(x[0])
  for y in outward3:
    edi.add(y[1])
  U = U | edi
  W = (W - set([w])) | (edi - set([v]))
  v = w
  

{'CDK1', 'FOXM1'}
{'CDK1', 'TP53', 'FOXM1'}
{'CDK1', 'MAPK3', 'TP53', 'FOXM1'}
{'CDK1', 'FOXM1', 'MAPK3', 'TP53', 'SMAD3'}
{'CDK1', 'FOXM1', 'AKT1', 'MAPK3', 'TP53', 'SMAD3'}
{'SRC', 'CDK1', 'FOXM1', 'AKT1', 'MAPK3', 'TP53', 'SMAD3'}
{'SRC', 'CDK1', 'FOXM1', 'AKT1', 'GNAI1', 'MAPK3', 'TP53', 'SMAD3'}
{'FOXM1', 'GNAI1', 'MAPK3', 'SRC', 'CDK1', 'AKT1', 'NOTCH1', 'TP53', 'SMAD3'}
{'FOXM1', 'GNAI1', 'MAPK3', 'SRC', 'GSK3B', 'CDK1', 'AKT1', 'NOTCH1', 'TP53', 'SMAD3'}
{'FOXM1', 'GNAI1', 'MAPK3', 'SRC', 'GSK3B', 'CDK1', 'AKT1', 'NOTCH1', 'TP53', 'SMAD3', 'MAPK14'}
{'FOXM1', 'GNAI1', 'MAPK3', 'GNAS', 'SRC', 'GSK3B', 'CDK1', 'AKT1', 'NOTCH1', 'TP53', 'SMAD3', 'MAPK14'}
{'FOXM1', 'GNAI1', 'MAPK3', 'GNAS', 'SRC', 'GSK3B', 'CDK1', 'AKT1', 'NOTCH1', 'CTNNB1', 'TP53', 'SMAD3', 'MAPK14'}
{'FOXM1', 'GNAI1', 'MAPK3', 'GNAS', 'SRC', 'GSK3B', 'CDK1', 'AKT1', 'NOTCH1', 'CTNNB1', 'KRAS', 'TP53', 'SMAD3', 'MAPK14'}
{'FOXM1', 'GNAI1', 'MAPK3', 'GNAS', 'MAPK8', 'SRC', 'GSK3B', 'CDK1', 'AKT1', 'NOTCH1', 'CTNNB

In [28]:
print(len(MAPK_path))
MAPK_path= MAPK_path.union(MCDS)
len(MAPK_path)

0


335

In [0]:
with open('FOXM1_path.txt', 'w') as filehandle:
  for x in MAPK_path:
    first= x
    first = first.replace("_"," ")
    filehandle.writelines("%s, " % first)