In [1]:
# imports
import networkx as nX
import matplotlib.pyplot as plt
import pandas as pd 
import numpy as np
from unipressed import IdMappingClient
import time
from collections import Counter
import os

In [7]:
# Load the edge table from CSV into `df`; the graph-building cells below
# read their nodes and weights from this frame.
path = '../pathLinkerData/PathLinker_2018_human-ppi-weighted-cap0_75.csv'
df = pd.read_csv(filepath_or_buffer=path)

In [None]:
# Build an undirected, weighted graph from the first MAX_EDGES rows of `df`.
# NOTE(review): the 5000-row cap presumably keeps the layout and centrality
# computations below tractable -- confirm before raising it.
MAX_EDGES = 5000

Graph = nX.Graph()
# head() simply returns fewer rows when the table is shorter than MAX_EDGES,
# whereas indexing df[...][i] for i in range(5000) would raise a KeyError.
edge_rows = df.head(MAX_EDGES)
# One bulk insert of (head, tail, weight) triples; the value from the
# 'edge_weight' column is stored under the edge attribute 'weight'.
Graph.add_weighted_edges_from(
    zip(edge_rows['head'], edge_rows['#tail'], edge_rows['edge_weight'])
)

In [None]:
# Network graph: draw `Graph` with each node sized by its betweenness centrality.

# Position nodes with a force-directed layout. The layout starts from random
# positions, so a fixed seed is needed to get the same picture on every run.
pos = nX.spring_layout(Graph, seed=42)

# Calculate betweenness centrality (normalized, endpoints counted).
betCent = nX.betweenness_centrality(Graph, normalized=True, endpoints=True)

# Node size varies with betweenness centrality. Iterate over the graph itself
# so the sizes line up with the node order draw_networkx uses by default.
node_size = [betCent[node] * 10000 for node in Graph]

# savefig does not create missing directories, so make sure the target exists.
os.makedirs('./Graphs', exist_ok=True)

# Create, save and display the figure.
plt.figure(figsize=(20,20))
nX.draw_networkx(Graph, pos=pos, with_labels=False, node_size=node_size)
plt.axis('off')
plt.savefig('./Graphs/Graph.jpg')
plt.show()

In [None]:
# Degree distribution of `Graph`: plot a histogram of node degrees and save a
# Protein/Degree table sorted from most to least connected.

# Neither savefig nor to_csv creates missing directories.
os.makedirs('./Graphs', exist_ok=True)
os.makedirs('./Csv Files', exist_ok=True)

# Take degrees from the network, keeping each one as an int next to its node.
# (np.array(Graph.degree()) would coerce every degree to a string.)
degree_by_protein = dict(Graph.degree())
degree_sequence = sorted(degree_by_protein.values(), reverse=True)
# Degree -> number of nodes having that degree.
degreeCount = Counter(degree_sequence)

# Histogram over one degree value per node, so the bar heights are node counts.
# Passing the Counter itself would not plot the per-node degrees.
plt.hist(degree_sequence, bins='auto')  # auto bin size is used
plt.title("Degree Histogram")
plt.xlabel("Degree")
plt.ylabel("Nodes")
plt.savefig('./Graphs/histogram.jpg')
plt.show()

# Save the proteins with their corresponding degrees, highest degree first.
# The table gets its own name instead of `df`, so the edge table loaded at the
# top of the notebook is not overwritten. The name `sort` is kept in case
# other cells refer to it.
sort = pd.DataFrame({
    "Protein": list(degree_by_protein.keys()),
    "Degree": list(degree_by_protein.values()),
}).sort_values(["Degree"], ascending=[False])
sort.to_csv("./Csv Files/sorted_setOfProteins.csv", index=False)

In [5]:
# Convert ONE UniProt ID to its corresponding gene name.
request_Protein = IdMappingClient.submit(
    source="UniProtKB_AC-ID", dest="Gene_Name", ids={"Q6UXB4"})
# submit() only starts the mapping job; wait before fetching, the same way the
# batch-conversion cell does, so the results are not requested too early.
time.sleep(5.0)
print(list((request_Protein.each_result())))

[{'from': 'Q6UXB4', 'to': 'CLEC4G'}]


In [None]:
# Map a whole set of UniProt IDs to their corresponding gene names in one job.
Protein_set = {
    "Q5MIZ7", "Q8TBF4", "Q9NVL8", "O75326", "P20933",
    "Q6P1J6", "P30939", "Q08379", "Q2TAC2", "P63000",
}
request_setProteins = IdMappingClient.submit(
    ids=Protein_set,
    source="UniProtKB_AC-ID",
    dest="Gene_Name",
)
# Pause before asking for the results of the submitted job.
time.sleep(5.0)
print([*request_setProteins.each_result()])

In [None]:
# Minimum spanning tree of the graph built from every row of the edge table.

# Re-read the edge list from `path` so this cell does not depend on whatever
# `df` currently refers to in the kernel.
ppi_edges = pd.read_csv(path)

g = nX.Graph()
# Columns are taken by position: 0 and 1 are the two endpoint nodes, 2 is the
# edge cost/weight. itertuples streams plain (node, next_node, weight) tuples,
# avoiding a per-row .loc lookup and positional [] access on a labelled Series.
g.add_weighted_edges_from(
    ppi_edges.iloc[:, :3].itertuples(index=False, name=None)
)

T = nX.minimum_spanning_tree(g)
# nX.info() was removed in networkx 3.0; print the summary explicitly instead.
print(f"Graph with {T.number_of_nodes()} nodes and {T.number_of_edges()} edges")

# Fixed seed so the layout is the same on every run.
pos = nX.spring_layout(T, seed=42)
plt.figure(figsize=(20,20))
nX.draw_networkx(T, pos=pos, with_labels=False,
                 node_color='b',
                 node_size= 30 )
plt.axis('off')