In [2]:
# Import every package the notebook needs up front, in the first cell.
import networkx as nx
import networkx.algorithms.community as nx_comm
import numpy as np
import pandas as pd
import scipy as sp
import random as rn
from heapq import nlargest

# some basic settings for plotting figures
import matplotlib.pyplot as plt
%matplotlib inline 
# Default font for every figure drawn below: bold DejaVu Sans, size 32.
font = dict(family='DejaVu Sans', weight='bold', size=32)

plt.rc('font', **font)
import community as community_louvain

In [3]:
# Load the protein-links edge list as a weighted graph. Node identifiers are
# kept as strings and lines starting with '#' are treated as comments.
G0 = nx.read_weighted_edgelist(
    "4932.protein.links.v11.5.txt",
    comments="#",
    nodetype=str,
)

In [4]:
# Drop low-confidence interactions: every edge whose weight is at or below
# the threshold is removed from G0 in place.
threshold_score = 400

# Collect the edges first and remove them afterwards, so the graph is never
# modified while its edge view is being iterated. The score is read from the
# 'weight' attribute that read_weighted_edgelist stores on each edge.
low_score_edges = [
    (u, v)
    for u, v, weight in G0.edges(data="weight")
    if weight <= threshold_score
]
G0.remove_edges_from(low_score_edges)

In [5]:
# Size and connectivity of G0. Each value is computed right before it is
# printed, one line per entry.
g0_summary = (
    ('number of nodes of G0:', G0.number_of_nodes),
    ('number of edges of G0:', G0.number_of_edges),
    ('Is the full G0 connected?',
     lambda: nx.connected.is_connected(G0)),
    ('How many connected subgraphs are there?',
     lambda: nx.connected.number_connected_components(G0)),
)
for label, compute in g0_summary:
    print(label, compute())

number of nodes of G0: 6394
number of edges of G0: 282074


In [7]:
# Keep only the largest connected component of G0 (as a subgraph of G0).
components = nx.connected_components(G0)
largest_cc = max(components, key=len)
G = G0.subgraph(largest_cc)

for label, value in (
    ('Type', type(largest_cc)),
    ('number of nodes of largest connected subgraph of G:', G.number_of_nodes()),
    ('number of edges of largest connected subgraph of G0:', G.number_of_edges()),
):
    print(label, value)

Type <class 'set'>
number of nodes of largest connected subgraph of G: 6113
number of edges of largest connected subgraph of G0: 282074


In [8]:
# Remove the essential proteins from G0 (in place).
# Column 1 of the header-less CSV holds the protein names; each one gets the
# '4932.' prefix before it is used as a node identifier.
ess = pd.read_csv("essential_pro.csv", header=None)
ess_pro = ['4932.' + name for name in ess[1].to_list()]
G0.remove_nodes_from(ess_pro)

In [12]:
# Size of G0 once the essential nodes have been removed.
for label, count in (
    ('number of nodes of G0 without essential nodes:', G0.number_of_nodes()),
    ('number of edges of G0 without essential nodes:', G0.number_of_edges()),
):
    print(label, count)

number of nodes of G0: 5098
number of edges of G0: 137012


In [14]:
# Narrow the selection to the proteins connected to our protein of interest.
# node_connected_component returns the set of nodes reachable from the given
# node directly, without building a shortest path to each of them first.
nodes = nx.node_connected_component(G0, '4932.YKL126W')
G = G0.subgraph(nodes)

In [15]:
# Size of G, the subgraph connected to the protein of interest.
for label, count in (
    ('number of nodes of G:', G.number_of_nodes()),
    ('number of edges of G:', G.number_of_edges()),
):
    print(label, count)

number of nodes of G: 4827
number of edges of G: 137012


In [4]:
# Define the base Network class used for the community analysis.
class Network:
    """Repeated community detection around one node of a graph.

    On construction the graph is partitioned ``N`` times (one run per seed
    ``0 .. N-1``), the community containing ``homologue`` is extracted from
    every partition, the most central nodes of each of those communities are
    collected, and finally the number of runs in which each node was central
    is counted.

    Class attributes:
        R: resolution passed to the Louvain algorithm.
        N: number of partition runs.
    """

    R = 50
    N = 10

    def __init__(self, graph, homologue, partition_method="louvain"):
        """Run the whole analysis for ``homologue`` inside ``graph``.

        Args:
            graph: graph to analyse; must support ``in`` and ``subgraph``.
            homologue: node whose community is tracked across the runs.
            partition_method: name of the partition algorithm; only
                ``"louvain"`` is supported.

        Raises:
            ValueError: if ``homologue`` is not a node of ``graph`` or
                ``partition_method`` is not supported.
        """
        if homologue not in graph:
            # Fail early with a clear message instead of an IndexError later.
            raise ValueError(f"homologue {homologue!r} is not a node of the graph")

        self.graph = graph
        self.homologue = homologue
        self.partition_method = partition_method

        self.partitions = []             # one partition (list of node sets) per run
        self.homologue_communities = []  # one subgraph per run
        self.central_nodes = []          # one list of node names per run
        self.important_nodes = {}        # { node : number of runs it was central in }
        # TODO: self.adjacent_communities = []

        self.set_partitions_robust()
        self.set_homologue_communities()
        self.set_central_nodes_robust()
        self.set_important_nodes()

    @staticmethod
    def find_partition(graph, partition_method, s):
        """Return one partition of ``graph`` computed with seed ``s``.

        Raises:
            ValueError: if ``partition_method`` is not supported.
        """
        if partition_method == "louvain":
            return nx_comm.louvain_communities(
                graph, resolution=Network.R, seed=s)
        raise ValueError(f"unsupported partition method: {partition_method!r}")

    @staticmethod
    def find_central_nodes(community, top_k=5):
        """Return the most significant nodes according to three centrality measures.

        The ``top_k`` best nodes by degree, betweenness and eigenvector
        centrality are merged; duplicates are dropped while keeping the
        first-seen order, so the result is the same on every run.
        """
        measures = (
            nx.degree_centrality(community),
            nx.betweenness_centrality(community),
            nx.eigenvector_centrality(community),
        )
        top_nodes = []
        for scores in measures:
            top_nodes.extend(nlargest(top_k, scores, key=scores.get))
        return list(dict.fromkeys(top_nodes))

    def set_partitions_robust(self):
        """Append ``N`` partitions of the graph, one per seed ``0 .. N-1``."""
        for seed in range(Network.N):
            self.partitions.append(self.find_partition(
                self.graph, self.partition_method, seed))

    def set_homologue_communities(self):
        """For each partition, store the community that contains the homologue."""
        for part in self.partitions:
            for community in part:
                if self.homologue in community:
                    # Use the instance's own graph, not a notebook global.
                    self.homologue_communities.append(
                        self.graph.subgraph(community))
                    break

    def set_central_nodes_robust(self):
        """Collect the central nodes of every stored homologue community."""
        for community in self.homologue_communities:
            self.central_nodes.append(self.find_central_nodes(community))
        # TODO: decide how to cross reference the results

    def set_important_nodes(self):
        """Count in how many runs each node appeared among the central nodes.

        Nodes already present in ``important_nodes`` keep their stored count.
        """
        counts = {}
        for run_nodes in self.central_nodes:
            for node in run_nodes:
                counts[node] = counts.get(node, 0) + 1
        for node, count in counts.items():
            self.important_nodes.setdefault(node, count)

    def get_partitions(self):
        """Return the list of partitions, one per run."""
        return self.partitions

    def get_homologue_communities(self):
        """Return the homologue's community subgraph for every run."""
        return self.homologue_communities

    def get_central_nodes(self):
        """Return the per-run lists of central nodes."""
        return self.central_nodes

    def get_important_nodes(self):
        """Return the mapping from node to the number of runs it was central in."""
        return self.important_nodes

In [18]:
# Run the full community analysis for the protein of interest on G.
protein = Network(graph=G, homologue='4932.YKL126W')

In [19]:
# Display, for every node that was central in at least one run, how many times
# it appears in the per-run lists of central nodes of the homologue's community.
protein.get_important_nodes()

{'4932.YIL105C': 5,
 '4932.YMR068W': 8,
 '4932.YMR104C': 8,
 '4932.YKL126W': 10,
 '4932.YBR270C': 8,
 '4932.YJL058C': 8,
 '4932.YNL047C': 3,
 '4932.YMR102C': 1,
 '4932.YKL128C': 1,
 '4932.YMR103C': 1,
 '4932.YMR101C': 1,
 '4932.YBR013C': 1,
 '4932.YDR466W': 1,
 '4932.YBR028C': 1,
 '4932.YDL037C': 1,
 '4932.YDL039C': 1}