# Idea 2
- Find communities using MCL or Louvain, aiming for communities of size ~30
- Perform Enrichment Analysis on clusters

In [1]:
import networkx as nx
import numpy as np
import scipy as sp

In [2]:
# Load the weighted edge list from "yeast.txt" into a graph.
# Lines starting with "#" are treated as comments; node labels are kept as strings.
G = nx.read_weighted_edgelist(
    "yeast.txt",
    comments="#",
    nodetype=str,
)

In [3]:
# Drop every edge whose score does not exceed the threshold
# (an edge is kept only if its weight is strictly greater than threshold_score).
threshold_score = 700

# Collect the edges to drop first and remove them afterwards, so the graph is
# never modified while its edge view is being iterated. The score is read from
# the "weight" attribute by name rather than by position in the attribute dict.
low_score_edges = [
    (u, v)
    for u, v, weight in G.edges(data="weight")
    if weight <= threshold_score
]
G.remove_edges_from(low_score_edges)

We relabel the nodes to remove the 4932 tag (the first five characters of each node name)

In [4]:
# Map each node name to the same name without its first five characters,
# then build the relabeled graph H from that mapping.
stripped_names = {node: node[5:] for node in G}
H = nx.relabel_nodes(G, stripped_names)

Get our clusters

In [9]:
# Run Louvain community detection on H with a fixed seed, and order the
# resulting communities from largest to smallest.
# NOTE(review): resolution=50 is presumably tuned toward the target community
# size mentioned at the top of the notebook -- confirm.
louvain = sorted(
    nx.algorithms.community.louvain_communities(H, resolution=50, seed=123),
    key=len,
    reverse=True,
)

# Summary statistics of the partition.
number_of_communities = len(louvain)
size_of_communities = list(map(len, louvain))

print("Number of communities: {}".format(number_of_communities))
print("Sizes of communities: ", size_of_communities)

Number of communities: 1291
Sizes of communities:  [61, 58, 54, 48, 43, 43, 41, 41, 41, 40, 40, 39, 39, 38, 38, 38, 37, 37, 37, 37, 36, 36, 36, 34, 34, 34, 33, 33, 32, 32, 32, 31, 31, 31, 30, 30, 30, 30, 29, 29, 29, 29, 29, 29, 28, 28, 28, 28, 28, 28, 28, 27, 27, 27, 27, 27, 27, 26, 26, 25, 25, 25, 25, 24, 24, 24, 24, 24, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 21, 21, 21, 21, 21, 21, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 18, 18, 18, 18, 18, 18, 18, 18, 18, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 16, 16, 16, 16, 16, 16, 16, 16, 16, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 14, 14, 14, 14, 14, 14, 14, 14, 14, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 12, 12, 12, 12, 12, 12, 12, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 8, 8, 8, 8, 8, 8, 8, 8, 

We then print the proteins of a chosen cluster so we can perform enrichment analysis on it.
We will adapt this further once we have finalized our clustering algorithm.
We perform this enrichment using the Metascape site:
https://metascape.org/gp/index.html#/main/step1

In [12]:
# Print one member per line for the community at index 12 of the size-sorted
# list, so the IDs can be pasted into the enrichment tool.
cluster_members = louvain[12]
for member in cluster_members:
    print(member)

YOR049C
YNR055C
YNL065W
YIR039C
YPL058C
YHR048W
YGR143W
YIL121W
YNL318C
YDR011W
YOR153W
YML116W
YML076C
YKR104W
YDR072C
YGL013C
YLL063C
YBL005W
YOL156W
YBR008C
YOR162C
YLL028W
YJL219W
YDR406W
YIR042C
YOR172W
YLR266C
YNL231C
YOR380W
YGR224W
YGR281W
YNR070W
YIL120W
YIL013C
YER143W
YOR328W
YHR178W
YGR197C
YKR103W
