### Initialization

In [3]:
## Packages to be used 

# Network Stuff 
import networkx as nx
import markov_clustering as mc
import random
from gprofiler import GProfiler
gp = GProfiler(return_dataframe=True)


# Standard 
import pandas as pd 
import numpy as np
import scipy as sp
import matplotlib.pyplot as plt
import pickle 
import math


# %matplotlib inline 
# font = {'family' : 'DejaVu Sans',
#         'weight' : 'bold',
#         'size'   : 32}

# plt.rc('font', **font)


# Build the interaction graph `g` from a whitespace-separated weighted edge list:
# lines starting with "#" are skipped and node labels are kept as strings.
# NOTE(review): the file name suggests the STRING v12.0 links table for taxon 4932
# (S. cerevisiae) — confirm the source and record it in a markdown cell.
g = nx.read_weighted_edgelist("4932.protein.links.v12.0.txt",comments="#",nodetype=str)

In [4]:
# Minimum edge weight an interaction must have to be kept in the graph.
threshold_score = 500

# Collect the weak edges first and remove them in a single call, so the graph is
# never mutated while its edge view is being iterated. Edges that no longer carry
# a weight (e.g. when this cell is re-run) default to the threshold and are kept.
weak_edges = [
    (u, v)
    for u, v, weight in g.edges(data='weight', default=threshold_score)
    if weight < threshold_score
]
g.remove_edges_from(weak_edges)

# Drop the weight attribute from the surviving edges so that the adjacency
# matrix built below is unweighted (every remaining edge counts as 1).
for u, v, attrs in g.edges(data=True):
    attrs.pop('weight', None)

# Adjacency matrix and the node order that indexes its rows/columns.
matrix = nx.to_numpy_array(g)
node_list = list(g.nodes)

# These are the important connections that we know SGS1 connects to
related_proteins = ['4932.YMR190C','4932.YNL088W','4932.YLR234W','4932.YPL024W','4932.YMR167W' ]

In [5]:
### Functions to be created/copypasted 

# def graph_details() 
    # Number of nodes
    # Number of edges 
    # Connectivity Status (Fully or Not) 
    # Degree Distribution: PLOTTING 
    # Degree of SGS1 Node 
    # .... 

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

# markov clustering, with inflation parameter
def mcl(mtx,inflation_parameter):
    """Run Markov clustering on an adjacency matrix and name the clusters.

    Parameters
    ----------
    mtx : adjacency matrix handed to ``mc.run_mcl``.
    inflation_parameter : value passed as ``inflation`` to ``mc.run_mcl``.

    Returns
    -------
    tuple
        ``(inflation_parameter, result, clusters)`` where ``result`` is the raw
        output of ``mc.run_mcl`` and ``clusters`` is a list of tuples of node
        names (matrix indices translated through the module-level ``node_list``).
    """
    result = mc.run_mcl(mtx, inflation=inflation_parameter)

    # Translate every matrix index in every cluster into its node name.
    named_clusters = [
        tuple(node_list[member] for member in members)
        for members in mc.get_clusters(result)
    ]

    return inflation_parameter, result, named_clusters

def large_comm(cluster,threshold):
    """Return the communities in ``cluster`` that have at least ``threshold`` members.

    ``cluster`` is an iterable of sized collections; the original order is kept.
    """
    return [members for members in cluster if not len(members) < threshold]

#turns clusters into adjacency matrix
def clu_to_adj_mtx(cluster):
    """Extract the adjacency sub-matrix induced by the nodes of one cluster.

    Node names are looked up in the module-level ``node_list`` to find their
    row/column positions in the module-level ``matrix``. Rows and columns of the
    result follow the order of ``cluster``.
    """
    positions = [node_list.index(member) for member in cluster]

    # Select the cluster's rows first, then the matching columns.
    return matrix[positions, :][:, positions]

#turns adjacency matrix into graph 
def adj_mtx_to_graph(mat,name):
    """Build a graph from adjacency matrix ``mat`` and relabel its nodes.

    ``name`` is the mapping handed to ``nx.relabel_nodes`` (here: matrix
    position -> node name).
    """
    return nx.relabel_nodes(nx.from_numpy_array(mat), name)

# turns graph into outputs from some centrality measures, 
# The centrality measures include degree centrality, eigenvector centrality, closeness centrality, and betweenness centrality
def graph_to_cent_meas(graph):
    """Rank the nodes of ``graph`` under four centrality measures.

    Returns a dict with the keys 'degree', 'eigenvector', 'closeness' and
    'betweenness'; each value is a list of ``(node, score)`` pairs sorted by
    score, highest first.
    """
    measures = (
        ('degree', nx.degree_centrality),
        ('eigenvector', nx.eigenvector_centrality),
        ('closeness', nx.closeness_centrality),
        ('betweenness', nx.betweenness_centrality),
    )

    result_dict = {}
    for label, measure in measures:
        ranking = list(measure(graph).items())
        ranking.sort(key=lambda pair: pair[1], reverse=True)
        result_dict[label] = ranking
    return result_dict

def important_nodes(cent_meas_of_clus, n_of_nodes):
    """Print the top-ranked nodes of one cluster for every centrality measure.

    Parameters
    ----------
    cent_meas_of_clus : dict
        Maps a measure name (str) to a ranked list of entries, best first.
    n_of_nodes : int
        Maximum number of entries to print per measure. Rankings shorter than
        this are printed in full instead of raising IndexError; values below
        zero print nothing.
    """
    limit = max(n_of_nodes, 0)
    for measure_name, ranking in cent_meas_of_clus.items():
        print(measure_name+':')
        # Slicing tolerates clusters with fewer than `limit` nodes.
        for entry in ranking[:limit]:
            print(entry)
        print('\n')

    # COMMUNITY FINDING ALGOS HERE 

    # def community_partitions(graph, initial_thresh, trials) 
    # Edge Dropout 
    # threshold_score = initial_thresh
    # for edge in graph.edges: 
    #     weight = list(graph.get_edge_data(edge[0],edge[1]).values())
    #     if(weight[0] < threshold_score):
    #         graph.remove_edge(edge[0],edge[1])



    

In [6]:
#Markov Clustering

# 1.4 is the inflation parameter that maximises modularity 
# Inflation value reported to maximise modularity for this network.
inflation_parameter = 1.4
markov_clustering_14 = mcl(matrix, inflation_parameter)

# Third element of the mcl() result is the list of named clusters;
# keep them ordered from largest to smallest.
clusters = sorted(markov_clustering_14[2], key=len, reverse=True)

In [7]:
# matches number to the name of the node 
# For every cluster: position inside the cluster -> node name.
cluster_nodename_dict_list = [dict(enumerate(members)) for members in clusters]

# Pipeline per cluster: adjacency sub-matrix -> relabelled graph -> centrality rankings.
clusters_adj_mtx = [clu_to_adj_mtx(members) for members in clusters]

clusters_graph = [
    adj_mtx_to_graph(adjacency, position_to_name)
    for adjacency, position_to_name in zip(clusters_adj_mtx, cluster_nodename_dict_list)
]

clusters_cent_meas = [graph_to_cent_meas(cluster_graph) for cluster_graph in clusters_graph]

In [8]:
# centrality measures 

# Only look at larger clusters 
# Keep only clusters whose degree ranking lists at least 20 nodes.
large_clusters_cent_meas = [
    measures for measures in clusters_cent_meas if len(measures['degree']) >= 20
]

nodes_to_print = 10
counter = 0
# Print a numbered header followed by the top nodes of each large cluster.
while counter < len(large_clusters_cent_meas):
    print(counter,'~~~~~~~~~~~~~~~~~~~~')
    important_nodes(large_clusters_cent_meas[counter], nodes_to_print)
    counter += 1

0 ~~~~~~~~~~~~~~~~~~~~
degree:
('4932.YLL013C', 0.19596354166666666)
('4932.YGL026C', 0.16796875)
('4932.YBR196C', 0.12369791666666666)
('4932.YBL099W', 0.123046875)
('4932.YDR050C', 0.12044270833333333)
('4932.YCR012W', 0.1171875)
('4932.YKL211C', 0.11653645833333333)
('4932.YLL041C', 0.11328125)
('4932.YKL192C', 0.111328125)
('4932.YER069W', 0.107421875)


eigenvector:
('4932.YDR050C', 0.12172156501784563)
('4932.YBR196C', 0.12146130361943822)
('4932.YNR001C', 0.11945201156953499)
('4932.YPL262W', 0.11779783238672428)
('4932.YCR012W', 0.11400826860998146)
('4932.YGL026C', 0.11176724477178801)
('4932.YLR304C', 0.11151186138834446)
('4932.YCR005C', 0.1102609390837423)
('4932.YBL099W', 0.10840472235330212)
('4932.YKL085W', 0.10779431384183395)


closeness:
('4932.YLL013C', 0.5106382978723404)
('4932.YBL099W', 0.471889400921659)
('4932.YCR012W', 0.47116564417177914)
('4932.YGL026C', 0.4692942254812099)
('4932.YKL192C', 0.4668693009118541)
('4932.YGR192C', 0.4620938628158845)
('4932.YDR05

In [14]:
################################################## 
############### put code for fast label prop 
#########################

# Restrict the analysis to the largest connected component of g.
largest_cc = max(nx.connected_components(g), key=len)
Gc = g.subgraph(largest_cc)

# Baseline partition of the connected graph and its modularity.
commus = nx.community.label_propagation_communities(Gc)
print("commu # :" , len(commus))
modularity_v1 = nx.community.modularity(Gc, commus)

print("modularity :" , modularity_v1)

# G is an independent copy of Gc with every single-node community removed.
G = nx.Graph(Gc)
singleton_nodes = [node for commu in commus if len(commu) < 2 for node in commu]
G.remove_nodes_from(singleton_nodes)
print("////////// REMOVAL ///////////")
print("\t|G0| :" , g.number_of_nodes())
print("\t|Gc| :" , Gc.number_of_nodes())
print("\t|G| :" , G.number_of_nodes())

# Re-partition the pruned graph and report its modularity.
newer_commus = nx.community.label_propagation_communities(G)
modularity_v2 = nx.community.modularity(G, newer_commus)

print("new modularity :" , modularity_v2)
#######################################################################################  






###################################################################################################### 
###################################################################################################### 
def run_many_times(n=101 , G=G) :
    """Run fast label propagation ``n`` times on ``G`` with fresh random seeds.

    Returns a list with one ``[communities, seed]`` entry per run, where
    ``communities`` is the list of communities produced by
    ``nx.community.fast_label_propagation_communities`` and ``seed`` is the
    integer seed drawn for that run. A one-line progress counter is printed.

    Other algorithms previously tried in place of fast label propagation:
    asyn_fluidc, louvain_communities, kernighan_lin_bisection.
    """
    largest_seed = 2**(64)-1
    run_records = []
    for i in range(n) :
        random_number_in_range = random.randint(0 , largest_seed)
        found = nx.community.fast_label_propagation_communities(G , seed=random_number_in_range)
        run_records.append([list(found) , random_number_in_range])

        # Overwrite the same console line with the number of finished runs.
        print("                    " , end='\r')
        print("( O v O) {" , i+1 , ")" , end='\r')

    return run_records
###################################################################################################### 
###################################################################################################### 
def tmp_F(runs , in_G=G) :
    """Count, for every ordered pair of nodes, the runs in which they share a community.

    ``runs`` is a list of ``[communities, seed]`` records (only element 0 is
    read). The result is a dict of dicts with ``rela_c[a][b]`` equal to the number
    of times ``b`` appeared in a community that also contains ``a``; a node is
    never counted against itself, so ``rela_c[a][a]`` stays 0. Progress is
    printed on a single console line.
    """
    rela_c = {node: {other: 0 for other in in_G} for node in in_G}

    total_runs = len(runs)
    for finished , record in enumerate(runs , start=1) :
        communities = record[0]
        for node in in_G :
            for commu in communities :
                if node not in commu :
                    continue
                counts = rela_c[node]
                for member in commu :
                    if member != node :
                        counts[member] += 1
        print(str(finished) + "/" + str(total_runs) + " RUN\t[ OK ]" , end='\r')

    return rela_c
###################################################################################################### 
###################################################################################################### 
def dense_commu_of(tar_node , rela_counter_in , run_result , dense_thresh=0.95 , drifter_thresh=0.5) :
    """Split the companions of ``tar_node`` into dense members and drifters.

    Parameters
    ----------
    tar_node : node whose row of ``rela_counter_in`` is inspected.
    rela_counter_in : dict of dicts; ``rela_counter_in[a][b]`` is the number of
        runs in which ``a`` and ``b`` shared a community.
    run_result : sized collection of runs; only its length is used.
    dense_thresh : minimum share of runs for a node to be a dense member
        (default 0.95, the previously hard-coded value).
    drifter_thresh : minimum share of runs for a node to be a drifter
        (default 0.5, the previously hard-coded value).

    Returns
    -------
    list
        ``[dense_commu, drifters]``. ``tar_node`` is never reported as a drifter.
        Both lists are empty when ``run_result`` is empty, because no share can
        be computed without runs.

    Raises
    ------
    KeyError
        If ``tar_node`` has no row in ``rela_counter_in``.
    """
    shared_counts = rela_counter_in[tar_node]
    n_runs = len(run_result)
    if n_runs == 0 :
        # Avoid dividing by zero: with no runs nothing is dense or drifting.
        return [[] , []]

    dense_commu = []
    drifters = []
    for other , shared_runs in shared_counts.items() :
        share = shared_runs / n_runs
        if share >= dense_thresh :
            dense_commu.append(other)
        elif share >= drifter_thresh and other != tar_node :
            drifters.append(other)

    return [dense_commu , drifters]
###################################################################################################### 
###################################################################################################### 
def QC_Idea_v2(relas , runs , starting_node="4932.YMR190C" , min_commu_size=11) :
    """Greedily assemble dense communities from co-membership counts.

    Parameters
    ----------
    relas : dict of dicts of co-membership counts (see ``tmp_F``).
    runs : the run records the counts were computed from; passed on to
        ``dense_commu_of``.
    starting_node : if non-empty, its dense community is always created first
        (community id 0), whatever its size.
    min_commu_size : minimum size for every other community. A node is skipped
        when its dense list has fewer entries than this, or when the community
        left after discarding already-assigned nodes (seed node included) is
        smaller than this. Default 11, the previously hard-coded value.

    Returns
    -------
    list
        ``[self_def_commus, all_nodes_info]``. ``self_def_commus`` is a list of
        communities (lists of nodes, seed node first); a community's id is its
        position in that list. ``all_nodes_info[node]`` is
        ``[dense flag (0/1), id of its dense community or -1,
        ids of all communities it is a dense member or a drifter of]``.
    """
    self_def_commus = []
    all_nodes_info = {node: [0 , -1 , []] for node in relas}

    def _register(members , drifters) :
        # Store a new community and tag its dense members and its drifters.
        commu_id = len(self_def_commus)
        self_def_commus.append(members)
        for member in members :
            info = all_nodes_info[member]
            info[0] = 1
            info[1] = commu_id
            info[2].append(commu_id)
        for drifter in drifters :
            all_nodes_info[drifter][2].append(commu_id)

    if starting_node :
        dense , drifters = dense_commu_of(starting_node , relas , runs)
        _register([starting_node] + list(dense) , drifters)

    for cur_tar in relas :
        if all_nodes_info[cur_tar][0] == 1 :      # already a dense member somewhere
            continue

        dense , drifters = dense_commu_of(cur_tar , relas , runs)
        if len(dense) < min_commu_size :
            continue

        # Nodes already claimed by an earlier community cannot be claimed again.
        members = [cur_tar] + [node for node in dense if all_nodes_info[node][0] == 0]
        if len(members) < min_commu_size :
            continue

        _register(members , drifters)

    return [self_def_commus , all_nodes_info]
###################################################################################################### 
###################################################################################################### 
def possi_dests(ni , dense_commus , nodes_info) :
    """Find the communities reachable from ``ni``'s community through drifters.

    A drifter is a node that is not dense (flag 0) and whose membership list
    contains the id of the community holding ``ni``. Every other community id
    listed by such a drifter is a candidate destination.

    Returns ``[destinations, drifters]``. Destinations are limited to
    communities with at least two members; if that filter leaves nothing, a
    notice is printed and the unfiltered candidates are returned instead.
    """
    # Position of the community containing ni; len(dense_commus) if there is none.
    ni_idx = next(
        (pos for pos , commu in enumerate(dense_commus) if ni in commu) ,
        len(dense_commus) ,
    )
    if ni_idx < len(dense_commus) :
        print("[" + str(ni) + "]" , "@" , ni_idx)

    possi_padding = []
    possi_dest = []
    for node , info in nodes_info.items() :
        if info[0] != 0 or ni_idx not in info[2] :
            continue
        possi_padding.append(node)
        for commu_id in info[2] :
            if commu_id != ni_idx and commu_id not in possi_dest :
                possi_dest.append(commu_id)

    feelted_possi_dest = [cid for cid in possi_dest if len(dense_commus[cid]) >= 2]
    if not feelted_possi_dest :
        print("( ' ^ ') { EMPTY )")
        return [possi_dest , possi_padding]
    return [feelted_possi_dest , possi_padding]
###################################################################################################### 
###################################################################################################### 
def top_few_btween_member(Gin , lookfor = "" , show=0) :
    """Return the five nodes of ``Gin`` with the highest betweenness centrality.

    Graphs with fewer than five nodes are returned whole, as a list of nodes.
    Otherwise nodes are ranked by ``(betweenness, node)`` and the top five are
    returned, best first. With ``show`` truthy each of them is printed with its
    score; with a non-empty ``lookfor`` the rank of that node is printed
    (1 = highest), or a notice if it is not in the graph.
    """
    if len(Gin) < 5 :
        return list(Gin)

    centrality = nx.betweenness_centrality(Gin)
    # Ascending order; ties on the score are broken by comparing the nodes.
    ranked = sorted((centrality[node] , node) for node in Gin)

    pack = []
    for score , node in ranked[:-6:-1] :      # last five entries, best first
        if show :
            print("[ " + node , "|" + str(score) + "]")
        pack.append(node)

    if len(lookfor) > 0 :
        ordered_nodes = [node for _score , node in ranked]
        if lookfor in ordered_nodes :
            print("#" + str(len(ordered_nodes) - ordered_nodes.index(lookfor)) , "\t" , lookfor)
        else :
            print("( O _ O) { ??? )")
    return pack
###################################################################################################### 
###################################################################################################### 
def unique_intersecting_proteins(df, target_protein):
    """Collect the proteins that share an 'intersections' entry with ``target_protein``.

    Only rows whose 'intersections' value is a list with more than one element
    and containing ``target_protein`` are considered. Returns the distinct other
    members of those lists (in no particular order), or ``[]`` for an empty frame.

    Raises
    ------
    KeyError
        If a non-empty ``df`` has no 'intersections' column.
    """
    if df.empty:
        return []

    if 'intersections' not in df.columns:
        raise KeyError("'intersections vanished")

    companions = set()
    for entry in df['intersections']:
        qualifies = isinstance(entry, list) and len(entry) > 1 and target_protein in entry
        if qualifies:
            companions.update(entry)

    companions.discard(target_protein)
    return list(companions)
###################################################################################################### 
###################################################################################################### 

###################################################################################################### 
###################################################################################################### 






















tar_node = "4932.YMR190C"

# Repeat label propagation and count how often each pair of nodes ends up together.
run_result = run_many_times(33 , G)
rela_counter = tmp_F(run_result , G)

# Build the dense communities; the target node seeds the first one.
tmp = QC_Idea_v2(rela_counter , run_result , tar_node)
self_def_commus , all_node_info = tmp

# Position of the community holding the target node (len(self_def_commus) if none).
tar_idx = next(
    (pos for pos , commu in enumerate(self_def_commus) if tar_node in commu) ,
    len(self_def_commus) ,
)
if tar_idx < len(self_def_commus) :
    print("[" + tar_node + "]" , "@" , tar_idx)

# Sanity check: number of communities versus the largest community id in use.
print(len(self_def_commus))
tmp_l = [commu_id for info in all_node_info.values() for commu_id in info[2]]
print(max(tmp_l))

# Communities with more than 7 members.
feelted_self_def_commus = [commu for commu in self_def_commus if len(commu) > 7]

print(" |pre feelter| =" , len(self_def_commus))
print("|post feelter| =" , len(feelted_self_def_commus))

# Communities reachable from the target's community, and the drifters linking them.
other_sides , paddings = possi_dests(tar_node , self_def_commus , all_node_info)

print(other_sides)
print(len(paddings))
###################################################################################################### 

""" 
print() 
for i in other_sides : 
    tmp_pack = [] 
    dd_list = [] 
    tmp_G = G.subgraph(self_def_commus[i]) 
    

    for ii in paddings : 
        if i in all_node_info[ii][2] : 
            tmp_pack.append(ii) 
    tmp_top_btw = top_few_btween_member(tmp_G , "" , 0) 
    print("   starting point" , ":\t" , "4932.YMR190C") 
    #print("\tdrifters :\t" , tmp_pack) 
    print("essens 0f commu_" + str(i) + " :\t" , tmp_top_btw) 

    for ii in self_def_commus[tar_idx] : 
        dd_list.append(ii) 
    for ii in self_def_commus[i] : 
        dd_list.append(ii) 
    for ii in tmp_pack : 
        dd_list.append(ii) 
    dd_G = G.subgraph(dd_list) 

    for i in range(len(tmp_top_btw)) : 
        try : 
            print(">>>LinkingPath " + str(i+1) + " : HEAD --> TAIL\n\t\t" , nx.shortest_path(dd_G , "4932.YMR190C" , tmp_top_btw[i]) ) 
            #break 
        except nx.NetworkXNoPath : 
            print("can't reach" , "[" + tmp_top_btw[i] + "]") 
            if i == len(tmp_top_btw) - 1 : 
                print("[ NO PATH ]") 
            #print("/////////////////////////////////////////\n////////////// NO PATH /////////////////\n///////////////////////////////////") 
            
        
    #print(">>>LinkingPath : HEAD --> Drifter\n\t\t" , nx.shortest_path(G , "4932.YMR190C" , "4932.YDR545W") ) 
        
    print("\n\n") 
""" 







#############

########################


#################################################################### 
#################################
######################################################





commu # : 23
modularity : 0.06077454169731462
////////// REMOVAL ///////////
	|G0| : 6538
	|Gc| : 6160
	|G| : 6160
new modularity : 0.06077454169731462
[4932.YMR190C] @ 0  
30
29
 |pre feelter| = 30
|post feelter| = 30
[4932.YMR190C] @ 0
[2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 23, 24, 25, 26, 27, 28, 29]
223


' \nprint() \nfor i in other_sides : \n    tmp_pack = [] \n    dd_list = [] \n    tmp_G = G.subgraph(self_def_commus[i]) \n    \n\n    for ii in paddings : \n        if i in all_node_info[ii][2] : \n            tmp_pack.append(ii) \n    tmp_top_btw = top_few_btween_member(tmp_G , "" , 0) \n    print("   starting point" , ":\t" , "4932.YMR190C") \n    #print("\tdrifters :\t" , tmp_pack) \n    print("essens 0f commu_" + str(i) + " :\t" , tmp_top_btw) \n\n    for ii in self_def_commus[tar_idx] : \n        dd_list.append(ii) \n    for ii in self_def_commus[i] : \n        dd_list.append(ii) \n    for ii in tmp_pack : \n        dd_list.append(ii) \n    dd_G = G.subgraph(dd_list) \n\n    for i in range(len(tmp_top_btw)) : \n        try : \n            print(">>>LinkingPath " + str(i+1) + " : HEAD --> TAIL\n\t\t" , nx.shortest_path(dd_G , "4932.YMR190C" , tmp_top_btw[i]) ) \n            #break \n        except nx.NetworkXNoPath : \n            print("can\'t reach" , "[" + tmp_top_btw[i] + "]")

In [46]:
def _shortest_linking_path(jump_G , land_G , start , target , via_candidates) :
    """Shortest start -> drifter -> target path over all candidate drifters.

    The first leg is searched in ``jump_G`` and the second in ``land_G``.
    Returns ``(path, via_node)``; ``([], "[ NOTHING ]")`` if no candidate
    connects both legs. On ties the earliest candidate wins.
    """
    best_path = []
    via_node = "[ NOTHING ]"
    for drifter in via_candidates :
        try :
            left_p = nx.shortest_path(jump_G , start , drifter)
            right_p = nx.shortest_path(land_G , drifter , target)
        except nx.NetworkXNoPath :
            continue
        candidate = left_p[:-1] + right_p      # the drifter is shared by both legs
        # Accept the first reachable candidate, not only the one at position 0:
        # otherwise an unreachable first drifter hides every later valid path.
        if not best_path or len(candidate) < len(best_path) :
            best_path = candidate
            via_node = drifter
    return best_path , via_node


def tmp_H(starting_node , to_commus , drifters , node_infos , dense_commus , G=G):
    """Score how functionally related each destination community is to ``starting_node``.

    Parameters
    ----------
    starting_node : node every linking path starts from.
    to_commus : ids (positions in ``dense_commus``) of the destination
        communities (``other_sides`` in the driver cell).
    drifters : candidate drifter nodes (``paddings``).
    node_infos : per-node records as produced by ``QC_Idea_v2``
        (``all_node_info``); element 2 lists the community ids of a node.
    dense_commus : list of dense communities (``self_def_commus``).
    G : graph in which paths are searched.

    For each destination, paths are built from ``starting_node`` through one of
    the top-betweenness drifters shared with that community to each of the
    community's top-betweenness members. Each path is sent to g:Profiler and
    scored as (number of proteins sharing an enriched term with the starting
    node) / (number of edges on the path). A community's score is the sum of
    its path scores divided by the number of target members (unreachable
    targets contribute 0).

    Returns
    -------
    dict
        ``{"Community <id>": {"Score": float, "Linking Path": [paths]}}``.

    Raises
    ------
    IndexError
        If ``starting_node`` is in none of ``dense_commus``.
    """
    comm_func_similar = {}

    # Position of the community that contains the starting node.
    idx = 0
    for commu in dense_commus :
        if starting_node in commu :
            break
        idx += 1

    for i in to_commus :
        tmp_G = G.subgraph(dense_commus[i])

        # Drifters that also belong to destination community i.
        shared_drifters = [node for node in drifters if i in node_infos[node][2]]
        tmp_top_btw = top_few_btween_member(tmp_G , "" , 0)
        print("   starting point" , ":\t" , starting_node , " @ " , idx)
        print("essens 0f commu_" + str(i) + " :\t" , tmp_top_btw)

        # First leg: start community + drifters. Second leg: drifters + destination.
        jump_path = list(dense_commus[idx]) + shared_drifters
        land_path = shared_drifters + list(dense_commus[i])

        # Only the most central drifters (within the drifter subgraph) are tried.
        dd_G = G.subgraph(shared_drifters)
        tmp_pack = list(top_few_btween_member(dd_G , "" , 0))
        print("essen drifer <" , idx , i , ">:" , tmp_pack , "\n")

        # The two search graphs do not change inside the loops below.
        jump_G = G.subgraph(jump_path)
        land_G = G.subgraph(land_path)

        functionality = 0
        linking_paths = []
        for ii in range(len(tmp_top_btw)) :
            shortest_path_for_now , via_node = _shortest_linking_path(
                jump_G , land_G , starting_node , tmp_top_btw[ii] , tmp_pack
            )
            if 0 == len(shortest_path_for_now) :
                print(">>>LinkingPath " + str(ii+1) + " : HEAD --> TAIL  via" , via_node , "\n\t\t" , "[ NO PATH ]" )
                continue

            df = gp.profile( organism="scerevisiae", query=shortest_path_for_now, no_evidences=False)
            unique_proteins = unique_intersecting_proteins(df, starting_node)
            path_score = len(unique_proteins)/(len(shortest_path_for_now)-1)
            functionality = functionality + path_score
            linking_paths.append(shortest_path_for_now)
            print(">>>LinkingPath " + str(ii+1) + " : HEAD --> TAIL  via" , via_node , "\n\t\t" , shortest_path_for_now, "\n\t\t Functionally Similar Proteins:", unique_proteins, "\n\t\t Score =", path_score  )

        # An empty destination community has no targets; score it 0 instead of dividing by zero.
        functionality = functionality / len(tmp_top_btw) if len(tmp_top_btw) > 0 else 0
        comm_func_similar["Community "+ str(i)] = {"Score":functionality, "Linking Path":linking_paths}
        print("\n\n")

    return comm_func_similar

In [47]:
# Score every destination community found by possi_dests against the SGS1 node,
# using the drifters and dense communities computed in the earlier driver cell.
# Each linking path triggers a gp.profile call inside tmp_H.
comm_func_similar = tmp_H("4932.YMR190C" , other_sides , paddings , all_node_info, self_def_commus , G) 

   starting point :	 4932.YMR190C  @  0
essens 0f commu_2 :	 ['4932.YGL026C', '4932.YER069W', '4932.YMR169C', '4932.YPR035W', '4932.YBR208C']
essen drifer < 0 2 >: ['4932.YLL036C', '4932.YOR353C', '4932.YJR012C', '4932.YER171W', '4932.YLR046C'] 

>>>LinkingPath 1 : HEAD --> TAIL  via 4932.YLL036C 
		 ['4932.YMR190C', '4932.YAR007C', '4932.YLL036C', '4932.YJR051W', '4932.YGL026C'] 
		 Functionally Similar Proteins: ['4932.YAR007C'] 
		 Score = 0.25
>>>LinkingPath 2 : HEAD --> TAIL  via 4932.YER171W 
		 ['4932.YMR190C', '4932.YER171W', '4932.YGL058W', '4932.YOR128C', '4932.YER069W'] 
		 Functionally Similar Proteins: ['4932.YOR128C', '4932.YGL058W', '4932.YER069W', '4932.YER171W'] 
		 Score = 1.0
>>>LinkingPath 3 : HEAD --> TAIL  via 4932.YLL036C 
		 ['4932.YMR190C', '4932.YAR007C', '4932.YLL036C', '4932.YJL045W', '4932.YMR169C'] 
		 Functionally Similar Proteins: ['4932.YAR007C'] 
		 Score = 0.25
>>>LinkingPath 4 : HEAD --> TAIL  via 4932.YER171W 
		 ['4932.YMR190C', '4932.YER171W', '49

In [52]:
# Rank the communities from highest to lowest functional-similarity score.
ranked_items = sorted(
    comm_func_similar.items(),
    key=lambda entry: entry[1]['Score'],
    reverse=True,
)
sorted_community = dict(ranked_items)

for community, value in sorted_community.items():
    print(f"{community}: {value['Score']:.3f}")


Community 17: 1.000
Community 18: 0.950
Community 4: 0.900
Community 16: 0.870
Community 20: 0.867
Community 13: 0.800
Community 3: 0.800
Community 21: 0.800
Community 12: 0.733
Community 8: 0.667
Community 2: 0.650
Community 7: 0.567
Community 28: 0.500
Community 10: 0.457
Community 15: 0.350
Community 11: 0.233
Community 19: 0.200
Community 6: 0.167
Community 14: 0.143
Community 9: 0.000
Community 23: 0.000
Community 24: 0.000
Community 25: 0.000
Community 26: 0.000
Community 27: 0.000
Community 29: 0.000


In [None]:
# comparison with other algorithms 
def similarity_of_clusterings(clusters1,name1, clusters2,name2):
    """Compare every cluster of one clustering with every cluster of another.

    Both clusterings are first ordered from largest to smallest cluster. The
    similarity of two clusters is ``2 * shared / (len(a) + len(b))``, where
    ``shared`` counts the members of the first cluster found in the second.

    Returns one list per cluster of ``clusters1``: a header string
    ``"<name1><rank> <size>:"`` followed by ``("<name2><rank> <size>:", score)``
    tuples for every cluster of ``clusters2`` with a positive score, best first.
    """
    by_size_1 = sorted(clusters1, key=len, reverse=True)
    by_size_2 = sorted(clusters2, key=len, reverse=True)

    def _label(prefix, rank, members):
        return str(prefix) + str(rank) + ' ' + str(len(members)) + ':'

    sim_list = []
    for rank_1, first in enumerate(by_size_1):
        matches = []
        for rank_2, second in enumerate(by_size_2):
            shared = sum(1 for member in first if member in second)
            score = 2 * shared / (len(first) + len(second))
            if score > 0:
                matches.append((_label(name2, rank_2, second), score))
        matches.sort(key=lambda pair: pair[1], reverse=True)
        sim_list.append([_label(name1, rank_1, first)] + matches)
    return sim_list

# NOTE(review): `label_prop_1` is not defined in any visible cell of this notebook,
# so this cell fails on a fresh kernel — assign the label-propagation communities
# to compare (a list of node collections) before running it.
sim_llp_lmc = similarity_of_clusterings(large_comm(label_prop_1,30),'lp',large_comm(clusters,30),'mc')
for i in sim_llp_lmc:
    # i[0] is the cluster header and i[1] its best match; a cluster that overlaps
    # nothing has no i[1], so print None for it instead of raising IndexError.
    print(i[0], i[1] if len(i) > 1 else None)
    

In [None]:
# Actual Algorithms 



    # VERIFICATION STEPS 
    # Check if there are singletons: WHAT TO DO WITH THESE SINGLETONS? 
    # Check ????? 




    # RELEVANT OUTPUTS 

    # nodeassignment = 
    # Label prop and markov clustering have their output as list of list/tuples where 
    # the first layer is the list of communities and second layer is the list/tuple of the nodes within each community

    # nodenames  = list(nxgraph.nodes()) #NAMES VECTOR 
    # commnum = max(value for _, value in enumerate(nodeassignment))+1 #THIS SHOULD WORK IF NODEASSIGNMENT WORKS 
    # commdict = {} #A DICTIONARY WHOSE KEY IS THE COMMUNITY ID AND WHOSE VALUE IS THE LIST OF NODE NAMES IN THAT COMMUNITY


    # for i in range(commnum): 
    # commdict[i] = []
    # for nodeint, cluster in enumerate(nodeassignment): 
    #     nodename = nodenames[nodeint]
    #     commdict[cluster].append(nodename) 




    # return nodenames, nodeassignment, commdict, commnum



# def overlapping_algo(PARAMS HERE) #THIS IS OUR MAIN ALGORITHM  
    
    
    # ANCHORS 
    # Run an initial community finding trial
    # Determine anchors 


    # DRIFTERS
    # SGS1connections = ["YMR190C", "YNL088W", "YLR234W", "YPL024W", "YMR167W"] # These are the important connections that we know SGS1 connects to 
    # testings = gp.profile( organism="scerevisiae", query=["YMR190C", "YNL088W", "YLR234W", "YPL024W", "YMR167W"])


    # PATH CONSTRUCTION 
    # Use profilers to eliminate irrelevant ones
    # Calculate ratio and define a ratio as "good" such that we consider it an appropriate path to go down
    # Determine the path to go down based on ratio which can be considered as edges of a graph
    # Repeat Step 5 to continue constructing edges
    # If a previously visited community/"drifter" or a dead end is reached, choose another path with good ratio to go down or backtrack to previous nodes with appropriate paths
    # Explore all appropriate paths???? 

    # Kept as a comment: the enclosing `def overlapping_algo(...)` is still commented
    # out, and a bare `return` outside a function is a SyntaxError when the cell runs.
    # return  # A FINAL LIST OF THE NODES THAT DENOTE A PATH STARTING FROM SGS1








# def visualization_graph() 
    # Output the resulting graph: need to get a list? 



### Graph Characteristics

### Methodological Approach

### Conclusions

In [None]:
### Visualization of Graphs 

## PUT THE VISUALIZATION OUTPUTS HERE 