In [13]:
import pandas as pd
import networkx as nx
import numpy as np
import matplotlib.pyplot as plt
from networkx.algorithms.community import k_clique_communities
import sklearn as sk
import random

# Node label used for the SGS1 protein; presumably its identifier in the
# "4932.protein.links" edge list loaded later in this notebook — TODO confirm.
sgs1 = "4932.YMR190C"

In [14]:
def graph_info(G0):
    """Print a one-line summary of an undirected graph.

    The line reports the node count, the edge count, whether the graph is
    connected, and how many connected components it has.

    Parameters
    ----------
    G0 : networkx.Graph
        Undirected graph (or subgraph view) to summarise.

    Returns
    -------
    None
        The summary is written to stdout.
    """
    node_count = G0.number_of_nodes()

    # nx.is_connected raises NetworkXPointlessConcept for a graph with no
    # nodes; report such a graph as "not connected" instead of crashing.
    connected = nx.is_connected(G0) if node_count else False

    # The label text is kept exactly as before so the printed lines stay
    # comparable with output from earlier runs.
    print("Nodes:", node_count, "Edges:", G0.number_of_edges(), "Connected:", connected, "Subgrahs:", nx.number_connected_components(G0))

In [15]:
def random_sample_subsets(G, N, threshold_val=500, edge_removal_fraction=0.75, seed=None):
    """Build N random edge-subsampled subgraphs of G.

    First, every edge whose first attribute value (the "weight" for graphs
    loaded with ``nx.read_weighted_edgelist``) is <= ``threshold_val`` is
    removed from ``G``. NOTE: this pruning happens in place, so the caller's
    graph is modified.

    Then, N times independently, a copy of the pruned graph is made, a random
    ``edge_removal_fraction`` of its edges is removed, and the largest
    connected component of what remains is kept. Because each draw is
    independent, different subgraphs may share edges.

    Parameters
    ----------
    G : networkx.Graph
        Undirected graph with at least one attribute on every edge.
        Pruned in place.
    N : int
        Number of subgraphs to generate.
    threshold_val : number, optional
        Edges with weight <= this value are dropped before sampling.
    edge_removal_fraction : float, optional
        Fraction of the pruned graph's edges removed for each subgraph.
    seed : optional
        If given, sampling uses a private ``random.Random(seed)`` so the
        result is reproducible. If None (default), the module-level
        ``random`` generator is used, exactly as before.

    Returns
    -------
    list
        N subgraph views (as returned by ``Graph.subgraph``), each the
        largest connected component of one subsampled copy.
    """
    # Collect the weak edges first and remove them afterwards, so the graph
    # is never modified while its edge view is being iterated.
    weak_edges = [
        (u, v)
        for u, v, attrs in G.edges(data=True)
        if list(attrs.values())[0] <= threshold_val
    ]
    G.remove_edges_from(weak_edges)

    rng = random if seed is None else random.Random(seed)

    # G does not change from here on, so the candidate edge list and the
    # number of edges to drop are the same for every subgraph.
    edges = list(G.edges())
    num_edges_to_remove = int(edge_removal_fraction * G.number_of_edges())

    subsets = []
    for _ in range(N):
        # Work on a copy so each subgraph is drawn from the full pruned graph.
        H = G.copy()
        H.remove_edges_from(rng.sample(edges, num_edges_to_remove))

        # Keep only the largest connected component.
        H = H.subgraph(max(nx.connected_components(H), key=len))
        subsets.append(H)

    return subsets

def evenly_split_from_tot_subsets(G, N, threshold_val=500, seed=None):
    """Partition the edges of G into N disjoint, near-equal subgraphs.

    First, every edge whose first attribute value (the "weight" for graphs
    loaded with ``nx.read_weighted_edgelist``) is <= ``threshold_val`` is
    removed from ``G``. NOTE: this pruning happens in place, so the caller's
    graph is modified.

    The remaining edges are shuffled and dealt round-robin into N groups, so
    no edge appears in more than one group. Each group becomes a new
    ``nx.Graph`` (edge attributes such as the weight are carried over), and
    only its largest connected component is kept.

    Parameters
    ----------
    G : networkx.Graph
        Undirected graph with at least one attribute on every edge.
        Pruned in place.
    N : int
        Number of subgraphs to generate.
    threshold_val : number, optional
        Edges with weight <= this value are dropped before splitting.
    seed : optional
        If given, shuffling uses a private ``random.Random(seed)`` so the
        result is reproducible. If None (default), the module-level
        ``random`` generator is used, exactly as before.

    Returns
    -------
    list
        N subgraph views (as returned by ``Graph.subgraph``), each the
        largest connected component of one edge group.
    """
    # Collect the weak edges first and remove them afterwards, so the graph
    # is never modified while its edge view is being iterated.
    weak_edges = [
        (u, v)
        for u, v, attrs in G.edges(data=True)
        if list(attrs.values())[0] <= threshold_val
    ]
    G.remove_edges_from(weak_edges)

    rng = random if seed is None else random.Random(seed)

    # Take the edges together with their attribute dicts so the weights
    # survive into the subgraphs instead of being silently dropped.
    all_edges = list(G.edges(data=True))
    rng.shuffle(all_edges)

    subsets = []
    for i in range(N):
        # Every N-th edge starting at offset i: a disjoint, near-equal share.
        H = nx.Graph()
        H.add_edges_from(all_edges[i::N])

        # Add every node of G so that H is never empty, even when this share
        # contains no edges (isolated nodes form their own components).
        H.add_nodes_from(G.nodes())

        # Keep only the largest connected component.
        H = H.subgraph(max(nx.connected_components(H), key=len))
        subsets.append(H)

    return subsets

In [16]:
# Load the edge list into a weighted graph: lines starting with "#" are
# treated as comments and node labels are kept as strings.
G0 = nx.read_weighted_edgelist("4932.protein.links.v12.0.txt", comments="#", nodetype=str)
# Print node/edge counts and connectivity of the full, unfiltered graph.
graph_info(G0)

Nodes: 6538 Edges: 1412421 Connected: True Subgrahs: 1


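Option 1: build N subgraphs independently. For each one, randomly remove a fixed fraction of the thresholded graph's edges and keep the largest connected component. Different subgraphs may share edges.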

In [17]:
# Draw 20 subgraphs, each keeping roughly 25% of the edges with weight > 500.
# Note: random_sample_subsets removes the edges with weight <= 500 from G0
# itself, so G0 is a smaller graph after this cell has run.
subsets = random_sample_subsets(G0,20,threshold_val=500, edge_removal_fraction=0.75)

In [18]:
# Print one summary line per Option 1 subgraph.
for subset in subsets:
    graph_info(subset)

Nodes: 5861 Edges: 47409 Connected: True Subgrahs: 1
Nodes: 5850 Edges: 47401 Connected: True Subgrahs: 1
Nodes: 5835 Edges: 47401 Connected: True Subgrahs: 1
Nodes: 5850 Edges: 47402 Connected: True Subgrahs: 1
Nodes: 5855 Edges: 47411 Connected: True Subgrahs: 1
Nodes: 5837 Edges: 47412 Connected: True Subgrahs: 1
Nodes: 5858 Edges: 47403 Connected: True Subgrahs: 1
Nodes: 5847 Edges: 47405 Connected: True Subgrahs: 1
Nodes: 5836 Edges: 47409 Connected: True Subgrahs: 1
Nodes: 5806 Edges: 47400 Connected: True Subgrahs: 1
Nodes: 5841 Edges: 47408 Connected: True Subgrahs: 1
Nodes: 5837 Edges: 47409 Connected: True Subgrahs: 1
Nodes: 5844 Edges: 47396 Connected: True Subgrahs: 1
Nodes: 5867 Edges: 47406 Connected: True Subgrahs: 1
Nodes: 5829 Edges: 47404 Connected: True Subgrahs: 1
Nodes: 5846 Edges: 47400 Connected: True Subgrahs: 1
Nodes: 5830 Edges: 47406 Connected: True Subgrahs: 1
Nodes: 5854 Edges: 47410 Connected: True Subgrahs: 1
Nodes: 5854 Edges: 47404 Connected: True Subgr

Option 2: take all the edges and split them evenly into N disjoint subsets, so that no edge appears in more than one subset.

In [19]:
# Split the edges with weight > 500 into 20 disjoint subgraphs.
# Note: this overwrites the Option 1 result held in `subsets`. When the cells
# are run in order, G0 has already been pruned in place with the same
# threshold by the Option 1 call, so the thresholding here removes nothing more.
subsets = evenly_split_from_tot_subsets(G0,20,threshold_val=500)

In [20]:
# Print one summary line per Option 2 subgraph.
for subset in subsets:
    graph_info(subset)

Nodes: 3960 Edges: 9217 Connected: True Subgrahs: 1
Nodes: 4022 Edges: 9222 Connected: True Subgrahs: 1
Nodes: 4013 Edges: 9196 Connected: True Subgrahs: 1
Nodes: 4032 Edges: 9193 Connected: True Subgrahs: 1
Nodes: 3988 Edges: 9208 Connected: True Subgrahs: 1
Nodes: 3987 Edges: 9183 Connected: True Subgrahs: 1
Nodes: 4037 Edges: 9235 Connected: True Subgrahs: 1
Nodes: 4033 Edges: 9235 Connected: True Subgrahs: 1
Nodes: 3990 Edges: 9201 Connected: True Subgrahs: 1
Nodes: 3906 Edges: 9201 Connected: True Subgrahs: 1
Nodes: 4024 Edges: 9231 Connected: True Subgrahs: 1
Nodes: 4038 Edges: 9208 Connected: True Subgrahs: 1
Nodes: 4016 Edges: 9200 Connected: True Subgrahs: 1
Nodes: 3933 Edges: 9078 Connected: True Subgrahs: 1
Nodes: 4018 Edges: 9177 Connected: True Subgrahs: 1
Nodes: 4048 Edges: 9188 Connected: True Subgrahs: 1
Nodes: 4134 Edges: 9258 Connected: True Subgrahs: 1
Nodes: 4088 Edges: 9285 Connected: True Subgrahs: 1
Nodes: 4002 Edges: 9246 Connected: True Subgrahs: 1
Nodes: 4091 