## Import Packages

In [1]:
import os
import pathlib
import numpy as np
from tqdm import tqdm # Progress Bar

from helper.networks_functions import load_networks, create_samples, export2csv

%load_ext autoreload
%autoreload 2

## Set configurations

In [2]:
# Minimum and maximum size of networks to keep.
# Size is measured in the processing cell as the sum of the adjacency-matrix dimensions.
minNetworkSize = 20
maxNetworkSize = 1000

# Minimum connectance (non-zero cells / total cells of the adjacency matrix) to keep a network
minConnectance = 0.1

# List of desired fractions ('1' is automatically included)
frac_list = [0.8] #[0.5, 0.6, 0.7, 0.8, 0.9]

# Number of subsamples to generate
subsamples_reps = 1

# Subsample multiple times and take the one with the fewest components.
# Must be a plain bool: a trailing comma here would bind the tuple (False,),
# which is truthy and would silently switch the option on.
min_components = False

# Path to the networks folder to import
# WOL_path = 'data/networks/Web Of Life/**/*.csv'
Brimacombe_path = 'data/networks/topological_heterogeneity_upload/networks/*.txt'

# Path to the output file
output_dir = 'data/processed/'

weighted = False # the code currently supports only unweighted networks

# Unwanted communities to drop
ignored_communities = ['Journal', 'Microbiome', 'Actor', 'Plant-Ant', 'Food-Web', 'Multiples', 'Anemone-Fish',
                       'Legislature', 'Chicago', 'Denver', 'Minneapolis', 'San Francisco', 'Washington',
                       'Baseball', 'Basketball', 'Hockey']

## Load networks

Import networks from Brimacombe

In [None]:
# Load the databases | (Brimacombe's data)
# `Brimacombe_path` is a glob-style pattern and `ignored_communities` the list of
# community names from the configuration cell; both are handed to load_networks.
# NOTE(review): presumably the loader expands the pattern and skips the ignored
# communities — confirm in helper/networks_functions.py.
# The result is used below as networks[net_id]["Network_Layers"][layer].
networks = load_networks(Brimacombe_path, ignored_communities)

In [None]:
# Load the databases | (Web Of Life)
# Disabled. Before re-enabling: `load_wol` is not imported in this notebook, `WOL_path`
# is commented out in the configuration cell, and `rename_communities`,
# `symbiotic_relationships`, `columns_rows_drop` and `transpose_list` are not defined here.
# networks_new = load_wol(WOL_path, rename_communities, symbiotic_relationships, columns_rows_drop, ignored_communities, transpose_list)
# networks.update(networks_new)

### Process the networks

In [None]:
# Filter every layer by size and connectance, then attach subsamples to the survivors.
# Iteration runs over snapshots (list(...)) because entries are deleted from
# `networks` while looping.
for net_id, net_data in tqdm(list(networks.items()), desc='Processing and subsampling'):

    # Same dict object that networks[net_id] holds, so deletions show up in `networks`
    layers = net_data["Network_Layers"]

    # Visit each layer (or the single layer of a monolayer network)
    for layer in list(layers):
        layer_data = layers[layer]
        adj_matrix = layer_data['Adjacency_Matrix']
        edge_list = layer_data['Edge_List'] # TODO: both forms are not needed, drop one

        # Size is the sum of the matrix dimensions; discard layers outside the allowed range
        n_nodes = sum(adj_matrix.shape)
        if not (minNetworkSize <= n_nodes <= maxNetworkSize):
            del layers[layer]
            continue

        # Connectance is the share of non-zero cells; discard layers below the threshold
        n_cells = adj_matrix.shape[0] * adj_matrix.shape[1]
        connectance = np.count_nonzero(adj_matrix) / n_cells
        if connectance < minConnectance:
            del layers[layer]
            continue

        # Generate the subsamples and merge them into the layer's existing "Subsamples" entry
        sample = create_samples(
            adj_matrix,
            edge_list,
            frac_list,
            reps=subsamples_reps,
            weighted=weighted,
            min_components=min_components,
        )
        layer_data["Subsamples"].update(sample)

    # Remove networks left without layers (happens when no layer meets the requirements above)
    if not layers:
        del networks[net_id]

Save the edgelists

In [None]:
# Hand the filtered networks (including their generated subsamples) to export2csv,
# with `output_dir` from the configuration cell as the destination.
# NOTE(review): file naming and layout are decided inside export2csv — not visible here.
export2csv(networks, output_dir)