### Import Packages

In [1]:
import os
import pathlib
import numpy as np
from tqdm import tqdm # Progress Bar

from helper.networks_functions import load_networks, process_networks, export2csv

# Enable IPython's autoreload extension; mode 2 reloads imported modules before
# each cell executes (presumably so edits to helper/networks_functions.py are
# picked up without restarting the kernel).
%load_ext autoreload
%autoreload 2

### Load networks

In [None]:
# --- Brimacombe's data -------------------------------------------------------
# Path pattern handed to load_networks (matches the *.txt files of the upload).
brimacombe_networks_pattern = 'data/raw/networks/Brimacombe/topological_heterogeneity_upload/networks/*.txt'

# Community labels passed as `ignored_communities` to load_networks.
brimacombe_ignored_communities = [
    'Microbiome',
    'Plant-Ant',
    'Food-Web',
    'Multiples',
    'Anemone-Fish',
    'Legislature',
    'Actor',
    'Journal',
    'Baseball',
    'Basketball',
    'Chicago',
    'Denver',
    'Hockey',
    'Minneapolis',
    'San Francisco',
    'Washington',
]

networks = load_networks(
    path=brimacombe_networks_pattern,
    ignored_communities=brimacombe_ignored_communities,
)

# --- Web Of Life (currently disabled) ----------------------------------------
# networks_new = load_wol(WOL_path = 'data/networks/Web Of Life/**/*.csv', rename_communities, symbiotic_relationships, columns_rows_drop, ignored_communities, transpose_list)
# networks.update(networks_new)

### Process the networks

In [None]:
# Processing and subsampling (main dataset).
# NOTE(review): the result is bound back to `networks`, and the later cells pass
# `networks` to process_networks again. Re-running this cell therefore feeds it
# its own output; confirm this is intended, or keep the raw and processed data
# under separate names.
main_processing_kwargs = dict(
    minNetworkSize=20,
    maxNetworkSize=1000,
    minConnectance=0.1,
    frac_list=[0.8],  # alternative: [0.5, 0.6, 0.7, 0.8, 0.9]
    reps=1,  # experimental
    weighted=False,  # currently the code is supporting only unweighted
    min_components=False,  # experimental
    # networks_drop_list,
)
networks = process_networks(networks, **main_processing_kwargs)

### Export the data to csv files

In [None]:
# Write the processed networks as CSV files under the main output directory.
export2csv(
    networks,
    output_dir='data/processed/networks/',
)

### Additional datasets

Save another set of subsamples for the sensitivity analysis, using several subsampling fractions (0.7 to 0.95).

In [None]:
# Sensitivity set: same size/connectance thresholds as the main run, but with a
# range of subsampling fractions instead of a single one.
sensitivity_processing_kwargs = dict(
    minNetworkSize=20,
    maxNetworkSize=1000,
    minConnectance=0.1,
    frac_list=[0.7, 0.75, 0.8, 0.85, 0.9, 0.95],
)
networks_sensetivity = process_networks(networks, **sensitivity_processing_kwargs)

# Exported to its own folder with a dedicated file prefix.
export2csv(
    networks_sensetivity,
    output_dir='data/processed/networks/sensitivity/',
    file_prefix='subsamples_sensitivity',
)

Processing and subsampling: 100%|██████████| 713/713 [00:45<00:00, 15.59it/s]
Exporting to csv: 100%|██████████| 538/538 [00:10<00:00, 53.34it/s]


Save another set for connectance threshold analysis

In [None]:
# Filtering set: same thresholds and fraction as the main run, but with
# reverse_filters=True (presumably keeps the networks the filters would
# otherwise drop; verify against process_networks).
filtering_processing_kwargs = dict(
    minNetworkSize=20,
    maxNetworkSize=1000,
    minConnectance=0.1,
    frac_list=[0.8],
    reverse_filters=True,
)
networks_filtered = process_networks(networks, **filtering_processing_kwargs)

# Exported to its own folder with a dedicated file prefix.
export2csv(
    networks_filtered,
    output_dir='data/processed/networks/filtering/',
    file_prefix='subsamples_filtered',
)

Processing and subsampling:  80%|███████▉  | 569/713 [01:21<00:16,  8.71it/s]

Additional subsamples with degree-biased sampling (biased towards low-degree or high-degree nodes)

In [None]:
# Degree-biased subsampling at a single fraction, once per bias direction.
# NOTE(review): unlike the other processing cells, no size/connectance
# thresholds are passed here, so process_networks' defaults apply. Confirm they
# match the values used elsewhere in this notebook.
networks_lowDegBias_sampling = process_networks(
    networks,
    frac_list=[0.8],
    degree_biased='low',
)
networks_highDegBias_sampling = process_networks(
    networks,
    frac_list=[0.8],
    degree_biased='high',
)

In [None]:
# Export both biased-sampling sets to the same folder; the file prefix tells
# the low-degree and high-degree variants apart.
export2csv(
    networks_lowDegBias_sampling,
    output_dir='data/processed/networks/biased_sampling/',
    file_prefix='lowDegBiasSampling',
)
export2csv(
    networks_highDegBias_sampling,
    output_dir='data/processed/networks/biased_sampling/',
    file_prefix='highDegBiasSampling',
)

Exporting to csv:   0%|          | 0/538 [00:00<?, ?it/s]

Exporting to csv: 100%|██████████| 538/538 [00:02<00:00, 190.25it/s]
Exporting to csv: 100%|██████████| 538/538 [00:02<00:00, 192.21it/s]
