In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.linalg as la

# Load the aggregated adjacency matrices

In [2]:
# Paths of the temporally aggregated contact matrices, one .npy file per dataset.
contact_adj_mat_files = [
    'output/temporally_aggregate_matrices/contact/agg_mat_InVS13.npy',
    'output/temporally_aggregate_matrices/contact/agg_mat_InVS15.npy',
    'output/temporally_aggregate_matrices/contact/agg_mat_LH10.npy',
    'output/temporally_aggregate_matrices/contact/agg_mat_LyonSchool.npy',
    'output/temporally_aggregate_matrices/contact/agg_mat_SFHH.npy',
    'output/temporally_aggregate_matrices/contact/agg_mat_Thiers13.npy',
]

# Paths of the temporally aggregated co-presence matrices, in the same dataset order.
copresence_adj_mat_files = [
    'output/temporally_aggregate_matrices/co-presence/agg_mat_pres_InVS13.npy',
    'output/temporally_aggregate_matrices/co-presence/agg_mat_pres_InVS15.npy',
    'output/temporally_aggregate_matrices/co-presence/agg_mat_pres_LH10.npy',
    'output/temporally_aggregate_matrices/co-presence/agg_mat_pres_LyonSchool.npy',
    'output/temporally_aggregate_matrices/co-presence/agg_mat_pres_SFHH.npy',
    'output/temporally_aggregate_matrices/co-presence/agg_mat_pres_Thiers13.npy',
]

# Load the contact matrices; the unpacking order mirrors the list order above.
(agg_mat_InVS13,
 agg_mat_InVS15,
 agg_mat_LH10,
 agg_mat_LyonSchool,
 agg_mat_SFHH,
 agg_mat_Thiers13) = map(np.load, contact_adj_mat_files)

# Load the co-presence matrices the same way.
(agg_mat_pres_InVS13,
 agg_mat_pres_InVS15,
 agg_mat_pres_LH10,
 agg_mat_pres_LyonSchool,
 agg_mat_pres_SFHH,
 agg_mat_pres_Thiers13) = map(np.load, copresence_adj_mat_files)

# Comparison of Sampled Network Statistics

For each of the 6 co-presence networks, each sampling algorithm, and each sampling ratio f in [0.9, 0.8, 0.7, 0.6, 0.5], create a subsampled network and calculate its network statistics. The statistics of the corresponding contact network are listed in the first row of each table.

In [3]:
# Run the two helper notebooks so that the names they define can be used by
# the cells below.
# NOTE(review): presumably graph_stats.ipynb provides get_num_vertices /
# get_num_edges / compute_* and graph_sampling.ipynb provides the
# graph_sampling_algo_* functions - confirm against those notebooks.
%run graph_stats.ipynb
%run graph_sampling.ipynb

In [4]:
# Sampling ratios f handed to every sampling algorithm in the cells below.
# NOTE(review): presumably the fraction of the original network to retain -
# confirm against graph_sampling.ipynb.
sampling_ratios = [0.9, 0.8, 0.7, 0.6, 0.5]

## InVS13 Copresence Sampling

In [5]:
# Statistic functions, in the order of the table columns that follow 'Dataset'.
stat_funcs = (get_num_vertices, get_num_edges, compute_graph_volume,
              compute_graph_density, compute_average_degree,
              compute_average_clustering_coefficient)

# Row 0: the aggregated contact network itself, kept at the top for reference.
single_stats = [['InVS13_contact'] + [stat(agg_mat_InVS13) for stat in stat_funcs]]

for f in sampling_ratios:
    pct = f * 100  # ratio as a percentage; used only in the row labels

    # Uniform Node Sampling
    A_s = graph_sampling_algo_uniform_node_sampling(agg_mat_pres_InVS13, f)
    single_stats.append(['pres_InVS13_UNS_{}%'.format(pct)]
                        + [stat(A_s) for stat in stat_funcs])

    # Uniform Edge Sampling
    A_s = graph_sampling_algo_uniform_edge_sampling(agg_mat_pres_InVS13, f)
    single_stats.append(['pres_InVS13_UES_{}%'.format(pct)]
                        + [stat(A_s) for stat in stat_funcs])

    # Metropolis-Hastings Random Walk with 5% random jump probability
    r_jump_prob = 0.05
    A_s = graph_sampling_algo_metropolis_hastings_random_walk(agg_mat_pres_InVS13, f, r_jump_prob)
    single_stats.append(['pres_InVS13_MHRW_rjump{}_{}%'.format(r_jump_prob, pct)]
                        + [stat(A_s) for stat in stat_funcs])

    # Frontier Sampling using RW of dimension scaled to 5% of number of vertices
    FS_dim = int(0.05 * get_num_vertices(agg_mat_pres_InVS13))
    A_s = graph_sampling_algo_frontier_sampling(agg_mat_pres_InVS13, f, FS_dim)
    single_stats.append(['pres_InVS13_FS_dim{}_{}%'.format(FS_dim, pct)]
                        + [stat(A_s) for stat in stat_funcs])

    # Snowball Expansion Sampling
    A_s = graph_sampling_algo_snowball_expansion_sampling(agg_mat_pres_InVS13, f)
    single_stats.append(['pres_InVS13_SES_{}%'.format(pct)]
                        + [stat(A_s) for stat in stat_funcs])

# One labelled row per (algorithm, ratio) pair, preceded by the contact row.
column_names = ['Dataset', 'Num Vertices', 'Num Edges', 'Volume',
                'Density', 'Avg Degree', 'Avg Clustering Coeff']
InVS13_df = pd.DataFrame(single_stats, columns=column_names)

InVS13_df

Unnamed: 0,Dataset,Num Vertices,Num Edges,Volume,Density,Avg Degree,Avg Clustering Coeff
0,InVS13_contact,100,755,9827.0,0.152525,196.54,0.305609
1,pres_InVS13_UNS_90.0%,100,2767,233681.0,0.55899,4673.62,0.081996
2,pres_InVS13_UES_90.0%,100,3524,365074.0,0.711919,7301.48,0.145736
3,pres_InVS13_MHRW_rjump0.05_90.0%,100,2307,271510.0,0.466061,5430.2,0.22391
4,pres_InVS13_FS_dim5_90.0%,100,3524,358005.0,0.711919,7160.1,0.279693
5,pres_InVS13_SES_90.0%,100,2730,263033.0,0.551515,5260.66,0.086046
6,pres_InVS13_UNS_80.0%,100,2545,277771.0,0.514141,5555.42,0.155652
7,pres_InVS13_UES_80.0%,100,3132,323504.0,0.632727,6470.08,0.083553
8,pres_InVS13_MHRW_rjump0.05_80.0%,100,1767,213397.0,0.35697,4267.94,0.465255
9,pres_InVS13_FS_dim5_80.0%,100,3132,314831.0,0.632727,6296.62,0.070015


## InVS15 Copresence Sampling

In [6]:
# Statistic functions, in the order of the table columns that follow 'Dataset'.
stat_funcs = (get_num_vertices, get_num_edges, compute_graph_volume,
              compute_graph_density, compute_average_degree,
              compute_average_clustering_coefficient)

# Row 0: the aggregated contact network itself, kept at the top for reference.
single_stats = [['InVS15_contact'] + [stat(agg_mat_InVS15) for stat in stat_funcs]]

for f in sampling_ratios:
    pct = f * 100  # ratio as a percentage; used only in the row labels

    # Uniform Node Sampling
    A_s = graph_sampling_algo_uniform_node_sampling(agg_mat_pres_InVS15, f)
    single_stats.append(['pres_InVS15_UNS_{}%'.format(pct)]
                        + [stat(A_s) for stat in stat_funcs])

    # Uniform Edge Sampling
    A_s = graph_sampling_algo_uniform_edge_sampling(agg_mat_pres_InVS15, f)
    single_stats.append(['pres_InVS15_UES_{}%'.format(pct)]
                        + [stat(A_s) for stat in stat_funcs])

    # Metropolis-Hastings Random Walk with 5% random jump probability
    r_jump_prob = 0.05
    A_s = graph_sampling_algo_metropolis_hastings_random_walk(agg_mat_pres_InVS15, f, r_jump_prob)
    single_stats.append(['pres_InVS15_MHRW_rjump{}_{}%'.format(r_jump_prob, pct)]
                        + [stat(A_s) for stat in stat_funcs])

    # Frontier Sampling using RW of dimension scaled to 5% of number of vertices
    FS_dim = int(0.05 * get_num_vertices(agg_mat_pres_InVS15))
    A_s = graph_sampling_algo_frontier_sampling(agg_mat_pres_InVS15, f, FS_dim)
    single_stats.append(['pres_InVS15_FS_dim{}_{}%'.format(FS_dim, pct)]
                        + [stat(A_s) for stat in stat_funcs])

    # Snowball Expansion Sampling
    A_s = graph_sampling_algo_snowball_expansion_sampling(agg_mat_pres_InVS15, f)
    single_stats.append(['pres_InVS15_SES_{}%'.format(pct)]
                        + [stat(A_s) for stat in stat_funcs])

# One labelled row per (algorithm, ratio) pair, preceded by the contact row.
column_names = ['Dataset', 'Num Vertices', 'Num Edges', 'Volume',
                'Density', 'Avg Degree', 'Avg Clustering Coeff']
InVS15_df = pd.DataFrame(single_stats, columns=column_names)

InVS15_df

Unnamed: 0,Dataset,Num Vertices,Num Edges,Volume,Density,Avg Degree,Avg Clustering Coeff
0,InVS15_contact,232,4274,78249.0,0.159501,674.560345,0.235446
1,pres_InVS15_UNS_90.0%,232,11692,955322.0,0.436334,8235.534483,0.097264
2,pres_InVS15_UES_90.0%,232,15053,1149143.0,0.561763,9906.405172,0.068233
3,pres_InVS15_MHRW_rjump0.05_90.0%,232,9457,835280.0,0.352926,7200.689655,0.712623
4,pres_InVS15_FS_dim11_90.0%,232,15053,1157620.0,0.561763,9979.482759,0.072128
5,pres_InVS15_SES_90.0%,232,11636,910762.0,0.434244,7851.396552,0.070593
6,pres_InVS15_UNS_80.0%,232,10819,831278.0,0.403754,7166.189655,1.046549
7,pres_InVS15_UES_80.0%,232,13380,1014352.0,0.499328,8744.413793,0.062292
8,pres_InVS15_MHRW_rjump0.05_80.0%,232,9370,816824.0,0.349679,7041.586207,0.110789
9,pres_InVS15_FS_dim11_80.0%,232,13380,1042439.0,0.499328,8986.543103,0.057154


## LH10 Copresence Sampling

In [7]:
# Statistic functions, in the order of the table columns that follow 'Dataset'.
stat_funcs = (get_num_vertices, get_num_edges, compute_graph_volume,
              compute_graph_density, compute_average_degree,
              compute_average_clustering_coefficient)

# Row 0: the aggregated contact network itself, kept at the top for reference.
single_stats = [['LH10_contact'] + [stat(agg_mat_LH10) for stat in stat_funcs]]

for f in sampling_ratios:
    pct = f * 100  # ratio as a percentage; used only in the row labels

    # Uniform Node Sampling
    A_s = graph_sampling_algo_uniform_node_sampling(agg_mat_pres_LH10, f)
    single_stats.append(['pres_LH10_UNS_{}%'.format(pct)]
                        + [stat(A_s) for stat in stat_funcs])

    # Uniform Edge Sampling
    A_s = graph_sampling_algo_uniform_edge_sampling(agg_mat_pres_LH10, f)
    single_stats.append(['pres_LH10_UES_{}%'.format(pct)]
                        + [stat(A_s) for stat in stat_funcs])

    # Metropolis-Hastings Random Walk with 5% random jump probability
    r_jump_prob = 0.05
    A_s = graph_sampling_algo_metropolis_hastings_random_walk(agg_mat_pres_LH10, f, r_jump_prob)
    single_stats.append(['pres_LH10_MHRW_rjump{}_{}%'.format(r_jump_prob, pct)]
                        + [stat(A_s) for stat in stat_funcs])

    # Frontier Sampling using RW of dimension scaled to 5% of number of vertices
    FS_dim = int(0.05 * get_num_vertices(agg_mat_pres_LH10))
    A_s = graph_sampling_algo_frontier_sampling(agg_mat_pres_LH10, f, FS_dim)
    single_stats.append(['pres_LH10_FS_dim{}_{}%'.format(FS_dim, pct)]
                        + [stat(A_s) for stat in stat_funcs])

    # Snowball Expansion Sampling
    A_s = graph_sampling_algo_snowball_expansion_sampling(agg_mat_pres_LH10, f)
    single_stats.append(['pres_LH10_SES_{}%'.format(pct)]
                        + [stat(A_s) for stat in stat_funcs])

# One labelled row per (algorithm, ratio) pair, preceded by the contact row.
column_names = ['Dataset', 'Num Vertices', 'Num Edges', 'Volume',
                'Density', 'Avg Degree', 'Avg Clustering Coeff']
LH10_df = pd.DataFrame(single_stats, columns=column_names)

LH10_df

Unnamed: 0,Dataset,Num Vertices,Num Edges,Volume,Density,Avg Degree,Avg Clustering Coeff
0,LH10_contact,81,1156,33070.0,0.35679,816.54321,3.144838
1,pres_LH10_UNS_90.0%,81,981,113331.0,0.302778,2798.296296,7.968377
2,pres_LH10_UES_90.0%,81,1243,126256.0,0.383642,3117.432099,6.607528
3,pres_LH10_MHRW_rjump0.05_90.0%,81,849,84017.0,0.262037,2074.493827,14.043307
4,pres_LH10_FS_dim4_90.0%,81,1243,129046.0,0.383642,3186.320988,8.123311
5,pres_LH10_SES_90.0%,81,956,105812.0,0.295062,2612.641975,2.758188
6,pres_LH10_UNS_80.0%,81,913,103498.0,0.28179,2555.506173,7.667826
7,pres_LH10_UES_80.0%,81,1105,120302.0,0.341049,2970.419753,10.296989
8,pres_LH10_MHRW_rjump0.05_80.0%,81,765,89207.0,0.236111,2202.641975,11.233766
9,pres_LH10_FS_dim4_80.0%,81,1105,119811.0,0.341049,2958.296296,7.008626


## LyonSchool Copresence Sampling

In [8]:
# Statistic functions, in the order of the table columns that follow 'Dataset'.
stat_funcs = (get_num_vertices, get_num_edges, compute_graph_volume,
              compute_graph_density, compute_average_degree,
              compute_average_clustering_coefficient)

# Row 0: the aggregated contact network itself, kept at the top for reference.
single_stats = [['LyonSchool_contact'] + [stat(agg_mat_LyonSchool) for stat in stat_funcs]]

for f in sampling_ratios:
    pct = f * 100  # ratio as a percentage; used only in the row labels

    # Uniform Node Sampling
    A_s = graph_sampling_algo_uniform_node_sampling(agg_mat_pres_LyonSchool, f)
    single_stats.append(['pres_LyonSchool_UNS_{}%'.format(pct)]
                        + [stat(A_s) for stat in stat_funcs])

    # Uniform Edge Sampling
    A_s = graph_sampling_algo_uniform_edge_sampling(agg_mat_pres_LyonSchool, f)
    single_stats.append(['pres_LyonSchool_UES_{}%'.format(pct)]
                        + [stat(A_s) for stat in stat_funcs])

    # Metropolis-Hastings Random Walk with 5% random jump probability
    r_jump_prob = 0.05
    A_s = graph_sampling_algo_metropolis_hastings_random_walk(agg_mat_pres_LyonSchool, f, r_jump_prob)
    single_stats.append(['pres_LyonSchool_MHRW_rjump{}_{}%'.format(r_jump_prob, pct)]
                        + [stat(A_s) for stat in stat_funcs])

    # Frontier Sampling using RW of dimension scaled to 5% of number of vertices
    FS_dim = int(0.05 * get_num_vertices(agg_mat_pres_LyonSchool))
    A_s = graph_sampling_algo_frontier_sampling(agg_mat_pres_LyonSchool, f, FS_dim)
    single_stats.append(['pres_LyonSchool_FS_dim{}_{}%'.format(FS_dim, pct)]
                        + [stat(A_s) for stat in stat_funcs])

    # Snowball Expansion Sampling
    A_s = graph_sampling_algo_snowball_expansion_sampling(agg_mat_pres_LyonSchool, f)
    single_stats.append(['pres_LyonSchool_SES_{}%'.format(pct)]
                        + [stat(A_s) for stat in stat_funcs])

# One labelled row per (algorithm, ratio) pair, preceded by the contact row.
column_names = ['Dataset', 'Num Vertices', 'Num Edges', 'Volume',
                'Density', 'Avg Degree', 'Avg Clustering Coeff']
LyonSchool_df = pd.DataFrame(single_stats, columns=column_names)

LyonSchool_df

Unnamed: 0,Dataset,Num Vertices,Num Edges,Volume,Density,Avg Degree,Avg Clustering Coeff
0,LyonSchool_contact,242,8317,125773.0,0.28521,1039.446281,0.144241
1,pres_LyonSchool_UNS_90.0%,242,18666,4624291.0,0.640102,38217.280992,0.014007
2,pres_LyonSchool_UES_90.0%,242,23935,5919427.0,0.820788,48920.884298,0.013047
3,pres_LyonSchool_MHRW_rjump0.05_90.0%,242,15827,4267332.0,0.542745,35267.206612,0.017042
4,pres_LyonSchool_FS_dim12_90.0%,242,23935,5963419.0,0.820788,49284.454545,0.012938
5,pres_LyonSchool_SES_90.0%,242,18505,4716963.0,0.63458,38983.165289,0.012884
6,pres_LyonSchool_UNS_80.0%,242,17171,4192387.0,0.588834,34647.826446,0.011888
7,pres_LyonSchool_UES_80.0%,242,21276,5290409.0,0.729605,43722.38843,0.011633
8,pres_LyonSchool_MHRW_rjump0.05_80.0%,242,14400,3971843.0,0.49381,32825.14876,0.017743
9,pres_LyonSchool_FS_dim12_80.0%,242,21276,5289757.0,0.729605,43717.0,0.011607


## SFHH Copresence Sampling

In [9]:
# Statistic functions, in the order of the table columns that follow 'Dataset'.
stat_funcs = (get_num_vertices, get_num_edges, compute_graph_volume,
              compute_graph_density, compute_average_degree,
              compute_average_clustering_coefficient)

# Row 0: the aggregated contact network itself, kept at the top for reference.
single_stats = [['SFHH_contact'] + [stat(agg_mat_SFHH) for stat in stat_funcs]]

for f in sampling_ratios:
    pct = f * 100  # ratio as a percentage; used only in the row labels

    # Uniform Node Sampling
    A_s = graph_sampling_algo_uniform_node_sampling(agg_mat_pres_SFHH, f)
    single_stats.append(['pres_SFHH_UNS_{}%'.format(pct)]
                        + [stat(A_s) for stat in stat_funcs])

    # Uniform Edge Sampling
    A_s = graph_sampling_algo_uniform_edge_sampling(agg_mat_pres_SFHH, f)
    single_stats.append(['pres_SFHH_UES_{}%'.format(pct)]
                        + [stat(A_s) for stat in stat_funcs])

    # Metropolis-Hastings Random Walk with 5% random jump probability
    r_jump_prob = 0.05
    A_s = graph_sampling_algo_metropolis_hastings_random_walk(agg_mat_pres_SFHH, f, r_jump_prob)
    single_stats.append(['pres_SFHH_MHRW_rjump{}_{}%'.format(r_jump_prob, pct)]
                        + [stat(A_s) for stat in stat_funcs])

    # Frontier Sampling using RW of dimension scaled to 5% of number of vertices
    FS_dim = int(0.05 * get_num_vertices(agg_mat_pres_SFHH))
    A_s = graph_sampling_algo_frontier_sampling(agg_mat_pres_SFHH, f, FS_dim)
    single_stats.append(['pres_SFHH_FS_dim{}_{}%'.format(FS_dim, pct)]
                        + [stat(A_s) for stat in stat_funcs])

    # Snowball Expansion Sampling
    A_s = graph_sampling_algo_snowball_expansion_sampling(agg_mat_pres_SFHH, f)
    single_stats.append(['pres_SFHH_SES_{}%'.format(pct)]
                        + [stat(A_s) for stat in stat_funcs])

# One labelled row per (algorithm, ratio) pair, preceded by the contact row.
column_names = ['Dataset', 'Num Vertices', 'Num Edges', 'Volume',
                'Density', 'Avg Degree', 'Avg Clustering Coeff']
SFHH_df = pd.DataFrame(single_stats, columns=column_names)

SFHH_df

Unnamed: 0,Dataset,Num Vertices,Num Edges,Volume,Density,Avg Degree,Avg Clustering Coeff
0,SFHH_contact,403,9565,70261.0,0.118082,348.689826,0.513001
1,pres_SFHH_UNS_90.0%,403,51503,967678.0,0.635816,4802.372208,0.343145
2,pres_SFHH_UES_90.0%,403,66202,1270829.0,0.817278,6306.843672,0.403645
3,pres_SFHH_MHRW_rjump0.05_90.0%,403,41435,878694.0,0.511524,4360.764268,0.778779
4,pres_SFHH_FS_dim20_90.0%,403,66202,1277611.0,0.817278,6340.501241,0.519182
5,pres_SFHH_SES_90.0%,403,51292,1036371.0,0.633211,5143.280397,1.534089
6,pres_SFHH_UNS_80.0%,403,47252,906414.0,0.583336,4498.332506,0.546547
7,pres_SFHH_UES_80.0%,403,58846,1129589.0,0.726467,5605.900744,0.531365
8,pres_SFHH_MHRW_rjump0.05_80.0%,403,37966,842598.0,0.468699,4181.627792,0.678316
9,pres_SFHH_FS_dim20_80.0%,403,58846,1133969.0,0.726467,5627.637717,0.673683


## Thiers13 Copresence Sampling

In [10]:
# Statistic functions, in the order of the table columns that follow 'Dataset'.
stat_funcs = (get_num_vertices, get_num_edges, compute_graph_volume,
              compute_graph_density, compute_average_degree,
              compute_average_clustering_coefficient)

# Row 0: the aggregated contact network itself, kept at the top for reference.
single_stats = [['Thiers13_contact'] + [stat(agg_mat_Thiers13) for stat in stat_funcs]]

for f in sampling_ratios:
    pct = f * 100  # ratio as a percentage; used only in the row labels

    # Uniform Node Sampling
    A_s = graph_sampling_algo_uniform_node_sampling(agg_mat_pres_Thiers13, f)
    single_stats.append(['pres_Thiers13_UNS_{}%'.format(pct)]
                        + [stat(A_s) for stat in stat_funcs])

    # Uniform Edge Sampling
    A_s = graph_sampling_algo_uniform_edge_sampling(agg_mat_pres_Thiers13, f)
    single_stats.append(['pres_Thiers13_UES_{}%'.format(pct)]
                        + [stat(A_s) for stat in stat_funcs])

    # Metropolis-Hastings Random Walk with 5% random jump probability
    r_jump_prob = 0.05
    A_s = graph_sampling_algo_metropolis_hastings_random_walk(agg_mat_pres_Thiers13, f, r_jump_prob)
    single_stats.append(['pres_Thiers13_MHRW_rjump{}_{}%'.format(r_jump_prob, pct)]
                        + [stat(A_s) for stat in stat_funcs])

    # Frontier Sampling using RW of dimension scaled to 5% of number of vertices
    FS_dim = int(0.05 * get_num_vertices(agg_mat_pres_Thiers13))
    A_s = graph_sampling_algo_frontier_sampling(agg_mat_pres_Thiers13, f, FS_dim)
    single_stats.append(['pres_Thiers13_FS_dim{}_{}%'.format(FS_dim, pct)]
                        + [stat(A_s) for stat in stat_funcs])

    # Snowball Expansion Sampling
    A_s = graph_sampling_algo_snowball_expansion_sampling(agg_mat_pres_Thiers13, f)
    single_stats.append(['pres_Thiers13_SES_{}%'.format(pct)]
                        + [stat(A_s) for stat in stat_funcs])

# One labelled row per (algorithm, ratio) pair, preceded by the contact row.
column_names = ['Dataset', 'Num Vertices', 'Num Edges', 'Volume',
                'Density', 'Avg Degree', 'Avg Clustering Coeff']
Thiers13_df = pd.DataFrame(single_stats, columns=column_names)

Thiers13_df

Unnamed: 0,Dataset,Num Vertices,Num Edges,Volume,Density,Avg Degree,Avg Clustering Coeff
0,Thiers13_contact,332,5818,188508.0,0.105886,1135.590361,0.255922
1,pres_Thiers13_UNS_90.0%,332,30455,12977135.0,0.554271,78175.512048,0.086337
2,pres_Thiers13_UES_90.0%,332,39147,16739352.0,0.712463,100839.46988,0.102156
3,pres_Thiers13_MHRW_rjump0.05_90.0%,332,24390,10653816.0,0.44389,64179.614458,0.117095
4,pres_Thiers13_FS_dim16_90.0%,332,39147,16699488.0,0.712463,100599.325301,0.099409
5,pres_Thiers13_SES_90.0%,332,30394,13083975.0,0.553161,78819.126506,0.119833
6,pres_Thiers13_UNS_80.0%,332,27888,12206859.0,0.507553,73535.295181,0.129985
7,pres_Thiers13_UES_80.0%,332,34797,14976854.0,0.633295,90222.012048,0.088965
8,pres_Thiers13_MHRW_rjump0.05_80.0%,332,22165,8844596.0,0.403396,53280.698795,0.197365
9,pres_Thiers13_FS_dim16_80.0%,332,34797,14935649.0,0.633295,89973.789157,0.08742


# Bugged Code

In [None]:
# Forest Fire Sampling with 30% incident burn probability (burn_p = 0.3 below)
# NOTE(review): this cell has no execution count and is indented like a loop
# body; it reads `f` and `single_stats`, so it presumably belongs inside the
# `for f in sampling_ratios:` loop of the InVS13 cell - confirm before
# re-enabling. Run as-is, the leading indentation is a syntax error.
    burn_p = 0.3
    A_s = graph_sampling_algo_forest_fire_sampling(agg_mat_pres_InVS13, f, burn_p)
    # Row label records both the burn probability and the sampling ratio in percent.
    data_title = 'pres_InVS13_FFS_burn{}_{}%'.format(burn_p, f * 100)
    data = [data_title]
    # Same six statistics, in the same order, as the other sampling stanzas.
    data.append(get_num_vertices(A_s))
    data.append(get_num_edges(A_s))
    data.append(compute_graph_volume(A_s))
    data.append(compute_graph_density(A_s))
    data.append(compute_average_degree(A_s))
    data.append(compute_average_clustering_coefficient(A_s))
    single_stats.append(data)