# Basic Measurements

In [1]:
import glob
import networkx as nx
from tqdm.notebook import tqdm
import numpy as np

In [2]:
def read_network(filename):
    G = nx.read_gml(filename)
    G.remove_edges_from(list(nx.selfloop_edges(G)))
    G.remove_nodes_from(list(nx.isolates(G)))
    return G

In [3]:
whole_network = nx.read_gml('Processed/grouped-network.gml')

In [9]:
total_months_files = sorted(glob.glob('Processed/years/average/months/*.gml', recursive=True))
total_temp_files = sorted(glob.glob('Processed/years/average/temp/*.gml', recursive=True))

total_months = []
for file in tqdm(total_months_files):
    try:
        G = read_network(file)
        total_months.append(G)
    except Exception as e:
        print(file)
        
total_temp = []
for file in tqdm(total_temp_files):
    G = read_network(file)
    total_temp.append(G)

  0%|          | 0/12 [00:00<?, ?it/s]

Processed/years/average/months/network-10.gml


  0%|          | 0/6 [00:00<?, ?it/s]

In [6]:
members_months_files = sorted(glob.glob('Processed/members/average/months/*.gml', recursive=True))
members_temp_files = sorted(glob.glob('Processed/members/average/temp/*.gml', recursive=True))

members_months = []
for file in tqdm(members_months_files):
    G = read_network(file)
    members_months.append(G)

members_temp = []
for file in tqdm(members_temp_files):
    G = read_network(file)
    members_temp.append(G)

  0%|          | 0/12 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

In [7]:
casuals_months_files = sorted(glob.glob('Processed/casuals/average/months/*.gml', recursive=True))
casuals_temp_files = sorted(glob.glob('Processed/casuals/average/temp/*.gml', recursive=True))

casuals_months = []
for file in tqdm(casuals_months_files):
    G = read_network(file)
    casuals_months.append(G)

casuals_temp = []
for file in tqdm(casuals_temp_files):
    G = read_network(file)
    casuals_temp.append(G)

  0%|          | 0/12 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

## number of nodes and edges

In [10]:
def get_nodes_and_edges(Gs):

    if isinstance(Gs, nx.Graph):

        return Gs.number_of_nodes(), Gs.number_of_edges()

    elif isinstance(Gs, list):

        nodes, edges = zip(*[(G.number_of_nodes(), G.number_of_edges()) for G in Gs])

        return (np.mean(nodes), np.std(nodes)), (np.mean(edges), np.std(edges))

In [11]:
# whole network
get_nodes_and_edges(whole_network)

(760, 294404)

In [21]:
# all trips / months
print(f"all trips / months: {get_nodes_and_edges(total_months)}")

# all trips / temp
print(f"all trips / temp: {get_nodes_and_edges(total_temp)}")


all trips / months: ((691.1666666666666, 89.09155715080725), (152395.33333333334, 42698.121711876534))
all trips / temp: ((715.5, 80.23662920802859), (157332.5, 85151.35426746502))


In [22]:
# members / months
print(f"members / months: {get_nodes_and_edges(members_months)}")

# members / temp
print(f"members / temp: {get_nodes_and_edges(members_temp)}")


members / months: ((666.1666666666666, 72.4141253869406), (101564.75, 23713.028100339696))
members / temp: ((684.6666666666666, 81.93425548708075), (108232.33333333333, 55015.91310298826))


In [23]:
# casuals / months
print(f"casuals / months: {get_nodes_and_edges(casuals_months)}")

# casuals / temp
print(f"casuals / temp: {get_nodes_and_edges(casuals_temp)}")


casuals / months: ((690.8333333333334, 88.86959860129646), (132261.91666666666, 45855.348876400334))
casuals / temp: ((709.3333333333334, 91.07811055474427), (139776.0, 86033.50780170868))


## connected components

In [16]:
def analyze_components(Gs):

    if isinstance(Gs, nx.Graph):

        print(f"is weakly connected? {nx.is_weakly_connected(Gs)}")

        print(f"is strongly connected? {nx.is_strongly_connected(Gs)}")
        
        sccs = sorted(list(nx.strongly_connected_components(Gs)), key=lambda x: -len(x))
        
        print(f"number of SCCs: {len(sccs)}")
        print(f"size of biggest SCCs: {[len(scc) for scc in sccs[:]]}")

    elif isinstance(Gs, list):

        are_sc = [nx.is_strongly_connected(G) for G in Gs]

        if np.all(are_sc):
            print("they are all strongly connected!")
            
        elif np.any(are_sc):
            num_sc = sum(are_sc)
            print(f"{num_sc} are strongly connected")

        are_wc = [nx.is_weakly_connected(G) for G in Gs]

        if np.all(are_wc):
            print("they are all weakly connected!")
            
        elif np.any(are_wc):
            num_wc = sum(are_wc)
            print(f"{num_wc} are weakly connected and {len(Gs) - num_wc} are not even weakly connected.")

        else:
            print("none of them are even weakly connected!")

        scc_count = [nx.number_strongly_connected_components(G) for G in Gs]
        biggest_scc = [max([len(scc) for scc in nx.strongly_connected_components(G)]) for G in Gs]

        print(f"number of SCCs in each graph: {scc_count}")
        print(f"size of the biggest SCC in each graph: {biggest_scc}")

In [17]:
analyze_components(whole_network)

is weakly connected? True
is strongly connected? False
number of SCCs: 2
size of biggest SCCs: [759, 1]


In [18]:
print("total trips / months")
analyze_components(total_months)
print("total trips / temp")
analyze_components(total_temp)

total trips / months
4 are strongly connected
they are all weakly connected!
number of SCCs in each graph: [3, 2, 1, 1, 10, 6, 4, 1, 3, 1, 3, 2]
size of the biggest SCC in each graph: [563, 565, 565, 565, 737, 745, 750, 755, 758, 756, 755, 754]
total trips / temp
1 are strongly connected
4 are weakly connected and 2 are not even weakly connected.
number of SCCs in each graph: [37, 47, 5, 1, 2, 3]
size of the biggest SCC in each graph: [500, 676, 748, 758, 759, 757]


In [19]:
print("casual users / months")
analyze_components(casuals_months)
print("casual users / temp")
analyze_components(casuals_temp)

casual users / months
2 are strongly connected
they are all weakly connected!
number of SCCs in each graph: [5, 2, 1, 3, 13, 7, 4, 2, 2, 1, 4, 2]
size of the biggest SCC in each graph: [558, 565, 565, 563, 727, 743, 750, 754, 758, 756, 754, 754]
casual users / temp
2 are strongly connected
4 are weakly connected and 2 are not even weakly connected.
number of SCCs in each graph: [61, 51, 8, 1, 1, 3]
size of the biggest SCC in each graph: [447, 655, 745, 758, 759, 757]


In [20]:
print("members / months")
analyze_components(members_months)
print("members / temp")
analyze_components(members_temp)

members / months
2 are strongly connected
5 are weakly connected and 7 are not even weakly connected.
number of SCCs in each graph: [10, 1, 1, 6, 33, 21, 22, 22, 30, 17, 26, 24]
size of the biggest SCC in each graph: [547, 565, 565, 551, 654, 683, 689, 708, 697, 710, 697, 685]
members / temp
1 are weakly connected and 5 are not even weakly connected.
number of SCCs in each graph: [55, 34, 28, 25, 16, 18]
size of the biggest SCC in each graph: [458, 606, 682, 716, 732, 720]


## degree and strength distributions

In [None]:
import plotting.distributions as dist

import importlib
importlib.reload(plotting.distributions)

In [75]:
def plot_all(Gs, G_filepaths, user_type, net_type, bins):
    
    min_kin, max_kin, min_kout, max_kout = np.inf, -np.inf, np.inf, -np.inf
    
    for G in Gs:
        in_degrees = [deg for (id, deg) in G.in_degree(weight=None) if deg > 0]
        out_degrees = [deg for (id, deg) in G.out_degree(weight=None) if deg > 0]
    
        min_kin = min(min(in_degrees), min_kin)
        max_kin = max(max(in_degrees), max_kin)
        min_kout = min(min(out_degrees), min_kout)
        max_kout = max(max(out_degrees), max_kout)
    
    for (G, filepath) in zip(Gs, G_filepaths):
        filename = filepath.split('/')[-1].split('.')[0]
        dist.plot_distribution(G, filename, deg_ext=(min_kin, max_kin, min_kout, max_kout), bins=bins, save=True, save_folder=f'degree_dist/{user_type}/{net_type}')
    
    for weight in ['duration_weights', 'tpd_weights', 'trip_count_weights']:
        
        for G in Gs:
            in_degrees = [deg for (id, deg) in G.in_degree(weight=weight) if deg > 0]
            out_degrees = [deg for (id, deg) in G.out_degree(weight=weight) if deg > 0]
        
            min_kin = min(min(in_degrees), min_kin)
            max_kin = max(max(in_degrees), max_kin)
            min_kout = min(min(out_degrees), min_kout)
            max_kout = max(max(out_degrees), max_kout)
        
        for (G, filepath) in zip(Gs, G_filepaths):
            filename = filepath.split('/')[-1].split('.')[0]
            dist.plot_distribution(G, filename, deg_ext=(min_kin, max_kin, min_kout, max_kout), bins=bins, save=True, save_folder=f'strength_dist/{weight}/{user_type}/{net_type}')

In [62]:
dist.plot_distribution(whole_network, 'grouped-network', save=True, save_folder='degree_dist')

for weight in ['duration_weights', 'tpd_weights', 'trip_count_weights']:
    dist.plot_distribution(whole_network, 'grouped-network', weight=weight, save=True, save_folder=f'strength_dist/{weight}')

In [76]:
# all_trips / averaged months

plot_all(total_months, total_months_files, user_type='total', net_type='months', bins=9)

In [77]:
# all_trips / averaged temp

plot_all(total_temp, total_temp_files, user_type='total', net_type='temp', bins=9)

In [78]:
# casuals / averaged months

plot_all(casuals_months, casuals_months_files, user_type='casuals', net_type='months', bins=9)

In [79]:
# casuals / averaged temp

plot_all(casuals_temp, casuals_temp_files, user_type='casuals', net_type='temp', bins=9)

In [80]:
# members / averaged months

plot_all(members_months, members_months_files, user_type='members', net_type='months', bins=9)

In [81]:
# members / averaged temp

plot_all(members_temp, members_temp_files, user_type='members', net_type='temp', bins=9)

## clustering coefficients

In [31]:
def get_average_clustering_coefficient(G, weight='weight'):

    A = nx.to_numpy_array(G, weight=None)
    W = nx.to_numpy_array(G, weight=weight)

    EYE = np.eye(G.number_of_nodes(), dtype=bool)

    D_tot = (A + A.T).sum(axis=0)
    D_bi = (A @ A)[EYE]
    denominator = 2 * (D_tot * (D_tot - 1) - 2 * D_bi)

    W_temp = (W ** (1/3)) + (W.T ** (1/3))
    numerator = (W_temp @ W_temp @ W_temp)[EYE]

    cc = numerator / denominator

    cc = cc[~np.isnan(cc)]

    return np.mean(cc)

In [53]:
def get_all_cc(Gs):

    if isinstance(Gs, nx.Graph):

        F, I_trip_count, I_duration, I_tpd = get_average_clustering_coefficient(Gs, None),\
                                             get_average_clustering_coefficient(Gs, 'trip_count_weights'),\
                                             get_average_clustering_coefficient(Gs, 'duration_weights'),\
                                             get_average_clustering_coefficient(Gs, 'tpd_weights')

        return F, I_trip_count, I_duration, I_tpd

    elif isinstance(Gs, list):

        Fs, I_trip_counts, I_durations, I_tpds = zip(*[(get_average_clustering_coefficient(G, None),\
                                             get_average_clustering_coefficient(G, 'trip_count_weights'),\
                                             get_average_clustering_coefficient(G, 'duration_weights'),\
                                             get_average_clustering_coefficient(G, 'tpd_weights')) for G in Gs])

        # return (np.mean(Fs), np.std(Fs)), (np.mean(I_trip_counts), np.std(I_trip_counts)),\
        #        (np.mean(I_durations), np.std(I_durations)), (np.mean(I_tpds), np.std(I_tpds))
        return Fs, I_trip_counts, I_durations, I_tpds

In [50]:
# whole network

F, I_trip_count, I_duration, I_tpd = get_all_cc(whole_network)

print(f"Whole network unweighted clustering coefficient: {F}")
print(f"Whole network weighted (trip_count) clustering coefficient: {I_trip_count}")
print(f"Whole network weighted (duration) clustering coefficient: {I_duration}")
print(f"Whole network weighted (tpd) clustering coefficient: {I_tpd}")

  cc = numerator / denominator


Whole network unweighted clustering coefficient: 0.7666361165386057
Whole network weighted (trip_count) clustering coefficient: 1.1317566698350676
Whole network weighted (duration) clustering coefficient: 2.8105667564009877
Whole network weighted (tpd) clustering coefficient: 1.3886364068924162


In [47]:
# all trips / months
Fs_total_months, I_trip_counts_total_months, I_durations_total_months, I_tpds_total_months = get_all_cc(total_months)

print(f"for months networks of all trips, unweighted clustering coefficient: mean = {np.mean(Fs_total_months)}, std = {np.std(Fs_total_months)}")
print(f"for months networks of all trips, weighted (trip_count) clustering coefficient: mean = {np.mean(I_trip_counts_total_months)}, std = {np.std(I_trip_counts_total_months)}")
print(f"for months networks of all trips, weighted (duration) clustering coefficient: mean = {np.mean(I_durations_total_months)}, std = {np.std(I_durations_total_months)}")
print(f"for months networks of all trips, weighted (tpd) clustering coefficient: mean = {np.mean(I_tpds_total_months)}, std = {np.std(I_tpds_total_months)}")

# all trips / temp
Fs_total_temp, I_trip_counts_total_temp, I_durations_total_temp, I_tpds_total_temp = get_all_cc(total_temp)

print(f"for temp networks of all trips, unweighted clustering coefficient: mean = {np.mean(Fs_total_temp)}, std = {np.std(Fs_total_temp)}")
print(f"for temp networks of all trips, weighted (trip_count) clustering coefficient: mean = {np.mean(I_trip_counts_total_temp)}, std = {np.std(I_trip_counts_total_temp)}")
print(f"for temp networks of all trips, weighted (duration) clustering coefficient: mean = {np.mean(I_durations_total_temp)}, std = {np.std(I_durations_total_temp)}")
print(f"for temp networks of all trips, weighted (tpd) clustering coefficient: mean = {np.mean(I_tpds_total_temp)}, std = {np.std(I_tpds_total_temp)}")


  cc = numerator / denominator


for months networks of all trips, unweighted clustering coefficient: mean = 0.6207217312659078, std = 0.049224396483133925
for months networks of all trips, weighted (trip_count) clustering coefficient: mean = 0.31266929206579763, std = 0.05748848995015758
for months networks of all trips, weighted (duration) clustering coefficient: mean = 0.5937442637926144, std = 0.115290544739459
for months networks of all trips, weighted (tpd) clustering coefficient: mean = 0.6788319728939405, std = 0.103159905259996
for temp networks of all trips, unweighted clustering coefficient: mean = 0.572985811188754, std = 0.17746611829527018
for temp networks of all trips, weighted (trip_count) clustering coefficient: mean = 0.3006936978723745, std = 0.132782833615733
for temp networks of all trips, weighted (duration) clustering coefficient: mean = 0.623859765641903, std = 0.28546950309015706
for temp networks of all trips, weighted (tpd) clustering coefficient: mean = 0.5817763372334237, std = 0.20742463

In [48]:
# members / months
Fs_members_months, I_trip_counts_members_months, I_durations_members_months, I_tpds_members_months = get_all_cc(members_months)

print(f"for months networks of members, unweighted clustering coefficient: mean = {np.mean(Fs_members_months)}, std = {np.std(Fs_members_months)}")
print(f"for months networks of members, weighted (trip_count) clustering coefficient: mean = {np.mean(I_trip_counts_members_months)}, std = {np.std(I_trip_counts_members_months)}")
print(f"for months networks of members, weighted (duration) clustering coefficient: mean = {np.mean(I_durations_members_months)}, std = {np.std(I_durations_members_months)}")
print(f"for months networks of members, weighted (tpd) clustering coefficient: mean = {np.mean(I_tpds_members_months)}, std = {np.std(I_tpds_members_months)}")

# members / temp
Fs_members_temp, I_trip_counts_members_temp, I_durations_members_temp, I_tpds_members_temp = get_all_cc(members_temp)

print(f"for temp networks of members, unweighted clustering coefficient: mean = {np.mean(Fs_members_temp)}, std = {np.std(Fs_members_temp)}")
print(f"for temp networks of members, weighted (trip_count) clustering coefficient: mean = {np.mean(I_trip_counts_members_temp)}, std = {np.std(I_trip_counts_members_temp)}")
print(f"for temp networks of members, weighted (duration) clustering coefficient: mean = {np.mean(I_durations_members_temp)}, std = {np.std(I_durations_members_temp)}")
print(f"for temp networks of members, weighted (tpd) clustering coefficient: mean = {np.mean(I_tpds_members_temp)}, std = {np.std(I_tpds_members_temp)}")


  cc = numerator / denominator


for months networks of members, unweighted clustering coefficient: mean = 0.5779380850922655, std = 0.04795294332622858
for months networks of members, weighted (trip_count) clustering coefficient: mean = 0.27847570426700147, std = 0.060413901944494516
for months networks of members, weighted (duration) clustering coefficient: mean = 0.5148611657483274, std = 0.10613029502995511
for months networks of members, weighted (tpd) clustering coefficient: mean = 0.6620791805818148, std = 0.11599002345116216
for temp networks of members, unweighted clustering coefficient: mean = 0.5420500334582922, std = 0.16728391453000577
for temp networks of members, weighted (trip_count) clustering coefficient: mean = 0.2636549796259393, std = 0.11275529295415054
for temp networks of members, weighted (duration) clustering coefficient: mean = 0.5237057265735183, std = 0.22267273957917816
for temp networks of members, weighted (tpd) clustering coefficient: mean = 0.5518421888941397, std = 0.1930131022206607

In [49]:
# casuals / months
Fs_casuals_months, I_trip_counts_casuals_months, I_durations_casuals_months, I_tpds_casuals_months = get_all_cc(casuals_months)

print(f"for months networks of casuals, unweighted clustering coefficient: mean = {np.mean(Fs_casuals_months)}, std = {np.std(Fs_casuals_months)}")
print(f"for months networks of casuals, weighted (trip_count) clustering coefficient: mean = {np.mean(I_trip_counts_casuals_months)}, std = {np.std(I_trip_counts_casuals_months)}")
print(f"for months networks of casuals, weighted (duration) clustering coefficient: mean = {np.mean(I_durations_casuals_months)}, std = {np.std(I_durations_casuals_months)}")
print(f"for months networks of casuals, weighted (tpd) clustering coefficient: mean = {np.mean(I_tpds_casuals_months)}, std = {np.std(I_tpds_casuals_months)}")

# casuals / temp
Fs_casuals_temp, I_trip_counts_casuals_temp, I_durations_casuals_temp, I_tpds_casuals_temp = get_all_cc(casuals_temp)

print(f"for temp networks of casuals, unweighted clustering coefficient: mean = {np.mean(Fs_casuals_temp)}, std = {np.std(Fs_casuals_temp)}")
print(f"for temp networks of casuals, weighted (trip_count) clustering coefficient: mean = {np.mean(I_trip_counts_casuals_temp)}, std = {np.std(I_trip_counts_casuals_temp)}")
print(f"for temp networks of casuals, weighted (duration) clustering coefficient: mean = {np.mean(I_durations_casuals_temp)}, std = {np.std(I_durations_casuals_temp)}")
print(f"for temp networks of casuals, weighted (tpd) clustering coefficient: mean = {np.mean(I_tpds_casuals_temp)}, std = {np.std(I_tpds_casuals_temp)}")


  cc = numerator / denominator


for months networks of casuals, unweighted clustering coefficient: mean = 0.5775650916118004, std = 0.06847991004941895
for months networks of casuals, weighted (trip_count) clustering coefficient: mean = 0.23312626057708127, std = 0.05831604502268342
for months networks of casuals, weighted (duration) clustering coefficient: mean = 0.4464320082299024, std = 0.1178221444990911
for months networks of casuals, weighted (tpd) clustering coefficient: mean = 0.5092716635072668, std = 0.10876794971356403
for temp networks of casuals, unweighted clustering coefficient: mean = 0.5163103371338763, std = 0.21283119953674362
for temp networks of casuals, weighted (trip_count) clustering coefficient: mean = 0.22301399368613153, std = 0.11823364148629396
for temp networks of casuals, weighted (duration) clustering coefficient: mean = 0.46434431650864755, std = 0.2553804596354678
for temp networks of casuals, weighted (tpd) clustering coefficient: mean = 0.441746469946517, std = 0.2005753102866178
