## Graph Analytics with ArangoDB (Python-Arango Library) (Part 2)

### Install Required Libraries

In [1]:
!pip3 install --upgrade pip

Collecting pip
  Downloading pip-25.0-py3-none-any.whl.metadata (3.7 kB)
Downloading pip-25.0-py3-none-any.whl (1.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m12.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pip
  Attempting uninstall: pip
    Found existing installation: pip 24.3.1
    Uninstalling pip-24.3.1:
      Successfully uninstalled pip-24.3.1
Successfully installed pip-25.0


In [2]:
%pip install python-arango
%pip install networkx
%pip install numpy
%pip install scipy
%pip install tabulate
%pip install geopandas

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


### Library Imports

In [3]:
import importlib.metadata
import os
import sys, csv, statistics

from arango import ArangoClient
import networkx as nx
import pkg_resources

import numpy as np
import scipy
from tabulate import tabulate
import geopandas as gpd

### Library Versions

In [4]:
# Widths of the left (label) and right (value) columns of the version table.
l = 15
r = 14

# importlib.metadata is the standard-library way to read an installed
# distribution's version; it replaces the deprecated pkg_resources lookup.
arango_version = importlib.metadata.version("python-arango")

print("Software & Library Versions".center(l+r))
print('-'* (l + 1) + '|' + '-' * (r - 2))
# sys.version begins with the version number followed by a space. Taking the first
# whitespace-separated token stays correct for versions such as 3.11.10, where a
# fixed six-character slice would cut off the last digit.
print('Python'.rjust(l), '|', sys.version.split()[0])
print('Arango Client'.rjust(l), '|', arango_version)
print('NetworkX'.rjust(l), '|', nx.__version__)
print('NumPy'.rjust(l), '|', np.__version__)
print('SciPy'.rjust(l), '|', scipy.__version__)

 Software & Library Versions 
----------------|------------
         Python | 3.11.4
  Arango Client | 8.1.2
       NetworkX | 3.4.2
          NumPy | 2.1.3
          SciPy | 1.14.1


### Import Dataset

In [5]:
# Connect to ArangoDB
# Connection settings are read from the environment so that credentials do not
# have to be edited into the notebook; the fallbacks keep the original local setup.
client = ArangoClient()
db = client.db(
    os.environ.get('ARANGO_DB', '_system'),
    username=os.environ.get('ARANGO_USERNAME', 'root'),
    password=os.environ.get('ARANGO_PASSWORD', 'testpassword')
)

# Access collections
nodes = db.collection('airports')
edges = db.collection('flights')

# Fetch graph data (both collections are read completely into memory)
graph_data = {
    'nodes': list(nodes.all()),
    'edges': list(edges.all())
}

# A MultiDiGraph keeps one directed edge per flight document, so several flights
# between the same pair of airports are stored as parallel edges.
flightGraph = nx.MultiDiGraph()

# NOTE(review): this is a snapshot of the collections at the time the cell runs.
# Attributes such as `airport_code` are written to the database by the enrichment
# cells further down, so they are only present on the graph nodes if those updates
# had already been applied before this cell was executed.

# Add nodes (keyed by the document `_key`, all document fields become attributes)
for node in graph_data['nodes']:
    flightGraph.add_node(node['_key'], **node)

# Add edges (`_from` / `_to` look like "<collection>/<key>"; keep only the key part)
for edge in graph_data['edges']:
    flightGraph.add_edge(
        edge['_from'].split('/')[-1],
        edge['_to'].split('/')[-1],
        **edge
    )

### Some Introductory Commands

#### Show the Number of Airports

In [6]:
# Let the server count the documents in the `airports` collection.
query = """
FOR airport IN airports
    COLLECT WITH COUNT INTO numAirports
RETURN { NumberOfAirports: numAirports }
"""
result = db.aql.execute(query)

# Drain the cursor into a list so the single result row is printed.
print([row for row in result])

[{'NumberOfAirports': 365}]


#### Show the Number of Flights

In [7]:
# Let the server count the documents in the `flights` collection.
query = """
FOR flight IN flights
    COLLECT WITH COUNT INTO numFlights
RETURN { NumberOfFlights: numFlights }
"""
result = db.aql.execute(query)

# Drain the cursor into a list so the single result row is printed.
print([row for row in result])

[{'NumberOfFlights': 992298}]


#### Add Airline Names for Each Airline Code

In [8]:
# The AQL text is identical for every CSV row (only the bind variables change),
# so it is built once instead of on every loop iteration.
query = """
        FOR flight IN flights
            FILTER flight.airline_id == @airline_id
            UPDATE flight WITH {
                airline_name: @airline_name,
                airline_code: @airline_code
            } IN flights
        """

# Load CSV file
# newline='' is what the csv module documents for files handed to a reader, so
# quoted fields containing line breaks are parsed correctly.
with open('../import/airlines.csv', 'r', newline='') as file:
    reader = csv.DictReader(file)
    for row in reader:
        # NOTE(review): DictReader yields every field as a string; the FILTER only
        # matches if flight.airline_id is stored as a string too - confirm.
        db.aql.execute(query, bind_vars={
            'airline_id': row['airline_id'],
            'airline_name': row['airline_name'],
            'airline_code': row['airline_code']
        })

#### Add Airport Name, City, & State

In [9]:
# The AQL text is identical for every CSV row (only the bind variables change),
# so it is built once instead of on every loop iteration.
query = """
        FOR airport IN airports
            FILTER airport.unique_id == @unique_id
            UPDATE airport WITH {
                airport_code: @airport_code,
                airport_name: @airport_name,
                airport_city: @city_name,
                airportState_code: @state,
                airportState_name: @state_name
            } IN airports
        """

# newline='' is what the csv module documents for files handed to a reader, so
# quoted fields containing line breaks are parsed correctly.
with open('../import/airports.csv', 'r', newline='') as file:
    reader = csv.DictReader(file)
    for row in reader:
        # NOTE(review): DictReader yields every field as a string; the FILTER only
        # matches if airport.unique_id is stored as a string too - confirm.
        db.aql.execute(query, bind_vars={
            'unique_id': row['unique_id'],
            'airport_code': row['airport_code'],
            'airport_name': row['airport_name'],
            'city_name': row['city_name'],
            'state': row['state'],
            'state_name': row['state_name']
        })

## Utility Functions

#### Find Node ID Based On Value of Attribute

In [10]:
def get_node_by_attribute(graph, attribute_name, attribute_value):
    """
    Look up the first node whose attribute equals the requested value.

    Args:
        graph (nx.Graph): The NetworkX graph to search.
        attribute_name (str): Name of the node attribute to compare.
        attribute_value (str): Value the attribute has to be equal to.

    Returns:
        The ID of the first matching node in iteration order, or None when
        no node carries that attribute value.
    """
    matching_ids = (
        node_id
        for node_id, node_attrs in graph.nodes(data=True)
        if node_attrs.get(attribute_name) == attribute_value
    )
    # next() with a default stops at the first hit and yields None when there is none
    return next(matching_ids, None)

#### Find Value of Attribute Based On Node ID

In [11]:
def convert_node_id_to_attr_value(data, mapping_dict, attribute_name):
    """
    Swap the key of every (key, value) pair for an attribute looked up in a mapping.

    Args:
        data (list of tuples): (key, value) pairs whose keys are node/airport IDs.
        mapping_dict (dict): Maps node/airport IDs to a dictionary of attributes.
        attribute_name (str): Attribute whose value should replace each key.

    Returns:
        list: The pairs in their original order, each key replaced by the attribute
        value. A key stays unchanged when the ID or the attribute is unknown.

    Raises:
        ValueError: If any argument does not have the documented type/shape.
    """
    def _is_pair(item):
        return isinstance(item, tuple) and len(item) == 2

    if not (isinstance(data, list) and all(_is_pair(item) for item in data)):
        raise ValueError("Data must be a list of (key, value) tuples.")
    if not isinstance(mapping_dict, dict):
        raise ValueError("Mapping dictionary must be a valid dictionary.")
    if not isinstance(attribute_name, str):
        raise ValueError("Attribute name must be a string.")

    # Unknown IDs fall back to an empty attribute dict, unknown attributes to the key itself
    return [
        (mapping_dict.get(key, {}).get(attribute_name, key), value)
        for key, value in data
    ]

### Functions to Create Maps Between Ids & Attribute Values

In [12]:
# Function to create a map with node_id's as the keys and airport codes as the values

def create_airport_code_dict(G):
    """
    Build a lookup from node ID to the node's 'airport_code' attribute.

    Args:
        G (networkx.Graph): A NetworkX graph whose nodes may carry an 'airport_code' attribute.

    Returns:
        dict: Node IDs as keys and airport codes as values. Nodes whose
        'airport_code' is missing or empty are left out.
    """
    codes_by_node = (
        (node_id, node_attrs.get('airport_code'))
        for node_id, node_attrs in G.nodes(data=True)
    )
    # Keep only the nodes that actually have a non-empty code
    return {node_id: code for node_id, code in codes_by_node if code}

### --------------------------------------------------------------------
# Example output:

# {
#     1: "JFK",
#     2: "LAX"
# }
### --------------------------------------------------------------------

# Build the node-id -> airport-code lookup once; the cells below reuse it.
id_to_airport_code_mapping_dict = create_airport_code_dict(flightGraph)

# Show the mapping, then the number of entries on the following line.
print(
    id_to_airport_code_mapping_dict,
    len(id_to_airport_code_mapping_dict),
    sep="\n"
)

{'179': 'FWA', '181': 'BZN', '183': 'RDM', '185': 'BKG', '187': 'MQT', '189': 'JAC', '191': 'MSP', '193': 'HIB', '195': 'UCA', '197': 'AUS', '199': 'HDN', '201': 'MAZ', '203': 'ANI', '205': 'CDC', '207': 'AMA', '209': 'PSC', '211': 'PVD', '213': 'AGS', '215': 'AVL', '217': 'CVG', '219': 'PLN', '221': 'PBI', '223': 'LMT', '225': 'BTM', '227': 'CPR', '229': 'ITH', '231': 'UTM', '233': 'CRW', '235': 'YUM', '237': 'DHN', '239': 'ATL', '241': 'MKG', '243': 'JAN', '245': 'LCH', '247': 'DIK', '249': 'LNY', '251': 'EWN', '253': 'ROW', '255': 'ITO', '257': 'ELM', '259': 'MWH', '261': 'HOB', '263': 'MCO', '265': 'EGE', '267': 'SLC', '269': 'CHO', '271': 'ECP', '273': 'ORD', '275': 'ISN', '277': 'SAN', '279': 'PDX', '281': 'GSO', '283': 'PIH', '285': 'MCN', '287': 'BNA', '289': 'SEA', '291': 'SAT', '293': 'SMX', '295': 'FSD', '297': 'BRD', '299': 'HOU', '301': 'EKO', '303': 'ERI', '305': 'GRR', '307': 'BFL', '309': 'SPN', '311': 'TUL', '313': 'INL', '315': 'DUT', '317': 'ATW', '319': 'HRL', '321'

### Convert Dictionary Keys From Node IDs to Airport Codes

In [13]:
# Re-key a NetworkX result dictionary: every node_id KEY becomes its airport_code

def convert_output_keys_to_airport_codes(
    nx_output_dict, 
    node_to_airport_code_dict=id_to_airport_code_mapping_dict):
    """
    Return a copy of a NetworkX output dictionary keyed by airport code.

    Args:
        nx_output_dict (dict): A dictionary output from NetworkX with node IDs as keys.
        node_to_airport_code_dict (dict): A dictionary mapping node IDs to airport codes.

    Returns:
        dict: The original values under their airport codes. A node ID without a
        known code is kept under the placeholder key "UNKNOWN(<node_id>)".
    """
    return {
        node_to_airport_code_dict.get(node_id, f"UNKNOWN({node_id})"): value
        for node_id, value in nx_output_dict.items()
    }

# Example Use:
    
# converted_dict = convert_output_keys_to_airport_codes(nx_output_dict, id_to_airport_code_mapping_dict)

# Example Output:

# {
#     "JFK": 3,
#     "LAX": 5,
#     "ORD": 2
# }

In [14]:
# handle a value that was passed in as one single node_id

def convert_single_instance(node_id, node_to_airport_code_dict):
    """
    Translate one node_id into its airport_code; unknown ids are returned unchanged.
    """
    if node_id in node_to_airport_code_dict:
        return node_to_airport_code_dict[node_id]
    return node_id

In [15]:
# handle a value that was passed in as a list

def convert_list(node_id_list, node_to_airport_code_dict):
    """
    Translate every node_id in a list into its airport_code.

    Nested lists are translated recursively; ids without a mapping are kept as they are.
    """
    return [
        convert_list(entry, node_to_airport_code_dict)
        if isinstance(entry, list)
        else node_to_airport_code_dict.get(entry, entry)
        for entry in node_id_list
    ]

In [16]:
# handle a value that was passed in as a tuple

def convert_tuple(node_id_tuple, node_to_airport_code_dict):
    """
    Translate every node_id in a tuple into its airport_code (unknown ids stay unchanged).
    """
    converted = []
    for node_id in node_id_tuple:
        converted.append(node_to_airport_code_dict.get(node_id, node_id))
    return tuple(converted)

In [17]:
# handle a value that was passed in as a set

def convert_set(node_id_set, node_to_airport_code_dict):
    """
    Translate every node_id in a set into its airport_code (unknown ids stay unchanged).
    """
    converted = set()
    for node_id in node_id_set:
        converted.add(node_to_airport_code_dict.get(node_id, node_id))
    return converted

In [18]:
# handle a value that was passed in as a dictionary

def convert_dict(node_id_dict, node_to_airport_code_dict):
    """
    Translate the node_ids found in a dictionary's values into airport_codes.

    The keys are kept as they are; each value is handed to convert_values.
    """
    converted = {}
    for key, value in node_id_dict.items():
        converted[key] = convert_values(value, node_to_airport_code_dict)
    return converted

In [19]:
def convert_values(value, node_to_airport_code_dict):
    """
    Dynamically determine the type of input value and convert it appropriately.

    Args:
        value: A single node_id (int or str), or a list, tuple, set or dict
            containing node_ids.
        node_to_airport_code_dict (dict): Maps node IDs to airport codes.

    Returns:
        The converted value, produced by the helper that matches the type of
        `value`. Values of any other type are returned unchanged.
    """
    # Node ids in this notebook are strings (e.g. '179'), so a bare string has to be
    # treated as a single id as well; checking for int alone left them unconverted.
    if isinstance(value, (int, str)):
        return convert_single_instance(value, node_to_airport_code_dict)
    elif isinstance(value, list):
        return convert_list(value, node_to_airport_code_dict)
    elif isinstance(value, tuple):
        return convert_tuple(value, node_to_airport_code_dict)
    elif isinstance(value, set):
        return convert_set(value, node_to_airport_code_dict)
    elif isinstance(value, dict):
        return convert_dict(value, node_to_airport_code_dict)
    else:
        # If the value doesn't match any known type, return it as is
        return value

### Convert List of Sets of Node IDs to Airport Codes

In [20]:
def convert_node_ids_to_airport_codes(components_list, id_to_airport_code_mapping_dict):
    """
    Replace the node IDs inside each component with their airport codes.

    Args:
        components_list (list): A list of sets containing node IDs.
        id_to_airport_code_mapping_dict (dict): Dictionary mapping node IDs to airport codes.

    Returns:
        list: One set per input component, in the same order, holding airport
        codes. A node ID without a mapping is kept unchanged.
    """
    converted_components = []
    for component in components_list:
        codes = set()
        for node_id in component:
            codes.add(id_to_airport_code_mapping_dict.get(node_id, node_id))
        converted_components.append(codes)
    return converted_components

### Function(s) To Modify Output Format

#### Create Function to Return Statistics About Path Lengths (Dictionary Input)

In [21]:
def retrieve_path_stats(path_lengths):
    """
    Summarise the numeric values in a dictionary of path lengths.

    Parameters:
        path_lengths (dict): A dictionary where keys are nodes and values are path lengths.
            Values that are not int or float are ignored.

    Returns:
        dict: Always contains the same keys:
            "min", "max", "mean", "count", "std Dev" (population standard deviation),
            "harmonic_mean", "geometric_mean" (computed over the positive values only),
            "quants" (the four cut points dividing the data into five groups), "mode".
            When there are no numeric values, "count" is 0 and every other entry is
            None. Individual entries are None when the statistic cannot be computed
            (for example "quants" with fewer than two values).
    """
    # Filter out non-numeric values (e.g., lists, strings, etc.)
    values = (
        [v for v in path_lengths.values() if isinstance(v, (int, float))]
        if path_lengths
        else []
    )

    if not values:  # Empty input or no valid numeric values: same keys as the full result
        return {
            "min": None,
            "max": None, 
            "mean": None, 
            "count": 0,
            "std Dev": None,
            "harmonic_mean": None,
            "geometric_mean": None,
            "quants": None,
            "mode": None
        }

    # The geometric mean is only defined for positive numbers
    positive_values = [v for v in values if v > 0]
    geo_mean = statistics.geometric_mean(positive_values) if positive_values else None

    # harmonic_mean raises StatisticsError when a value is negative
    try:
        harm_mean = statistics.harmonic_mean(values)
    except statistics.StatisticsError:
        harm_mean = None

    # quantiles needs at least two data points to produce cut points
    quants = statistics.quantiles(values, n=5) if len(values) >= 2 else None

    try:
        modes = statistics.mode(values)
    except statistics.StatisticsError:
        modes = None  # In case there are no unique modes

    return {
        "min": min(values),
        "max": max(values), 
        "mean": statistics.mean(values), 
        "count": len(values), 
        "std Dev": statistics.pstdev(values),  # This is the population standard deviation
        "harmonic_mean": harm_mean,
        "geometric_mean": geo_mean,
        "quants": quants,
        "mode": modes
    }

#### Function to Sort Dictionary in Order Based on Value & Return Top N Key/Value Pairs

In [22]:
def get_top_n_from_sorted_dict(input_dict, n, desc_order=True):
    """
    Sort a dictionary by its values and return the first n key-value pairs.
    
    Args:
        input_dict (dict): The dictionary to sort.
        n (int): The number of key-value pairs to return (must be positive).
        desc_order (bool): True for sorting in descending order (largest values first).
            False for ascending order (smallest values first).
        
    Returns:
        list: A list of at most n (key, value) tuples, sorted by value. Entries
        with equal values keep the order they have in input_dict.

    Raises:
        ValueError: If input_dict is not a dict, n is not a positive integer,
            or desc_order is not a boolean.
    """
    if not isinstance(input_dict, dict):
        raise ValueError("Input must be a dictionary.")
    if not isinstance(n, int) or n <= 0:
        raise ValueError("The number of top items (n) must be a positive integer.")
    if not isinstance(desc_order, bool):
        raise ValueError("desc_order must be a boolean value.")
    
    # Sort the items by value in the requested direction and keep the first n
    sorted_items = sorted(input_dict.items(), key=lambda item: item[1], reverse=desc_order)
    return sorted_items[:n]

#### Convert 'flight_distance' Attribute Values From String to Integer Values

In [23]:
# Convert 'flight_distance' attribute values from String to integer values.
# `d` is the attribute dictionary of the edge, so assigning to it updates the graph.
for u, v, d in flightGraph.edges(data=True):
    if 'flight_distance' in d:
        try:
            d['flight_distance'] = int(float(d['flight_distance']))  # Convert to float first, then integer
        except (TypeError, ValueError, OverflowError):
            # ValueError: non-numeric text or NaN; TypeError: e.g. None;
            # OverflowError: infinity cannot be turned into an integer.
            print(f"Invalid flight_distance on edge ({u}, {v}): {d['flight_distance']}")
            d['flight_distance'] = 0 # Default value for invalid data

#### Centrality
- Part 1
    - Degree
        - degree_centrality
        - in_degree_centrality
        - out_degree_centrality
    - Closeness
        - closeness_centrality
    - (Shortest Path) Betweenness
        - betweenness_centrality
- Part 2
    - Load 
        - load_centrality
    - Harmonic Centrality
        - harmonic_centrality
    - Percolation
        - percolation_centrality
    - Trophic
        - trophic_levels
    - VoteRank
        - voterank

In [24]:
# The degree centrality for a node is the fraction of nodes it is connected to.
# flightGraph is a MultiDiGraph, so parallel flight edges are all counted and the
# values can be much larger than 1.
deg_cent = nx.degree_centrality(flightGraph) 

# The in-degree centrality for a node is the fraction of nodes its incoming edges are connected to.
id_cent = nx.in_degree_centrality(flightGraph)

# The out-degree centrality for a node is the fraction of nodes its outgoing edges are connected to.
od_cent = nx.out_degree_centrality(flightGraph)

# Closeness
closeness_centrality = nx.closeness_centrality(
    flightGraph,
    wf_improved=True
)

# (Shortest Path) Betweenness
bc = nx.betweenness_centrality(
    flightGraph, 
    endpoints=True
    )

# Weighted Closeness Centrality (shortest paths measured in 'flight_distance')
weighted_closeness_centrality = nx.closeness_centrality(
    flightGraph, 
    distance="flight_distance",
    wf_improved=True
    )

### ---------------------------------------------------------------------------------------------------------------------------------
# Convert the keys from the node_id's to their respective airport_code's
deg_cent = convert_output_keys_to_airport_codes(
    deg_cent, 
    id_to_airport_code_mapping_dict
    )

id_cent = convert_output_keys_to_airport_codes(
    id_cent, 
    id_to_airport_code_mapping_dict
    )

od_cent = convert_output_keys_to_airport_codes(
    od_cent,
    id_to_airport_code_mapping_dict
    )

closeness_centrality = convert_output_keys_to_airport_codes(
    closeness_centrality, 
    id_to_airport_code_mapping_dict
    )

bc = convert_output_keys_to_airport_codes(
    bc, 
    id_to_airport_code_mapping_dict
    )

# Convert the weighted result itself. Passing the already converted
# closeness_centrality here would throw the weighted values away and turn every
# key into "UNKNOWN(...)".
weighted_closeness_centrality = convert_output_keys_to_airport_codes(
    weighted_closeness_centrality, 
    id_to_airport_code_mapping_dict
    )

### ---------------------------------------------------------------------------------------------------------------------------------
# Return the Top 12 results (after they are sorted)
n = 12

centrality_outputs = [
    deg_cent, 
    id_cent,
    od_cent,
    closeness_centrality,
    bc,
    weighted_closeness_centrality
]

centrality_output_names = [
    "Degree Centrality",
    "In Degree Centrality",
    "Out Degree Centrality",
    "Closeness Centrality",
    "Betweenness Centrality",
    "Weighted Closeness Centrality"
]

# loop through each measure to get the top & bottom 12 results (after sorted)
for col_name, centrality_values in zip(centrality_output_names, centrality_outputs):
    print(col_name)
    top_12_values = get_top_n_from_sorted_dict(centrality_values, n, desc_order=True)
    bottom_12_values = get_top_n_from_sorted_dict(centrality_values, n, desc_order=False)
    
    print(f"""The top {n} values for {col_name} are:
    {top_12_values}
The bottom {n} values for {col_name} are:
    {bottom_12_values}
""")

Degree Centrality
The top 12 values for Degree Centrality are:
    [('ATL', 306.28571428571433), ('ORD', 282.31868131868134), ('DFW', 249.02197802197804), ('LAX', 193.4368131868132), ('PHX', 163.11538461538464), ('DEN', 162.1401098901099), ('IAH', 140.489010989011), ('SFO', 132.20604395604397), ('LAS', 130.01923076923077), ('DTW', 128.62637362637363), ('MSP', 121.00000000000001), ('CLT', 116.85989010989012)]
The bottom 12 values for Degree Centrality are:
    [('UCA', 0.0), ('TUL', 0.0), ('FOE', 0.0), ('HYS', 0.0), ('OKC', 0.0), ('LAW', 0.0), ('ISO', 0.0), ('GCK', 0.0), ('ROP', 0.0), ('ROR', 0.0), ('MHK', 0.0), ('ICT', 0.0)]

In Degree Centrality
The top 12 values for In Degree Centrality are:
    [('ATL', 152.54395604395606), ('ORD', 141.07967032967034), ('DFW', 124.55494505494507), ('LAX', 96.65659340659342), ('PHX', 81.53296703296704), ('DEN', 81.23626373626374), ('IAH', 69.90384615384616), ('SFO', 66.03296703296704), ('LAS', 64.93131868131869), ('DTW', 64.42032967032968), ('MSP', 6

In [25]:
# Start node for the local reaching centrality
source = get_node_by_attribute(flightGraph, "airport_code", "MKE")
if source is None:
    # get_node_by_attribute returns None when nothing matches; stop with a clear
    # message instead of handing None to NetworkX as the start node.
    raise ValueError("No airport with airport_code 'MKE' was found in flightGraph.")

# Load Centrality
lc = nx.load_centrality(
    flightGraph, 
    cutoff=5,
    normalized=True
    )

# Harmonic Centrality (shortest paths measured in 'flight_distance')
hc = nx.harmonic_centrality(
    flightGraph,
    distance="flight_distance"
    )

# Local Reaching Centrality of the `source` airport
lrc = nx.local_reaching_centrality(
    flightGraph, 
    v=source, 
    paths=None, 
    normalized=True
    )

# Percolation
pc = nx.percolation_centrality(flightGraph)

# Trophic
tl = nx.trophic_levels(flightGraph)

# VoteRank (a list of node ids, not a dictionary)
vr = nx.voterank(flightGraph)

### ---------------------------------------------------------------------------------------------------------------------------------

# Convert the keys from the node_id's to their respective airport_code's
lc = convert_output_keys_to_airport_codes(
    lc, 
    id_to_airport_code_mapping_dict
    )

hc = convert_output_keys_to_airport_codes(
    hc, 
    id_to_airport_code_mapping_dict
    )

pc = convert_output_keys_to_airport_codes(
    pc, 
    id_to_airport_code_mapping_dict
    )

tl = convert_output_keys_to_airport_codes(
    tl,
    id_to_airport_code_mapping_dict
    )

# voterank returns a list, so its entries are converted one by one
vr = [id_to_airport_code_mapping_dict.get(node_id, node_id) for node_id in vr]

### ---------------------------------------------------------------------------------------------------------------------------------
# Return the Top 12 results (after they are sorted)
n = 12

centrality_outputs = [
    lc, 
    hc,
    pc,
    tl
]

centrality_output_names = [
    "Load Centrality",
    "Harmonic Centrality",
    "Percolation",
    "Trophic Level"
]

# loop through each measure to get the top & bottom 12 results (after sorted)
for col_name, centrality_values in zip(centrality_output_names, centrality_outputs):
    print(col_name)
    top_12_values = get_top_n_from_sorted_dict(centrality_values, n, desc_order=True)
    bottom_12_values = get_top_n_from_sorted_dict(centrality_values, n, desc_order=False)
    
    print(f"""The top {n} values for {col_name} are:
    {top_12_values}
The bottom {n} values for {col_name} are:
    {bottom_12_values}
""")

print(f"The Local Reaching Centrality is: {lrc}.\n")

print(f"There are {len(vr)} airports in the Vote Rank output. They are (in order): {vr}.")

Load Centrality
The top 12 values for Load Centrality are:
    [('ATL', 0.1306638522176983), ('DFW', 0.11465289879868668), ('ORD', 0.10376748548014621), ('DEN', 0.09713155860796076), ('MSP', 0.089451156218076), ('ANC', 0.07299275968797214), ('IAH', 0.07294982747096553), ('SLC', 0.07219482437038995), ('DTW', 0.056379186754066134), ('LAX', 0.04487316911621955), ('SFO', 0.044040292572139615), ('SEA', 0.0385166472597061)]
The bottom 12 values for Load Centrality are:
    [('RDM', 0.0), ('HIB', 0.0), ('UCA', 0.0), ('HDN', 0.0), ('MAZ', 0.0), ('CDC', 0.0), ('PSC', 0.0), ('AGS', 0.0), ('PLN', 0.0), ('LMT', 0.0), ('BTM', 0.0), ('CPR', 0.0)]

Harmonic Centrality
The top 12 values for Harmonic Centrality are:
    [('DTW', 0.5610976360067543), ('ORD', 0.5591878186550777), ('CVG', 0.555109308341165), ('ATL', 0.5297362690517526), ('DAY', 0.5260081310300452), ('TOL', 0.5233120360309452), ('CLT', 0.5178371226570421), ('MKE', 0.5163258740670005), ('IAH', 0.5156080989895292), ('LEX', 0.512448652631544)

### Additional Graph Algorithms (Clustering, Communities, Components & More)

#### Clustering
- square_clustering

In [26]:
# Square clustering coefficient per node, re-keyed from node ids to airport codes
sclust = nx.square_clustering(flightGraph)
sclust = convert_output_keys_to_airport_codes(sclust, id_to_airport_code_mapping_dict)

# Number of entries to show from each end of the ranking, and the label used in the report
n = 12
col_name = "Square Clustering"

# Highest values first, then lowest values first
top_12_values = get_top_n_from_sorted_dict(sclust, n, desc_order=True)
bottom_12_values = get_top_n_from_sorted_dict(sclust, n, desc_order=False)

print(f"""The top {n} values for {col_name} are:
    {top_12_values}
The bottom {n} values for {col_name} are:
    {bottom_12_values}
""")

The top 12 values for Square Clustering are:
    [('LCH', 0.7485029940119761), ('LRD', 0.7485029940119761), ('BRO', 0.7485029940119761), ('ILE', 0.7485029940119761), ('TXK', 0.7485029940119761), ('CLL', 0.7485029940119761), ('SJT', 0.7485029940119761), ('ABI', 0.7485029940119761), ('BPT', 0.7485029940119761), ('ACT', 0.7485029940119761), ('MVY', 0.6853932584269663), ('SMX', 0.6761904761904762)]
The bottom 12 values for Square Clustering are:
    [('UCA', 0), ('MAZ', 0), ('ANI', 0), ('BTM', 0), ('UTM', 0), ('DHN', 0), ('MKG', 0), ('LNY', 0), ('EWN', 0), ('MWH', 0), ('HOB', 0), ('PIH', 0)]



#### Communities
- Label Propagation
    - asyn_lpa_communities
    - fast_label_propagation_communities

In [27]:
# Label propagation
# ** Label propagation community detection algorithms.
# Both calls return an iterable that is consumed with next() / a for loop below;
# every item it yields is one community, i.e. a collection of node ids.

a_lpa_comm = nx.community.asyn_lpa_communities(
    flightGraph, 
    weight="flight_distance",
    seed=42
    )

flpc = nx.community.fast_label_propagation_communities(
    flightGraph,
    weight="flight_distance",
    seed=42
    )

# Print Results
print(f"Asynchronous LPA Communities:")

# First community produced by the asynchronous algorithm
top_level_comms_a_lpa_comm = list(next(a_lpa_comm))
print(f"""Top Level Communities ({len(top_level_comms_a_lpa_comm)}): 
      {[id_to_airport_code_mapping_dict.get(node_id, node_id) for node_id in top_level_comms_a_lpa_comm]}
      """)

# Remaining communities, numbered from 2
level = 2
for community in a_lpa_comm:
    a_lpa_comm_communities = list(community)
    print(f"""Level {level} communities ({len(a_lpa_comm_communities)}): {[id_to_airport_code_mapping_dict.get(node_id, node_id) for node_id in a_lpa_comm_communities]}
              """)
    level += 1

print("Fast Label Propagation Communities")

# First community produced by the fast algorithm. Its own members are listed here;
# the asynchronous result must not be reused for this output.
top_level_comms_flpc = list(next(flpc))
print(f"""Top Level Communities ({len(top_level_comms_flpc)}): 
      {[id_to_airport_code_mapping_dict.get(node_id, node_id) for node_id in top_level_comms_flpc]}
      """)

# Remaining communities, numbered from 2
level = 2
for community in flpc:
    flpc_communities = list(community)
    print(f"""Level {level} communities ({len(flpc_communities)}): {[id_to_airport_code_mapping_dict.get(node_id, node_id) for node_id in flpc_communities]}""")
    level += 1

Asynchronous LPA Communities:
Top Level Communities (348): 
      ['EWR', 'AVL', 'MOB', 'MOT', 'CRW', 'GEG', 'LGB', 'ELP', 'UTM', 'MEI', 'DFW', 'RIC', 'HLN', 'FAR', 'LAS', 'ECP', 'OXR', 'ABY', 'FCA', 'CYS', 'JNU', 'IAH', 'DHN', 'WYS', 'CLD', 'BOS', 'MIA', 'BGR', 'LMT', 'BPT', 'EVV', 'AKN', 'VLD', 'SBA', 'DCA', 'PAH', 'CNY', 'MWH', 'BIS', 'CIC', 'PUB', 'SAT', 'SFO', 'LWS', 'ERI', 'SRQ', 'CPR', 'JAX', 'MYR', 'ASE', 'LFT', 'MFR', 'ORF', 'PIH', 'BIL', 'SIT', 'SPS', 'ANC', 'PIE', 'ABI', 'ADQ', 'CMH', 'HOB', 'MOD', 'STX', 'HIB', 'RDM', 'MDT', 'MIB', 'SUX', 'ACT', 'BTM', 'OTZ', 'LIH', 'SYR', 'RNO', 'LGA', 'SEA', 'STL', 'SAF', 'PIA', 'BUR', 'MVY', 'SCE', 'ALB', 'EYW', 'SPN', 'SMF', 'CVG', 'GTF', 'PBG', 'SJC', 'FNT', 'SDF', 'BRW', 'AMA', 'SLC', 'VPS', 'GSO', 'MSY', 'MCN', 'DLG', 'CMI', 'VEL', 'MGM', 'MSN', 'GGG', 'HPN', 'AVP', 'SPI', 'JAC', 'MKG', 'LBE', 'CLE', 'GRI', 'CEC', 'MCO', 'CLL', 'EGE', 'BUF', 'FLL', 'TPA', 'FWA', 'STC', 'BKG', 'STT', 'MKK', 'ELM', 'PSG', 'MSP', 'AEX', 'SUN', 'INL', 'M

#### Components
- Strong Connectivity
    - is_strongly_connected
    - number_strongly_connected_components
    - strongly_connected_components
    - kosaraju_strongly_connected_components
- Weak Connectivity
    - is_weakly_connected
    - number_weakly_connected_components
    - weakly_connected_components
- Attracting Components 
    - is_attracting_component
    - number_attracting_components
    - attracting_components
- Semiconnectedness
    -  is_semiconnected

In [28]:
# Strong connectivity: flag, component count and two ways of listing the components
isc = nx.is_strongly_connected(flightGraph)
ns_conn_comps = nx.number_strongly_connected_components(flightGraph)
s_conn_comps = nx.strongly_connected_components(flightGraph)
ks_conn_comp = nx.kosaraju_strongly_connected_components(flightGraph)

# Weak connectivity: flag, component count and the components themselves
iwc = nx.is_weakly_connected(flightGraph)
nwcc = nx.number_weakly_connected_components(flightGraph)
wcc = nx.weakly_connected_components(flightGraph)

# Attracting components: flag, component count and the components themselves
iac = nx.is_attracting_component(flightGraph)
nac = nx.number_attracting_components(flightGraph)
attr_comps = nx.attracting_components(flightGraph)

# Semiconnectedness
is_semiconnected = nx.is_semiconnected(flightGraph)

# Materialise the component iterables; the first three are ordered largest first
list_of_scc = sorted(s_conn_comps, key=len, reverse=True)
list_of_ks_conn_comp = sorted(ks_conn_comp, key=len, reverse=True)
list_of_wcc = sorted(wcc, key=len, reverse=True)
list_of_attr_comps = list(attr_comps)

# Convert node ids to airport_code attribute values, one result per component list
scc, ks_conn_comp, wcc, attr_comps = (
    convert_node_ids_to_airport_codes(components, id_to_airport_code_mapping_dict)
    for components in (
        list_of_scc,
        list_of_ks_conn_comp,
        list_of_wcc,
        list_of_attr_comps
    )
)


# Boolean outputs
def connected_strength_output(isc, iwc, is_semiconnected):
    """
    Print one sentence for each connectivity flag of the graph.

    Args:
        isc (bool): Whether the graph is strongly connected.
        iwc (bool): Whether the graph is weakly connected.
        is_semiconnected (bool): Whether the graph is semiconnected.

    Returns:
        None. The three sentences are written to standard output.
    """
    # Each phrase carries its own verb so the sentence is complete in both cases
    isc_output = "is strongly connected" if isc else "is NOT strongly connected"
    iwc_output = "is weakly connected" if iwc else "is NOT weakly connected"
    is_semiconnected_output = "is semiconnected" if is_semiconnected else "is NOT semiconnected"
    
    print(f"This graph {isc_output}.")
    print(f"This graph {iwc_output}.")
    print(f"This graph {is_semiconnected_output}.")

# Print Results
print("Converted Strongly Connected Components (SCC):", scc)
print("Converted Kosaraju SCC:", ks_conn_comp)
print("Converted Weakly Connected Components (WCC):", wcc)
print("The number of strongly connected components is: ", ns_conn_comps)
print("The number of weakly connected components is: ", nwcc)

connected_strength_output(
    isc=isc, 
    iwc=iwc, 
    is_semiconnected=is_semiconnected
    )

# "NOT " is inserted only when the graph is not a single attracting component
iac_output = "NOT " if not iac else ""
# attr_comps already holds the attracting components converted to airport codes,
# so it is reused instead of converting the raw node ids a second time.
list_of_attr_comps = attr_comps

print(f"This graph does {iac_output}consist of a single attracting component.")
print(f"These are the {nac} attracting component(s): {list_of_attr_comps}")

Converted Strongly Connected Components (SCC): [{'GNV', 'DRO', 'ADK', 'ELP', 'FLG', 'AMA', 'TLH', 'FAR', 'JMS', 'ABE', 'LIH', 'SCC', 'LSE', 'PPG', 'BRO', 'SCE', 'DEN', 'RHI', 'GJT', 'RDU', 'IYK', 'APF', 'FWA', 'OAK', 'ORD', 'SBA', 'TYR', 'SGF', 'JAC', 'AUS', 'ISN', 'ANC', 'BJI', 'LFT', 'LWS', 'MOD', 'CIU', 'MCN', 'AGS', 'ISP', 'MKK', 'GST', 'GRB', 'FLO', 'SUN', 'ONT', 'PSG', 'ITH', 'CDC', 'MSO', 'BKG', 'GPT', 'SYR', 'BUR', 'CHA', 'ITO', 'GTR', 'JLN', 'MIA', 'INL', 'EWN', 'MVY', 'BTV', 'VCT', 'FCA', 'LGA', 'BRW', 'CYS', 'DAL', 'BUF', 'FNT', 'SWF', 'EYW', 'RIC', 'MHT', 'HPN', 'OTH', 'BDL', 'WYS', 'ADQ', 'SAF', 'CVG', 'SMF', 'GUC', 'MCI', 'DUT', 'ROC', 'VPS', 'CLT', 'ASE', 'JNU', 'ESC', 'GGG', 'STX', 'MLB', 'GRI', 'PDX', 'PFN', 'IND', 'WRG', 'MDW', 'PBI', 'STT', 'HVN', 'CHS', 'IAD', 'ALB', 'LYH', 'CAE', 'TOL', 'VEL', 'MQT', 'BTR', 'HIB', 'IAH', 'KTN', 'ILE', 'MLU', 'SBN', 'SJC', 'PIH', 'FAI', 'RDD', 'MAF', 'CWA', 'ABR', 'DBQ', 'IAG', 'MTJ', 'EAU', 'YAK', 'ABI', 'SMX', 'KOA', 'MSY', 'DFW',

#### Distance Measures
- harmonic_diameter

In [29]:
# Harmonic diameter of the whole flight graph
harmonic_diameter = nx.harmonic_diameter(flightGraph)

print(f"The Harmonic Diameter is {harmonic_diameter}")

The Harmonic Diameter is 2.391734958945008


#### Dominating Sets
- dominating_set

In [30]:
# Compute a dominating set (no preferred start node) and keep it as a list
dominating_set = [*nx.dominating_set(flightGraph, start_with=None)]

# Translate the node ids into airport codes
dominating_set_airport_codes = convert_list(dominating_set, id_to_airport_code_mapping_dict)

# Report how many airports are in the set, followed by their codes
print(f"""The Dominating Set includes these {len(dominating_set)} airports: 
{dominating_set_airport_codes}""")

The Dominating Set includes these 212 airports: 
['ABR', 'EWR', 'RST', 'LWB', 'MOB', 'FNT', 'BGM', 'TEX', 'MOT', 'MHK', 'CRW', 'HTS', 'GEG', 'BRW', 'LGB', 'HRL', 'ELP', 'UTM', 'MEI', 'IDA', 'ITH', 'BFL', 'AMA', 'VIS', 'VPS', 'SHD', 'DLH', 'CDC', 'GUM', 'ROW', 'COU', 'MCN', 'HLN', 'FOE', 'RFD', 'CMI', 'FAR', 'CHA', 'GRB', 'ECP', 'RAP', 'MAZ', 'VEL', 'MGM', 'OXR', 'ORH', 'TRI', 'FSD', 'GGG', 'TUL', 'HYS', 'ABY', 'OTH', 'IAG', 'SPI', 'CYS', 'UCA', 'JNU', 'DVL', 'ANI', 'MKG', 'MSO', 'LNY', 'LBE', 'RHI', 'DHN', 'WYS', 'CLD', 'GRI', 'ISN', 'ISO', 'CEC', 'ATW', 'PHF', 'CLL', 'TVC', 'UST', 'PFN', 'ROR', 'AZO', 'IYK', 'SJT', 'GJT', 'FAY', 'JLN', 'ART', 'LMT', 'BPT', 'EVV', 'AKN', 'BRO', 'VLD', 'GCK', 'STC', 'CMX', 'BKG', 'ICT', 'EUG', 'BMI', 'SBA', 'PAH', 'CNY', 'DIK', 'MWH', 'BIS', 'ABE', 'AEX', 'CWA', 'CIC', 'SUN', 'MMH', 'PUB', 'LWS', 'PLN', 'ESC', 'COD', 'EWN', 'ERI', 'GPT', 'CDV', 'MBS', 'CPR', 'GCC', 'ISP', 'ILM', 'LAR', 'ACY', 'LSE', 'IPL', 'BLI', 'GUC', 'SGF', 'ASE', 'HVN', 'MFR', 'CID'

#### Regular
- is_regular

In [31]:
# Check whether the flight graph is regular and report the outcome
is_regular = nx.is_regular(flightGraph)

print("This graph is regular." if is_regular else "This graph is NOT regular.")

This graph is NOT regular.
