In [7]:
import matplotlib.pyplot as plt
import numpy as np
from mst_algorithm import MST
from mst_graphs_local import NetworkXGraph
import networkx as nx
import random
import time
from enum import Enum
from scipy.stats import skewnorm
import csv
import math


In [8]:
class GraphType(Enum):
    RANDOM = 'ramdom_graph'
    EXPANDER = 'expander_graph'
    COMPLETE = 'complete_graph'
    CYCLE = 'cycle_graph'
    
class WeightDistribution(Enum):
    RANDOM = 'random_weights'
    EXPONENTIAL = 'exponantial_weights'
    NORMAL = 'normal_weights'
    SKEW_NORMAL = 'skew_normal'

class MaxRead(Enum):
    HUNDRED = 100
    THOUSAND = 1000
    TENTHOUSAND = 10000
    HUNDREDTHOUSAND = 100000
    
class NodeSize(Enum):
    THOUSAND = 1000
    TENTHOUSAND = 10000
    HUNDREDTHOUSAND = 100000
    MILLION = 1000000
    
class MaxWeight(Enum):
    ONE = 1
    TEN = 10
    HUNDRED = 100
    THOUSAND = 1000    
    
def checkConstraints(graph_type, weight_distribution, max_read, node_size, ax_weight):
    
    # Cannot read more or equal nodes than actual number of nodes in a graph 
    if max_read > node_size:
        return False, "Max read is larger than node size!"
    
    # Constraint complete graph node_size to 10.000
    if (graph_type is GraphType.COMPLETE.value) and (node_size >= NodeSize.TENTHOUSAND.value):
        return False, "Complete Graph cannot handle more than 10000 nodes!"
    
    return True, ""
    

default_size = NodeSize.THOUSAND.value
default_degree = 4
default_max_weight = MaxWeight.HUNDRED.value
default_max_read = MaxRead.HUNDRED.value
default_graph_type = GraphType.RANDOM.value
default_weight_distribution = WeightDistribution.RANDOM.value

def print_constants(node_size, degree, max_read,max_weight, graph_type, weight_distribution):
    output = "Size: "+str(node_size)+"\n"+"Degree: "+str(degree)+"\n"+"Max Weight: "+str(max_weight)+"\n"+"Max Read: "+str(max_read)+"\n"+"Graph Type: "+str(graph_type)+"\n"+"Weight Distribution: "+str(weight_distribution)+"\n"
    print(output)

In [9]:
#Random NetworkX graph

def random_graph(degree,size):
    graph = nx.generators.random_regular_graph(degree, size)
    return graph
    
#Expanders NetworkX Graph

def expander_graph(size):
    graph = nx.generators.margulis_gabber_galil_graph(round(math.sqrt(size)),create_using=nx.Graph)
    return graph


# Complete graph

def complete_graph(size):
    graph = nx.generators.complete_graph(size)
    return graph

# Cycle graph

def cycle_graph(size):
    graph = nx.generators.cycle_graph(size)
    return graph


In [10]:
def assign_random_edge_weights(graph, max_weight):
    for (u, v) in graph.edges():
        graph.edges[u, v]['weight'] = random.randint(1, max_weight)
    return graph

def assign_exponential_edge_weights(graph,max_weight,custom_scale=4):
    #todo use max weight to compute value
    for (u, v) in graph.edges():
        graph.edges[u, v]['weight'] = round(np.random.exponential(scale=custom_scale))
    return graph

def assign_normal_edge_weights(graph, max_weight, custom_scale=1):
    for (u, v) in graph.edges():
        graph.edges[u, v]['weight'] = round(np.random.normal(loc=(max_weight/2),scale=custom_scale))
    return graph

def assign_normal_skew_edge_weights(graph, max_weight, alpha_skew=3,custom_scale=1):
    for (u, v) in graph.edges():
            graph.edges[u, v]['weight'] = round(skewnorm.rvs(alpha_skew,loc=max_weight/2,scale=custom_scale))
    return graph

In [14]:
# Function to do a single comparison run against nx.kruskall with custom parameters

def kruskall_comparison(node_size=default_size, degree=default_degree, max_read=default_max_read, max_weight=default_max_weight,graph_type=default_graph_type,weight_distribution=default_weight_distribution):
    
    if max_read > node_size:
        raise Exception("Please reduce default_max_read or increase node size!")
    
    print("Create graph with the following constants:")
    print_constants(node_size, degree, max_read,max_weight,graph_type,weight_distribution)
    
    #Chose here different graph options from above
    graph = None
    
    if graph_type is GraphType.RANDOM.value:
        graph = random_graph(degree,node_size)
    elif graph_type is GraphType.EXPANDER.value:
        graph = expander_graph(node_size)
    elif graph_type is GraphType.COMPLETE.value:
        graph = complete_graph(node_size)
    elif graph_type is GraphType.CYCLE.value:
        graph = complete_graph(node_size)
                        
    #Chose here different weight sampling options from above
    if weight_distribution is WeightDistribution.RANDOM.value:
        graph = assign_random_edge_weights(graph, max_weight)
    elif weight_distribution is WeightDistribution.EXPONENTIAL.value:
        graph = assign_exponential_edge_weights(graph,max_weight)
    elif weight_distribution is WeightDistribution.NORMAL.value:
        graph = assign_normal_edge_weights(graph,max_weight)
    elif weight_distribution is WeightDistribution.SKEW_NORMAL.value:
        graph = assign_normal_skew_edge_weights(graph,max_weight)
        
    #Our algorithm
    our_start_time = time.time()
    approx = MST(
        NetworkXGraph(graph, node_size, max_weight, max_read)
    ).approx_weight()
    our_runtime = time.time() - our_start_time
    print("Our Runtime: "+str(our_runtime))
    print("Our MST weight approx. for random graph:", approx,"\n")
    
    #NetworkX Kruskall Algorithm
    kruskall_start_time = time.time()
    mst=nx.minimum_spanning_tree(graph)
    
    try:
        kruskall_time = time.time()-kruskall_start_time
        print("NetworkX Kruskall runtime: "+str(kruskall_time))

        gt_w = sum(graph.edges[e]["weight"] for e in mst.edges)
        print("ground truth weight", gt_w)
        
        if gt_w == 0 or approx == 0:
            err = "NaN"
        else:
            ratio = max(gt_w, approx) / min(gt_w, approx)
            err = 100 * (ratio - 1)
    except Exception as e:
        print("Exception", e)
        kruskall_time = "NaN"
        gt_w = "NaN"
        err = "NaN"
    
    results = {
        "our_runtime" : our_runtime,
        "kuskrall_time" : kruskall_time,
        "groundtruth" : gt_w,
        "approximation" : approx,
        "error" : err
    }
    
    return results


In [None]:
"""
Exaustive cross product comparison
- 4 graph types
- 4 distributions
- 4 max reads
- 4 max weight
- 4 max graph size
"""


"""
returns: combinations -> List of tuplbes with all possible combinations [(GraphType,NodeSize,WeightDistribution,MaxWeigth,MaxRead)]
"""
def generate_combinations():
    combinations = []
    for c1, e in enumerate(GraphType):
        for c2, f in enumerate(NodeSize):
            for c3, g in enumerate(WeightDistribution):
                for c4, h in enumerate(MaxWeight):
                    for c5, i in enumerate(MaxRead):
                        combinations.append((e,f,g,h,i))
    return combinations


def write_result_to_csv(filename,row):
    with open( filename, 'a', newline='') as csvfile:
        writer = csv.writer(csvfile, delimiter=' ', quotechar='|', quoting=csv.QUOTE_MINIMAL)
        writer.writerow(row)
        
def full_analysis(filename):
    header = ['graph_type','node_size','weight_distribution','max_weight','max_read','our_runtime','kuskrall_time','groundtruth','approximation','error']
    write_result_to_csv(filename,header)
    options = generate_combinations()
    for opt in options:
        csv_row = []
        gt = opt[0]
        ns = opt[1]
        wd = opt[2]
        mw = opt[3]
        mr = opt[4]
        csv_row.extend([gt.value,ns.value,wd.value,mw.value,mr.value])
        
        check, msg = checkConstraints(gt.value, wd.value, mr.value, ns.value, mw.value)
        if gt is GraphType.RANDOM or gt is GraphType.EXPANDER:
            continue
        if check:
            try:
                result = kruskall_comparison(node_size=ns.value,max_read=mr.value,max_weight=mw.value,graph_type=gt.value,weight_distribution=wd.value)
                csv_row.extend([result['our_runtime'],result['kuskrall_time'],result['groundtruth'],result['approximation'],result['error']])
            except Exception as e:
                print(e)
                csv_row.extend(['NaN','NaN','NaN','NaN','NaN'])
        else:
            print(msg)
            csv_row.extend(['NaN','NaN','NaN','NaN','NaN'])

        write_result_to_csv(filename,csv_row)
        
        
#Start full analysis        
full_analysis("all-result-new.csv")

Create graph with the following constants:
Size: 1000
Degree: 4
Max Weight: 1
Max Read: 100
Graph Type: complete_graph
Weight Distribution: random_weights

Our Runtime: 0.2870521545410156
Our MST weight approx. for random graph: 999 

NetworkX Kruskall runtime: 2.0893068313598633
ground truth weight 999
error % 0.0
Create graph with the following constants:
Size: 1000
Degree: 4
Max Weight: 1
Max Read: 1000
Graph Type: complete_graph
Weight Distribution: random_weights

Our Runtime: 1.5720019340515137
Our MST weight approx. for random graph: 992 

NetworkX Kruskall runtime: 1.595796823501587
ground truth weight 999
error % 0.7056451612903247
Max read is larger than node size!
Max read is larger than node size!
Create graph with the following constants:
Size: 1000
Degree: 4
Max Weight: 10
Max Read: 100
Graph Type: complete_graph
Weight Distribution: random_weights

Our Runtime: 1.0430662631988525
Our MST weight approx. for random graph: 999 

NetworkX Kruskall runtime: 2.1467199325561523

In [15]:

#Check here for different test optionsoptions

#Single kruskall comparison with default values
#result_single_default = kruskall_comparison()

#Skruskall comparison with custom parameters
result_single_custom = kruskall_comparison(node_size=100, degree=5, max_read=30, max_weight=150,graph_type=GraphType.COMPLETE.value,weight_distribution=WeightDistribution.EXPONENTIAL.value)

#Increase node size with custom settings
#result_increment_custom = increasing_runtime_performance(start=100,stop=1000,increment=10,graph_type=GraphType.RANDOM.value,weight_distribution=WeightDistribution.RANDOM.value)

Create graph with the following constants:
Size: 100
Degree: 5
Max Weight: 150
Max Read: 30
Graph Type: complete_graph
Weight Distribution: exponantial_weights

Our Runtime: 0.5934817790985107
Our MST weight approx. for random graph: 99 

NetworkX Kruskall runtime: 0.01065516471862793
ground truth weight 0


In [None]:
# Test function that increasing node size with options to customize parameters

def increasing_runtime_performance(start,stop,increment,degree=default_degree,max_read=default_max_read,max_weight=default_max_weight,graph_type=default_graph_type,weight_distribution=default_weight_distribution):
    sizes = []
    o_times = []
    k_times = []
    results = []
    for size in range(start,stop,increment):
        try:
            result = kruskall_comparison(node_size=size, degree=degree, max_read=max_read, max_weight=max_weight,graph_type=graph_type,weight_distribution=weight_distribution)
        except Exception as e:
            print(e)
        sizes.append(size)
        o_times.append(result["our_time"])
        k_times.append(result["kuskrall_time"])
        results.append(result)
    plt.figure()
    plt.plot(sizes,o_times)
    plt.plot(sizes,k_times)
    plt.xlabel("Nodes")
    plt.ylabel("Time")
    plt.show()
    return results
