# run_and_plot_multithreaded_mst_computation_performance

In [1]:
import time
from subprocess import Popen, PIPE

def flatten_list_of_strings(args):
    """Concatenate the strings in ``args``, appending one space after each.

    The result therefore ends with a trailing space whenever ``args`` is
    non-empty, and is the empty string when ``args`` is empty.
    """
    return ''.join(piece + ' ' for piece in args)

def str_to_num(s):
    """Convert a numeric string to ``float`` if it contains a dot, else to ``int``."""
    return float(s) if '.' in s else int(s)

class PrimerTpSistemasExpRunner():
    """Build the project, run the TP executable once and collect its metrics.

    ``run()`` first invokes ``make -C ..`` and then launches
    ``<tp_exec> <running_mode> <input_graph> <qty_threads>``.  Every stdout
    line of the form ``exp_result: <name> = <number>`` is stored in the
    result dict as ``{name: float(number)}``.  The wall-clock time spent
    launching and waiting for the executable is stored under
    ``exp_duration``.

    Parameters
    ----------
    tp_exec : str
        Path to the executable under test.
    running_mode : str
        Mode flag passed as the first command-line argument.
    input_graph : str
        Path to the input graph file.  Its last path component, cut at the
        first dot, is stored under ``input_graph`` in the results.
    qty_threads : str
        Thread count; passed verbatim on the command line and stored
        unchanged under ``qty_threads``.
    verbose : bool
        When true, print a banner before and after every command.
    print_program_logs : bool
        When true, echo the stdout of every command.
    """

    # Marker that prefixes every metric line printed by the executable.
    RESULT_MARKER = 'exp_result:'

    def __init__(self,
        tp_exec,
        running_mode,
        input_graph,
        qty_threads,
        verbose=False,
        print_program_logs=True):

        self.tp_exec = tp_exec
        self.running_mode = running_mode
        self.input_graph = input_graph
        self.qty_threads = qty_threads
        self.verbose = verbose
        self.print_program_logs = print_program_logs

        # Full command line of the experiment, in the order the executable expects.
        self.run_args = [self.tp_exec,
                         self.running_mode,
                         self.input_graph,
                         self.qty_threads]

        # e.g. "../test/experimentacion/arbol/arbol100.txt" -> "arbol100"
        input_graph_name = input_graph.split('/')[-1].split('.')[0]
        self.exp_results = {"input_graph": input_graph_name, 'qty_threads': qty_threads}

    @staticmethod
    def _decode_lines(raw):
        """Decode captured bytes as UTF-8 and split them into newline-free lines."""
        lines = raw.decode("utf-8", errors="replace").split('\n')
        if lines and lines[-1] == '':
            # Artifact of the final newline (or of empty output), not a real line.
            lines.pop()
        return lines

    @classmethod
    def _parse_exp_result(cls, line):
        """Return ``(name, value)`` for a metric line, or ``None`` for any other line.

        Raises ``ValueError`` when the marker is present but the rest of the
        line is not ``<name> = <number>``.
        """
        _, marker, payload = line.partition(cls.RESULT_MARKER)
        if not marker:
            return None
        name, equals, value = payload.partition('=')
        if not equals:
            raise ValueError("malformed exp_result line: {!r}".format(line))
        return name.strip(), float(value)

    def run_executable(self, args):
        """Run ``args`` as a subprocess, echo its output and harvest metric lines.

        stderr (if any) is always printed; stdout is printed only when
        ``print_program_logs`` is set.  Metric lines found on stdout update
        ``self.exp_results``.  A non-zero exit status is reported with a
        warning but does not raise.

        Raises
        ------
        ValueError
            If a stdout line carries the metric marker but cannot be parsed.
        """
        if self.verbose:
            print("=====")
            print("running: {}".format(flatten_list_of_strings(args)))

        p = Popen(args, stdout=PIPE, stderr=PIPE)
        # communicate() drains both pipes concurrently.  Reading stderr to EOF
        # before touching stdout deadlocks as soon as the child fills the
        # stdout pipe buffer.
        stdout_data, stderr_data = p.communicate()

        if stderr_data:
            print("Print stderr:")
            for line in self._decode_lines(stderr_data):
                print(line)

        if self.print_program_logs:
            print("Print program logs:")

        for line in self._decode_lines(stdout_data):
            # Expected metric format: 'exp_result: qty_merges = 5'
            parsed = self._parse_exp_result(line)
            if parsed is not None:
                name, value = parsed
                self.exp_results[name] = value

            if self.print_program_logs:
                print(line)

        if p.returncode != 0:
            print("warning: `{}` exited with return code {}".format(
                ' '.join(args), p.returncode))

        if self.verbose:
            print("finished: {}".format(flatten_list_of_strings(args)))
            print("=====")

    def run_exp(self):
        """Run the experiment command and record its wall-clock duration in seconds."""
        t = time.time()
        self.run_executable(self.run_args)
        exp_duration = time.time() - t
        self.exp_results.update({'exp_duration': exp_duration})

    def make(self):
        """Invoke ``make`` in the parent directory."""
        self.run_executable(["make", '-C', ".."])

    def run(self):
        """Build, run the experiment and return a copy of the collected results."""
        self.make() # will only recompile when a project file has changed
        self.run_exp()
        return dict(self.exp_results) # we want to return a copy of the result

### Correr una exp :D

In [2]:
# Single run: build the project and launch one experiment with the
# settings below, echoing the command banners but not the program's stdout.
single_run_config = dict(
    tp_exec="../TP1",
    running_mode="-t",
    input_graph="../test/experimentacion/arbol/arbol100.txt",
    qty_threads="10",
    verbose=True,
    print_program_logs=False,
)
er = PrimerTpSistemasExpRunner(**single_run_config)

er.run()

=====
running: make -C .. 
finished: make -C .. 
=====
=====
running: ../TP1 -t ../test/experimentacion/arbol/arbol100.txt 10 
finished: ../TP1 -t ../test/experimentacion/arbol/arbol100.txt 10 
=====


{'input_graph': 'arbol100',
 'qty_threads': '10',
 'qty_nodes': 100.0,
 'qty_edges': 99.0,
 'qty_merges': 63.0,
 'exp_duration': 0.013471603393554688}

### exp configs

In [10]:
import pandas as pd
from scipy import stats
import numpy as np
import os

# Input graphs: one file per graph family and per size (100, 200, ..., 1000).
graph_path = '../test/experimentacion/'
graph_sizes = list(range(100, 1001, 100))


def _graph_family_paths(family):
    """Return the input file paths of one graph family, one per entry of ``graph_sizes``."""
    return ["{0}{1}/{1}{2}.txt".format(graph_path, family, size) for size in graph_sizes]


complete_graph_paths = _graph_family_paths("completo")
tree_graph_paths = _graph_family_paths("arbol")
sparse_graph_paths = _graph_family_paths("ralo")
star_graph_paths = _graph_family_paths("estrella")
path_graph_paths = _graph_family_paths("lista_enlazada")

# Settings of the experiment to run: which graph family, how many threads
# per run, how many repetitions, and the name of the output CSV.
exp_name = "exp_lista_enlazada"
graph_files = path_graph_paths
thread_quantities = ["1", "5", "15", "30"]
exp_repetitions = 10



### run exp

In [11]:
# Run every (input graph, thread count) combination `exp_repetitions` times,
# collecting one result dict per run, then persist the table as CSV.
exps = []
print("Running " + exp_name)
for i in range(exp_repetitions):
    print("Rep nr " + str(i))
    # Dedicated loop variable: reusing the name `graph_path` here would
    # overwrite the base directory defined in the config cell.
    for input_graph_path in graph_files:
        for qty_threads in thread_quantities:
            er = PrimerTpSistemasExpRunner(
                    tp_exec="../TP1",
                    running_mode="-t",
                    input_graph=input_graph_path,
                    qty_threads=qty_threads,
                    verbose=False,
                    print_program_logs=False)
            exps.append(er.run())

exps_df = pd.DataFrame(exps)

# to_csv does not create missing directories; make sure the output folder
# exists so a finished sweep is not lost to a failed write.
results_dir = "./results"
os.makedirs(results_dir, exist_ok=True)
exps_df.to_csv("{}/{}.csv".format(results_dir, exp_name))

# TODO: discard outliers. The draft below refers to a column and a threshold
# that are not defined in this notebook, so it is kept only as a sketch.
# exps = exps[stats.zscore(np.abs(exps['# de ciclos insumidos totales'])) < z_score_threshold]
exps_df



Running exp_lista_enlazada
Rep nr 0
Rep nr 1
Rep nr 2
Rep nr 3
Rep nr 4
Rep nr 5
Rep nr 6
Rep nr 7
Rep nr 8
Rep nr 9


Unnamed: 0,exp_duration,input_graph,qty_edges,qty_merges,qty_nodes,qty_threads
0,0.009438,lista_enlazada100,99.0,0.0,100.0,1
1,0.010052,lista_enlazada100,99.0,56.0,100.0,5
2,0.013734,lista_enlazada100,99.0,81.0,100.0,15
3,0.016241,lista_enlazada100,99.0,85.0,100.0,30
4,0.011147,lista_enlazada200,199.0,0.0,200.0,1
5,0.014523,lista_enlazada200,199.0,98.0,200.0,5
6,0.015581,lista_enlazada200,199.0,174.0,200.0,15
7,0.018677,lista_enlazada200,199.0,165.0,200.0,30
8,0.012700,lista_enlazada300,299.0,0.0,300.0,1
9,0.018030,lista_enlazada300,299.0,122.0,300.0,5
