In [None]:
import os
import zipfile
import subprocess

import pandas as pd
import matplotlib.pyplot as plt

## Utils

In [None]:
def unzip_data(zip_path: str):
    """Extract a zip archive into a directory named after the archive.

    The destination is ``zip_path`` with its extension stripped, e.g.
    ``data/ch9.zip`` is extracted into ``data/ch9``.

    Args:
        zip_path: Path of the zip file to extract.
    """
    target_dir = os.path.splitext(zip_path)[0]
    # The context manager closes the archive even when extraction raises.
    with zipfile.ZipFile(zip_path, "r") as zip_ref:
        zip_ref.extractall(target_dir)


def problem_name(file_name: str) -> str:
    """Return ``file_name`` with its last extension removed.

    Everything from the final ``'.'`` onwards is dropped, so
    ``'USA-road-d.NY.gr'`` becomes ``'USA-road-d.NY'``.

    Raises:
        ValueError: If ``file_name`` contains no ``'.'``.
    """
    last_dot = file_name.rindex('.')
    stem = file_name[:last_dot]
    return stem


def get_problems(path: str) -> pd.DataFrame:
    """Index every graph problem (``*.gr`` file) found under ``path``.

    The directory tree is walked recursively. For each ``<name>.gr`` file one
    row is produced, recording whether the companion query files
    ``<name>.ss`` and ``<name>.p2p`` exist in the same directory.

    Args:
        path: Root directory to search.

    Returns:
        A DataFrame with the columns ``name`` (file name without ``.gr``),
        ``dir`` (directory containing the graph), ``ss`` and ``p2p`` (bool
        flags for the companion files). Rows are ordered by directory and
        then by file name. The frame is empty, but keeps these columns, when
        no graph is found.
    """
    graph_ext = '.gr'
    records = []

    for root, dirs, files in os.walk(path):
        # Sorting in place makes os.walk descend in a deterministic order.
        dirs.sort()
        present = set(files)
        for file_name in sorted(files):
            if not file_name.endswith(graph_ext):
                continue
            # The name ends with '.gr', so this strips exactly the last extension.
            name = file_name[:-len(graph_ext)]
            records.append(
                (name, root, name + '.ss' in present, name + '.p2p' in present)
            )

    # Build the frame once instead of concatenating inside the loop, and pin
    # the flag dtypes so they stay bool even when nothing was found.
    problems = pd.DataFrame(records, columns=['name', 'dir', 'ss', 'p2p'])
    return problems.astype({'ss': bool, 'p2p': bool})


def run_algorithm(
    alg: str,
    problems_df: pd.DataFrame,
    ss: bool = True,
    p2p: bool = True
):
    """Run the ``./<alg>`` executable on every problem in ``problems_df``.

    For each row the output directory is derived from the row's ``dir`` by
    replacing ``'inputs'`` with ``'outputs/<alg>'`` and is created if needed.
    The executable is then invoked once per requested query type for which
    the row reports an input file:

    * ``-ss``:  reads ``<name>.ss``,  writes ``<name>.ss.res``
    * ``-p2p``: reads ``<name>.p2p``, writes ``<name>.p2p.res``

    Progress is printed on one line per problem. The exit status of the
    executable is not inspected.

    Args:
        alg: Name of the executable in the current working directory.
        problems_df: Frame with the columns ``name``, ``dir``, ``ss``, ``p2p``.
        ss: Whether to run the ``-ss`` queries.
        p2p: Whether to run the ``-p2p`` queries.
    """
    executable = f"./{alg}"

    for _, problem in problems_df.iterrows():
        out_dir = problem['dir'].replace('inputs', f'outputs/{alg}')
        os.makedirs(out_dir, exist_ok=True)

        in_path = os.path.join(problem['dir'], problem['name'])
        out_path = os.path.join(out_dir, problem['name'])

        print(problem['name'], end=': ', flush=True)
        if ss and problem['ss']:
            print('-> ss', end=' ', flush=True)
            # Pass an argument list rather than splitting a command string,
            # so paths containing whitespace reach the executable intact.
            subprocess.run([
                executable,
                '-d', in_path + '.gr',
                '-ss', in_path + '.ss',
                '-oss', out_path + '.ss.res',
            ])
        if p2p and problem['p2p']:
            print('-> p2p', end='', flush=True)
            subprocess.run([
                executable,
                '-d', in_path + '.gr',
                '-p2p', in_path + '.p2p',
                '-op2p', out_path + '.p2p.res',
            ])
        print(flush=True)


def extract_time(file_name: str):
    """Read the time value from a result file.

    The file is scanned from the top for the first line that starts with
    ``'t'``; the second whitespace-separated token of that line is returned
    as a string. ``None`` is returned when no line starts with ``'t'``.

    Args:
        file_name: Path of the result file to read.
    """
    with open(file_name, 'r') as result_file:
        for row in result_file:
            if row.startswith('t'):
                tokens = row.split()
                return tokens[1]
    return None


def get_results(path: str) -> pd.DataFrame:
    """Collect the time reported by every ``*.ss.res`` file under ``path``.

    The directory tree is walked recursively and ``extract_time`` is called
    on each file whose name ends with ``.ss.res``.

    Args:
        path: Root directory holding the result files.

    Returns:
        A DataFrame with the columns ``name`` (result file name, ``string``
        dtype) and ``exec_time`` (``float``; NaN when ``extract_time`` found
        no time). Rows are ordered by directory and then by file name, so
        frames built from directory trees with the same layout line up row
        by row.
    """
    result_names = []
    exec_times = []

    for root, dirs, files in os.walk(path):
        # Sorting in place makes os.walk descend in a deterministic order.
        dirs.sort()
        for file_name in sorted(files):
            if not file_name.endswith('.ss.res'):
                continue
            raw_time = extract_time(os.path.join(root, file_name))
            result_names.append(file_name)
            # extract_time returns None when the file has no time line.
            exec_times.append(float('nan') if raw_time is None else float(raw_time))

    # Build the frame once instead of concatenating inside the loop.
    return pd.DataFrame({
        'name': pd.Series(result_names, dtype='string'),
        'exec_time': pd.Series(exec_times, dtype=float)
    })


def plot_results(df_dict: dict, figsize: tuple = (7, 7)):
    """Scatter-plot the execution times of several algorithms on one figure.

    Each entry of ``df_dict`` is drawn as a scatter of ``exec_time`` against
    the frame's index, plus a horizontal line at the mean ``exec_time`` in
    the same colour.

    Args:
        df_dict: Maps an algorithm name (used as legend label) to a DataFrame
            that has an ``exec_time`` column.
        figsize: Figure size passed to ``plt.figure``.
    """
    plt.figure(figsize=figsize)

    colors = ['blue', 'red', 'green', 'orange', 'purple']
    for i, (alg, df) in enumerate(df_dict.items()):
        # Wrap around the palette so more than five algorithms do not raise.
        color = colors[i % len(colors)]
        plt.scatter(
            df.index,
            df['exec_time'],
            color=color,
            s=5,
            label=alg
        )
        # Mean execution time, drawn across the whole index range.
        plt.plot(
            df.index,
            [df['exec_time'].mean()] * len(df.index),
            color=color,
            linewidth=2
        )

    plt.xlabel('problem index')
    plt.ylabel('execution time')
    plt.legend()
    plt.show()

## Extract data

In [None]:
# Extract the archive into data/ch9 (the zip path with its extension removed).
unzip_data('data/ch9.zip')

# Index every .gr graph found under the extracted tree.
problems_df = get_problems('data/ch9')
# Displayed as (number of problems, number of columns).
problems_df.shape

## Run the tests

In [None]:
# Rebuild from scratch via make; presumably this produces the ./<algorithm>
# executables that the following cells invoke.
# check=True turns a failed build into an exception instead of letting the
# following cells run against stale or missing binaries.
subprocess.run(['make', 'clean'], check=True)
subprocess.run(['make', 'all'], check=True)

In [None]:
# Run the ./dijkstra executable on every indexed problem
# (both the -ss and the -p2p queries, where the input files exist).
run_algorithm('dijkstra', problems_df)

In [None]:
# Run the ./dial executable on every indexed problem
# (both the -ss and the -p2p queries, where the input files exist).
run_algorithm('dial', problems_df)

In [None]:
# Run the ./radix executable (radix heap algorithm) on every indexed problem
# (both the -ss and the -p2p queries, where the input files exist).
run_algorithm('radix', problems_df)

## Plot results

In [None]:
# Load the .ss.res timings of each algorithm, keyed by algorithm name,
# then draw them on a single figure.
results = {
    alg: get_results(f'data/ch9/outputs/{alg}')
    for alg in ('dijkstra', 'dial', 'radix')
}

plot_results(results)