In [1]:
import os
import os.path
import re

import matplotlib.pyplot as plt
import pandas as pd
%matplotlib inline

from IPython.display import display

In [2]:
# Default canvas size (width, height in inches) for every figure drawn below.
plt.rc('figure', figsize=[20, 8])

# Root of the result set analysed in this notebook, expressed relative to
# the directory the kernel was started in.
topleveldir = os.path.join(
    os.getcwd(),
    '..',
    'results',
    '20220928allresults',
)

Utility functions for processing summary files

In [36]:
def summary_files(basepath, subdir):
    """Return the paths of all summary files below ``basepath/subdir``.

    The directory tree is walked recursively. A file qualifies when its whole
    name is an optional run of digits/underscores followed by the literal
    suffix ``_summary.csv`` (for example ``10_3_summary.csv``).

    Args:
        basepath: Directory that contains the result sub-directories.
        subdir: Name of the sub-directory (relative to ``basepath``) to scan.

    Returns:
        A sorted list of paths (each joined onto ``basepath/subdir``). The
        list is empty when the directory does not exist or holds no match.
    """
    summary_dir = os.path.join(basepath, subdir)
    # The dot is escaped so that only a real ".csv" extension is accepted, and
    # fullmatch() is used so nothing may precede or follow the pattern.
    pattern = re.compile(r'[0-9_]*_summary\.csv')
    matches = []
    for root, _dirs, files in os.walk(summary_dir):
        for filename in files:
            if pattern.fullmatch(filename):
                matches.append(os.path.join(root, filename))
    # os.walk yields entries in arbitrary, filesystem-dependent order; sorting
    # keeps downstream tables reproducible between runs and machines.
    matches.sort()
    return matches

def merge_summary(summary_files, select_n):
    """Merge the rows with ``n == select_n`` from several summary files.

    Each file is read as a ``;``-separated CSV restricted to the columns
    ``n, r, a, i, init_score, time, best, slack``. Rows whose ``n`` equals
    ``select_n`` are kept and stacked in the order the files are given.

    Args:
        summary_files: Iterable of paths to summary CSV files.
        select_n: Value of the ``n`` column to select.

    Returns:
        A DataFrame with exactly the eight columns above, in that order. Row
        labels are the ones each row had in its source file, so they may
        repeat across files. When no row matches (or no file is given) an
        empty DataFrame with the same columns is returned.
    """
    columns = ['n', 'r', 'a', 'i', 'init_score', 'time', 'best', 'slack']

    # Gather the per-file selections and concatenate once at the end; growing
    # a frame with concat inside the loop copies all previous rows each time,
    # and seeding it with an empty object-dtype frame is deprecated in pandas.
    selected = []
    for potential_summary in summary_files:
        data = pd.read_csv(potential_summary, sep=';', usecols=columns)
        pot_data = data.loc[data['n'] == select_n]
        if not pot_data.empty:
            selected.append(pot_data)

    if not selected:
        return pd.DataFrame(columns=columns)

    # read_csv keeps the file's own column order, so enforce ours explicitly.
    return pd.concat(selected).reindex(columns=columns)

def extract_metrics(merged_summary):
    """Summarise a merged results table as a small dictionary.

    Args:
        merged_summary: DataFrame providing at least the columns ``best``,
            ``time`` and ``slack``.

    Returns:
        A dict with the keys
        ``rows`` (number of rows in the table),
        ``avg_score`` (mean of the ``best`` column),
        ``avg_time`` (mean of the ``time`` column) and
        ``no_infeasible`` (number of rows whose ``slack`` is strictly positive).
    """
    row_count = len(merged_summary)
    mean_best = merged_summary['best'].mean()
    mean_time = merged_summary['time'].mean()
    # Summing the boolean mask counts the rows where it is True.
    positive_slack_rows = merged_summary['slack'].gt(0).sum()

    return dict(
        rows=row_count,
        avg_score=mean_best,
        avg_time=mean_time,
        no_infeasible=positive_slack_rows,
    )

In [50]:
# Result sub-directories (below `topleveldir`) to compare, in display order.
param_dirs = [
    '20220711sa_original',
    '20220825sa_cutoff',
    '20220826sa_cutoff',
    '20220828sa_cutoff',
    '20220829sa_cutoff',
    '20220830sa_cutoff',
    '20220901sa_cutoff',
    '20220902sa_cutoff',
    '20220903sa_cutoff',
    '20220904sa_cutoff',
    '20220905sa_cutoff'
]

# One row of metrics per entry of `param_dirs`; row k of the final table
# corresponds to param_dirs[k].
metrics = []
for param_dir in param_dirs:
    # Locate every summary file of this run, merge the rows with n == 10
    # into one table, then reduce that table to its headline numbers.
    summ_files = summary_files(topleveldir, param_dir)
    result_table = merge_summary(summ_files, 10)
    rm = extract_metrics(result_table)
    metrics.append(
        [rm[key] for key in ('rows', 'avg_score', 'avg_time', 'no_infeasible')]
    )

df = pd.DataFrame(
    metrics,
    columns=['#rows', 'Avg. score', 'Avg. time', '#infeasible'],
)
df

Unnamed: 0,#rows,Avg. score,Avg. time,#infeasible
0,40,211.651105,143.497897,7
1,27,220.178419,122.368718,7
2,40,201.251101,180.409429,7
3,40,219.318012,123.16758,7
4,40,211.529846,214.497858,8
5,40,210.579143,247.477863,8
6,40,209.972576,395.833163,8
7,40,201.155525,284.99035,6
8,40,201.124351,238.612601,7
9,40,203.492792,219.701479,7
