# Comparing results from output files & updating csv file for comparison

To compare different algorithms we need to determine the delay of passengers. To compute the delay we need
1. the desired arrival time of the passenger
2. and the actual arrival time of the passenger.

The delay is then computed as the difference between the two if the passenger arrives later than desired. If a passenger arrives earlier than desired (or exactly on time), the delay is defined as 0.

In [1]:
import math
import numpy as np
import pandas as pd     # for the use of DataFrames
import os
import csv

In [2]:
import path

def import_input_data(file_path):
    """
    Import an input file and extract its passenger section.

    This is needed to obtain the desired arrival time of every passenger.
    Blank lines and every line containing '#' (comments) are discarded. Each
    remaining line after the '[Passengers]' marker is split on whitespace into
    one row per passenger.

    :param file_path: File path to input file.
    :return: 1 DataFrame with the columns 'ID', 'StartStation', 'Destination',
        'GroupSize' and 'ExpectedArrival' (values are kept as the strings read
        from the file) plus 'ActualArrival', which is initialised with NaN.
        If no passenger lines follow the marker, the DataFrame is empty but
        still has all columns.
    :raises IndexError: if the file contains no '[Passengers]' line.
    """
    column_names = ['ID', 'StartStation', 'Destination', 'GroupSize', 'ExpectedArrival']

    # The context manager closes the file even if parsing fails further down.
    with open(file_path) as f:
        list_of_lines = f.readlines()

    # Clean up data: strip trailing whitespace/newlines, then drop empty
    # lines and comment lines (any line containing '#').
    stripped_lines = (line.rstrip() for line in list_of_lines)
    lines = [line for line in stripped_lines if line != '' and '#' not in line]

    # Locate the first '[Passengers]' marker; everything after it is passenger data.
    try:
        passengers_index = lines.index('[Passengers]')
    except ValueError:
        raise IndexError("no '[Passengers]' section found in " + str(file_path)) from None

    # One whitespace-separated row per passenger. Passing the column names to
    # the constructor also yields a well-formed (empty) frame when the section
    # contains no passengers.
    rows = [line.split() for line in lines[passengers_index + 1:]]
    passenger_df = pd.DataFrame(rows, columns=column_names)
    passenger_df['ActualArrival'] = np.nan

    return passenger_df

In [3]:
def import_output_data(file_path_output, file_path_input):
    """
    Import an output file (passenger timetable) and record actual arrival times.

    A copy of the passenger DataFrame stored under
    ``results[file_path_input]['passengers_input']`` is created, its
    'ActualArrival' column is filled from the output file and the copy is
    stored under ``results[file_path_input][file_path_output]``.

    Only two kinds of lines of the output file are evaluated (lines containing
    '#' are ignored):

    * header lines starting with '[' and containing 'Passenger:' - the text
      between the first ':' and the last character is taken as passenger ID;
    * lines containing 'Detrain' - the text before the first blank is taken as
      the arrival value of the most recently seen passenger ID. If a passenger
      has several such lines, the last one wins.

    Passengers without a 'Detrain' line keep their previous 'ActualArrival'
    value (NaN for frames produced by the input import).

    :param file_path_output: File path to output file.
    :param file_path_input: File path to input file; must already be a key of
        the module-level ``results`` dictionary.
    :return: The module-level ``results`` dictionary (updated in place).
    :raises KeyError: if ``file_path_input`` has not been loaded into ``results``.
    """
    # The context manager closes the file even if parsing fails further down.
    with open(file_path_output) as f:
        list_of_lines = f.readlines()

    # Clean up data: strip trailing whitespace/newlines, drop comment lines and
    # keep only passenger headers and 'Detrain' events.
    stripped_lines = (line.rstrip() for line in list_of_lines)
    relevant_lines = [
        line for line in stripped_lines
        if '#' not in line and ('Detrain' in line or 'Passenger:' in line)
    ]

    passengers = results[file_path_input]['passengers_input'].copy()

    # Collect the arrival value per passenger ID in a single pass; later
    # 'Detrain' lines overwrite earlier ones for the same passenger.
    arrivals = {}
    current_passenger_id = ''
    for element in relevant_lines:
        if element.startswith('['):
            # e.g. '[Passenger:P1]' -> 'P1' (drop the trailing ']')
            current_passenger_id = element.split(':')[1][:-1]
        else:
            arrivals[current_passenger_id] = element.split(' ')[0]

    # The arrival values are strings taken verbatim from the file, so the
    # column must be able to hold arbitrary objects. Casting explicitly avoids
    # writing strings into a float (NaN) column.
    passengers['ActualArrival'] = passengers['ActualArrival'].astype(object)
    arrived = passengers['ID'].isin(list(arrivals))
    if arrived.any():
        passengers.loc[arrived, 'ActualArrival'] = passengers.loc[arrived, 'ID'].map(arrivals)

    results[file_path_input][file_path_output] = passengers
    return results

In [4]:
# Global store: input file path -> {'passengers_input' or output file path -> DataFrame}
results = {}

# Read input files

In [5]:
print('Input Files:\n')

# Names of all regular files located directly in the 'Input' directory
# (sub-directories are skipped).
filenames_input = (
    entry for entry in os.listdir('Input')
    if os.path.isfile(os.path.join('Input', entry))
)

# For each input file: read it, build the passenger DataFrame and save that
# DataFrame in the results dictionary under the file's path.
for filename_input in filenames_input:
    print(filename_input)
    filepath = os.path.join('Input', filename_input)
    passengers_input = import_input_data(filepath)
    input_dict = {'passengers_input': passengers_input}
    results[filepath] = input_dict

Input Files:

capacity_input.txt
custom_min_11_10-8_12_20_30_4-31_5_10_4-15_20_input.txt
custom_min_165_10-62_12_20_30_31-8978_5_10_4-633_20_input.txt
custom_min_19_10-12_12_20_30_6-97_5_10_4-32_20_input.txt
custom_min_281_10-93_12_20_30_47-27832_5_10_4-1330_20_input.txt
custom_min_33_10-18_12_20_30_9-301_5_10_4-68_20_input.txt
custom_min_57_10-27_12_20_30_14-934_5_10_4-143_20_input.txt
custom_min_6_10-5_12_20_30_3-10_5_10_4-7_20_input.txt
custom_min_97_10-41_12_20_30_21-2896_5_10_4-301_20_input.txt
get_delay_input.txt
large_input.txt
simple_0_input.txt
simple_1_input.txt
simple_2_input.txt
simple_input.txt
station_capacity_input.txt
test-input-1_2_input.txt
test-input-1_input.txt
testLineForthBack_1_input.txt
testLineForthBack_2_input.txt
test_100_passengers_input.txt
test_10_input.txt
test_11_input.txt
test_1_input.txt
test_2_input.txt
test_3_input.txt
test_40_input.txt
test_40_passengers_input.txt
test_4_input.txt
test_5_input.txt
test_6_input.txt
test_7_input.txt
test_8_input.txt
t

# Read output files

In [6]:
# Get the name/path of all output files (timetables).
# Every sub-folder of 'Output' except 'old' is scanned. The 'old' entry is
# filtered out instead of being removed with list.remove(), which would raise a
# ValueError if no such entry exists.
files_and_folders_output = [
    entry for entry in os.listdir('Output') if entry != 'old'
]
folderpaths_output = (
    os.path.join('Output', folder) for folder in files_and_folders_output
    if not os.path.isfile(os.path.join('Output', folder))
)

for folderpath_output in folderpaths_output:
    print(folderpath_output)
    filenames_output = (
        file for file in os.listdir(folderpath_output)
        if os.path.isfile(os.path.join(folderpath_output, file))
    )
    for filename in filenames_output:
        if filename == 'test_output.txt':
            # test_output.txt does not meet the standardized output structure
            continue
        # The matching input file carries the same name with 'output' replaced by 'input'.
        filepath_input = os.path.join('Input', filename.replace('output', 'input'))
        filepath_output = os.path.join(folderpath_output, filename)
        if os.path.exists(filepath_input):
            import_output_data(filepath_output, filepath_input)
        else:
            print(filepath_output, 'can not be compared, because there is no input with the same name.')

Output\AMS_Team09_v6
Output\AMS_Team09_v7
Output\FP_Main_v10
Output\FP_Main_v10\custom_min_215_10-10_12_20_30_8-18426_5_10_4-721_20_output.txt kann nicht verglichen werden, da kein gleichnamiger Input existiert.
Output\FP_Main_v11
Output\FP_Main_v11\custom_min_138_10-97_12_20_30_49-322_5_10_4-175_20_output.txt kann nicht verglichen werden, da kein gleichnamiger Input existiert.
Output\FP_Main_v11\custom_min_141_10-42_12_20_30_21-8400_5_10_4-502_20_output.txt kann nicht verglichen werden, da kein gleichnamiger Input existiert.
Output\FP_Main_v11\custom_min_215_10-10_12_20_30_8-18426_5_10_4-721_20_output.txt kann nicht verglichen werden, da kein gleichnamiger Input existiert.
Output\FP_Main_v11\custom_min_216_10-89_12_20_30_45-2744_5_10_4-464_20_output.txt kann nicht verglichen werden, da kein gleichnamiger Input existiert.
Output\FP_Main_v11\custom_min_243_10-68_12_20_30_34-16807_5_10_4-882_20_output.txt kann nicht verglichen werden, da kein gleichnamiger Input existiert.
Output\FP_Main

# Calculate delay and log it into csv file for comparison

In [7]:
# Calculate delay for all inputs and log it into csv file for comparison

def _summarize_delays(passengers):
    """
    Add the columns 'Delay' and 'WeightedDelay' to a passenger DataFrame and sum them up.

    :param passengers: DataFrame with the columns 'ActualArrival', 'ExpectedArrival'
        and 'GroupSize'; it is modified in place.
    :return: Tuple (total delay, total weighted delay, latest actual arrival), or
        None if the frame is empty or any actual arrival is not a numeric string
        (e.g. NaN for a passenger that never arrived). In that case the frame is
        left unchanged.
    """
    actual_raw = passengers['ActualArrival']
    if len(actual_raw) == 0 or not all(str(value).isnumeric() for value in actual_raw):
        return None

    actual = actual_raw.astype(int)
    expected = passengers['ExpectedArrival'].astype(int)
    group_size = passengers['GroupSize'].astype(int)

    # delay: difference between the actual and the desired arrival time;
    # arriving earlier than desired counts as a delay of 0
    lateness = actual - expected
    passengers['Delay'] = lateness.clip(lower=0)
    # weighted delay: delay multiplied by the passenger's group size
    passengers['WeightedDelay'] = (group_size * lateness).clip(lower=0)

    return (int(passengers['Delay'].sum()),
            int(passengers['WeightedDelay'].sum()),
            int(actual.max()))


csv_df = []

# Output file that is explicitly excluded from the comparison. The key is built
# with os.path.join, exactly like the keys of the results dictionary, instead of
# a literal containing backslashes (invalid escape sequences, Windows only).
excluded_output = os.path.join('Output', 'old', 'large_output.txt')

for key_input in results:
    # Summary of the last output evaluated for THIS input. It stays None if
    # the input has no usable output, so nothing stale is printed below.
    last_summary = None

    for key_output in results[key_input]:
        if key_output == 'passengers_input' or key_output == excluded_output:
            continue

        # skip outputs where we have no valid data (i.e. an actual arrival time is missing)
        summary = _summarize_delays(results[key_input][key_output])
        if summary is None:
            continue

        # delay summed over all passengers, and the round of the last arrival
        total_delay, total_weighted_delay, rounds = summary
        csv_df.append({'Input': os.path.basename(key_input),
                       'Algorithm': os.path.basename(os.path.dirname(key_output)),
                       'Delay': total_delay,
                       'Weighted Delay': total_weighted_delay,
                       'Rounds': rounds})
        last_summary = summary

    if last_summary is not None:
        print(key_input, ':\t', 'Total Delay:', total_delay, '\tTotal weighted Delay:', total_weighted_delay, '\tFinished in Round:', rounds)
    if len(results[key_input]) == 1:
        # only the input itself is stored, i.e. there is no output to compare
        print()

with open('Results.csv', 'w', newline='') as csvfile:
    # write results to .csv-file
    fieldnames = ['Input', 'Algorithm', 'Delay', 'Weighted Delay', 'Rounds']
    writer = csv.DictWriter(csvfile,
                            dialect='excel',
                            fieldnames=fieldnames)

    writer.writeheader()
    writer.writerows(csv_df)

print('\n------------- Done -------------')

Input\capacity_input.txt :	 Total Delay: 0 	Total weighted Delay: 0 	Finished in Round: 3
Input\custom_min_11_10-8_12_20_30_4-31_5_10_4-15_20_input.txt :	 Total Delay: 50 	Total weighted Delay: 468 	Finished in Round: 12
Input\custom_min_165_10-62_12_20_30_31-8978_5_10_4-633_20_input.txt :	 Total Delay: 21028 	Total weighted Delay: 224964 	Finished in Round: 83
Input\custom_min_19_10-12_12_20_30_6-97_5_10_4-32_20_input.txt :	 Total Delay: 190 	Total weighted Delay: 2116 	Finished in Round: 15
Input\custom_min_281_10-93_12_20_30_47-27832_5_10_4-1330_20_input.txt :	 Total Delay: 67302 	Total weighted Delay: 708056 	Finished in Round: 127
Input\custom_min_33_10-18_12_20_30_9-301_5_10_4-68_20_input.txt :	 Total Delay: 662 	Total weighted Delay: 6445 	Finished in Round: 28
Input\custom_min_57_10-27_12_20_30_14-934_5_10_4-143_20_input.txt :	 Total Delay: 1874 	Total weighted Delay: 20295 	Finished in Round: 32
Input\custom_min_6_10-5_12_20_30_3-10_5_10_4-7_20_input.txt :	 Total Delay: 28 	To