In [8]:
#Imports
import random
import pandas as pd
import numpy as np
import os
import subprocess

In [9]:
#Change into the knapsacksolver checkout; the solver is later started through the relative
#path "./bazel-bin/knapsacksolver/main", so the working directory matters.
#Guarded so that re-running this cell does not descend into the nested
#"knapsacksolver/knapsacksolver" folder once the solver binary is already reachable.
if not os.path.isfile("bazel-bin/knapsacksolver/main"):
    os.chdir("knapsacksolver")

In [14]:
#Recovery step: only needed if the cell above ran multiple times and the working directory
#ended up in the nested "knapsacksolver/knapsacksolver" folder.
#Guarded so that running the whole notebook top to bottom does not step out of the
#solver directory again.
if os.getcwd().replace(os.sep, "/").endswith("knapsacksolver/knapsacksolver"):
    os.chdir("..")

In [10]:
#Similar to FinalizeInstances: functions to read the maximum profit and the time needed from the solver output.
#Default values if no solution was found: max_profit = 0, time = 50000 (the time will be overwritten later)
def read_max_profit(string):
    """Extract the integer reported after the first "Value" label.

    Parameters
    ----------
    string : str
        Solver output. Both the str(bytes) representation (line breaks show
        up as the two characters backslash + n) and ordinary decoded text
        (real newline characters) are accepted.

    Returns
    -------
    int
        The reported value, or 0 if the output contains no "Value" label.

    Raises
    ------
    ValueError
        If the text following the label is not an integer.
    """
    start = string.find("Value")
    if start == -1:
        #No value was reported: fall back to the default
        return 0
    field = string[start:]
    #Keep only the rest of that line. If no terminator follows, keep the whole
    #remainder instead of slicing with find()'s -1, which would drop the last digit
    for terminator in ("\\n", "\n"):
        end = field.find(terminator)
        if end != -1:
            field = field[:end]
    #Skip the six-character "Value:" label; also drop a closing quote left over
    #from a str(bytes) representation
    return int(field[6:].strip(" \t\r'\""))

def read_time(string):
    """Extract the running time reported after the first "Time" label.

    Parameters
    ----------
    string : str
        Solver output. Both the str(bytes) representation (line breaks show
        up as the two characters backslash + n) and ordinary decoded text
        (real newline characters) are accepted.

    Returns
    -------
    float or int
        The reported time rounded to two decimals, or the default 50000 if
        the output contains no "Time" label.

    Raises
    ------
    ValueError
        If the text following the label is not a number.
    """
    start = string.find("Time")
    if start == -1:
        #No time was reported: fall back to the default
        return 50000
    field = string[start:]
    #Keep only the rest of that line. If no terminator follows, keep the whole
    #remainder instead of slicing with find()'s -1, which would drop the last digit
    for terminator in ("\\n", "\n"):
        end = field.find(terminator)
        if end != -1:
            field = field[:end]
    #Skip the first ten characters (the label, e.g. "Time (ms):"); also drop a
    #closing quote left over from a str(bytes) representation.
    #Rounded because the reported precision varies between instances
    return round(float(field[10:].strip(" \t\r'\"")), 2)

#Function to run the command line solver on one instance with the desired algorithm
def solve_profit_time(filepath, algorithm, time_limit, seed):
    """Run the knapsacksolver binary on one instance and parse its report.

    Parameters
    ----------
    filepath : path of the instance file, passed to the solver as --input
    algorithm : algorithm name, passed as --algorithm
    time_limit : passed as --time-limit
    seed : passed as --seed

    Returns
    -------
    tuple
        (profit, time) as extracted by read_max_profit and read_time from the
        captured standard output.

    Raises
    ------
    OSError
        If "./bazel-bin/knapsacksolver/main" cannot be started from the current
        working directory (e.g. FileNotFoundError when the binary is missing).
    """
    #Every parameter is handed over as its own argument and no shell is involved, so
    #instance paths containing spaces or shell metacharacters cannot alter the command
    command = [
        "./bazel-bin/knapsacksolver/main",
        "-v", "1",
        "--algorithm", str(algorithm),
        "--input", str(filepath),
        "--time-limit", str(time_limit),
        "--seed", str(seed),
        "--format", "pisinger",
    ]
    solution = subprocess.run(command, capture_output=True)
    #str() of the captured bytes keeps line breaks as the two characters backslash + n,
    #which is the form the read_* helpers search for
    output = str(solution.stdout)
    profit = read_max_profit(output)
    time = read_time(output)
    #returns the profit and time for the algorithm
    return (profit, time)

In [12]:
#Function to compare the solution with the optimal solution and returning the time accordingly
def calculate_score(filepath, scores, profit, time, time_limit):
    """Append the score of one solved instance to the scores table.

    The reference profit is read from the fourth line of the instance file
    (everything after its first two characters). If `profit` equals it, the
    score is `time`; otherwise the score is time_limit * 1000 * 10.

    Returns a new DataFrame: `scores` followed by one row indexed by
    `filepath` whose "time" column holds the score.
    """
    with open(filepath) as instance_file:
        content = instance_file.readlines()
    #Fourth line of the instance file, without its two-character prefix
    optimum = int(content[3][2:])
    #Solved: keep the measured time. Not solved: PAR10 penalty; time_limit is given
    #in seconds while time is in milliseconds, hence the factor 1000
    score = time if profit == optimum else time_limit * 1000 * 10
    row = pd.DataFrame({"time": [score]}, index=[filepath])
    return pd.concat([scores, row])

In [129]:
time_limit = 5 #In seconds

#Function for parallel calculation based on the seed
def parallel(seed):
    """Solve every instance in "dataset/" with each algorithm for one seed.

    For each of the three algorithms, a fresh scores table is filled with one
    row per instance file and written to
    "scores/scores_<algorithm>_<seed>.csv". Uses the module-level time_limit.

    Parameters
    ----------
    seed : seed handed to the solver and used in the output file name
    """
    #Create the output folder up front, so that a missing folder is not only noticed
    #when the scores are written after all instances have been solved
    os.makedirs("scores", exist_ok=True)

    for algorithm in ["greedy", "dynamic_programming_bellman_array", "branch_and_bound_sort"]: #Every algorithm for each seed
        scores = pd.DataFrame(columns=["time"]) #Create new scores file
        for filename in os.listdir("dataset/"): #All 20,300 instances
            if ".csv" not in filename: #Avoid Jupyter checkpoints
                continue
            filepath = "dataset/"+filename

            #Solve the particular instance with the given algorithm
            profit, time = solve_profit_time(filepath, algorithm, time_limit, seed)

            #Calculate the score
            scores = calculate_score(filepath, scores, profit, time, time_limit)

        #Write the scores into the folder "scores". For 10 seeds and 3 algorithms, 30 csv files are created
        scores.to_csv("scores/scores_"+str(algorithm)+"_"+str(seed)+".csv", index=True, sep=",", header=True, index_label=None)

In [None]:
#Compute all PAR10 scores in parallel: one call of parallel() per seed (1 to 10), each of
#which writes one csv file per algorithm into the folder "scores"
from joblib import Parallel, delayed
Parallel(n_jobs=8)(
    delayed(parallel)(seed) for seed in range(1, 11)
)
#The parallel calculation took almost 5 days, because the algorithms
#"dynamic_programming_bellman_array" and "branch_and_bound_sort" used the entire
#time_limit whenever they did not solve an instance

In [57]:
#First, non-parallel approach that would take a very long time
#Included to depict the solving process
#NOTE: the snippet is wrapped in a string literal, so none of it is executed.
#As written, its solve_profit_time call passes five arguments (the last one being scores),
#while the solve_profit_time defined in this notebook accepts four.
'''
time_limit = 5 #in seconds

for seed in range(1,11):
    for algorithm in ["greedy", "dynamic_programming_bellman_array", "branch_and_bound_sort"]:
        scores = pd.DataFrame(columns=["time"])
        for filename in os.listdir("dataset/"):
            filepath = "dataset/"+filename
            profit, time = solve_profit_time(filepath, algorithm, time_limit, seed, scores)
            scores = calculate_score(filepath, scores, profit, time, time_limit)
        scores.to_csv("scores/scores_"+str(algorithm)+"_"+str(seed)+".csv", index=True, sep=",", header=True, index_label=None)
'''

In [132]:
#Average the scores over all runs and collect one column per algorithm in a single file
par10_file = pd.DataFrame()
for algorithm in ["greedy", "dynamic_programming_bellman_array", "branch_and_bound_sort"]:
    #Start from an empty frame and attach the scores of every seed side by side
    concat_file = pd.DataFrame()
    for seed in range(1,11):
        single_column = pd.read_csv(
            "scores/scores_"+str(algorithm)+"_"+str(seed)+".csv",
            header=0,
            index_col=0,
        )
        concat_file = pd.concat([concat_file, single_column], axis=1, join="outer", sort=True)

    #Row-wise mean of the 10 runs, rounded to 4 decimals, as a one-column frame
    #named after the algorithm
    concat_file = concat_file.mean(axis=1).round(4).to_frame(name=algorithm)

    #Attach the column of this algorithm to the par10 file
    par10_file = pd.concat([par10_file, concat_file], axis=1, join="outer", sort=True)

#Save the combined file in the folder "scores"
par10_file.to_csv("scores/par10_scores.csv", index=True, sep=",", header=True, index_label=None)

In [5]:
#Rewrite the index so that it matches the indices in instances.csv
scores = pd.read_csv("scores/par10_scores.csv", header=0, index_col=0)
#Remove only a leading "dataset/" from the filepath. A plain [8:] slice would cut eight more
#characters off every name each time this cell is re-run, because the file is overwritten in place
scores.index = scores.index.str.replace("^dataset/", "", regex=True)
scores.to_csv("scores/par10_scores.csv", index=True, sep=",", header=True, index_label=None)

In [16]:
#Combo as benchmark, not included in the thesis
#Solving each instance once with the algorithm combo; same procedure as before, but
#sequential instead of parallel
time_limit = 5 #in seconds
seed = 1
for algorithm in ["combo"]:
    scores = pd.DataFrame(columns=["time"]) #Create new scores file
    for filename in os.listdir("dataset/"):
        if ".csv" in filename: #Avoid Jupyter checkpoints
            filepath = "dataset/"+filename
            #solve_profit_time takes four arguments; passing scores as a fifth one raises a TypeError
            profit, time = solve_profit_time(filepath, algorithm, time_limit, seed)
            scores = calculate_score(filepath, scores, profit, time, time_limit)
    #Written to "scores/scores_combo_1.csv"
    scores.to_csv("scores/scores_"+str(algorithm)+"_"+str(seed)+".csv", index=True, sep=",", header=True, index_label=None)