# 1) Import data

In [1]:
from os import path, listdir
import pickle
import pandas as pd
import numpy as np
import csv

In [2]:
def bks_callable(bks_txt):
    return float(bks_txt.read().decode())

## Helper function to generate data dictionary

In [3]:
def get_data(fin_path, trans_callable=None, read_type="rb"):
    """
    Lightweight helper function to get all solution objects
    
    # Save data in data dictionary
    # Each key is an instance name
    # Each value is a list data object
    """          
    def dat_dispenser(dpath):
        """
        Very lightweight data dispenser
        """
        # 2) Get all the data in the directory and subdirectories
        for f in listdir(dpath):
            yield f
        
    dat_dict = {}
    for dat_name in dat_dispenser(fin_path):
        with open(path.join(fin_path, dat_name), "rb") as sob:
            
            # all file types can have [runs]_[name].[filetype]
            # -> remove runs and filetype!
            index = dat_name.split(".")[0]
            if "_" in index:
                if "vrpldtt_freytag" in fin_path:
                    index = index.split("_")[1] + "_{ {int(index.split('_')[2])} }"
                elif "vrptw_gehring_homberger" in fin_path:
                    index = index.split("_")[1] + "_" + index.split("_")[2] + "_" + index.split("_")[3]
                else:
                    index = index.split("_")[1]
            
            # Perform data transformation into suitable format (defined by callable)
            if trans_callable is not None:
                so = trans_callable(sob)
            else:
                so = sob
                
            # If only one value -> no list, otherwise use list
            try:
                dat_dict[index].append(so)
            except KeyError:
                dat_dict[index] = so
            except AttributeError:
                dat_dict[index] = [dat_dict[index], so]
    return dat_dict

In [4]:
def extract_cols(subdat, metrics):
    """
    Utility function to generate columns for each iteration
    """    
    tmp_dat = {}
    
    # For each solution evaluate quality
    for index in subdat:
        tmp_dat[index] = {}
        for metric in metrics:
            
            # Add columns to the rows
            if metric == "nr_veh":
                values = [sum(np.array(sol["solution"].route_driving_times) > 0) for sol in subdat[index]]
                tmp_dat[index]["nr_veh"] = np.mean(values)
            elif metric == "solution_time_ms":
                tmp_dat[index]["solution_time_ms"] = np.mean(values)/1000
            else:
                values = np.array([sol["metrics"][metric] for sol in subdat[index]])
                tmp_dat[index][metric] = np.mean(values)

                if metric =="value":
                    tmp_dat[index][f"best_value"] = min(values)
                    
                    
                    nr_vehs = np.array([sum(np.array(sol["solution"].route_driving_times) > 0) for sol in subdat[index]])
                    tmp_dat[index]["nr_veh_best"] = nr_vehs[min(values) == np.array(values)][0]
                
    # Transform data
    return pd.DataFrame.from_dict(tmp_dat, orient="index")

def extract_bks(subdat):
    return pd.Series(subdat)

In [5]:
rename_dict = {"solution_time_ms": "Time (s)",
               "value": "Avg",
               "best_value": "Best",
               "iterations": "Iters",
               "nr_veh": "Nr. Veh",
              "nr_veh_best": "Nr. Veh. Best"}


def format_data(dat_path, optype, metrics=["value", "iterations", "solution_time_ms", "nr_veh"], bks=True):
    """
    Utility funciton to format the imported solution data
    """
    dat_dict = {}

    # Import data
    sol_path = path.join(dat_path, "solution", optype)
    bks_path = path.join(dat_path, "bks")

    dat_dict["data"] = get_data(sol_path, pickle.load, read_type="rb")

    
    # Extract most important colums
    df = extract_cols(dat_dict["data"], metrics)
    
    # Format the data
    for col in df.columns:
        try:
            # Rename
            df.rename(columns=rename_dict, inplace = True)
        except KeyError:
            print(f"Column {col} is not considered in the format rules")
    
    # Create new additional columns
    if bks:
        dat_dict["BKS"] = get_data(bks_path, bks_callable, read_type="r")
        df["BKS"] = extract_bks(dat_dict["BKS"])
        df["GAP % (best)"] = (df["Best"] - df["BKS"] + 0.0001) / df["BKS"] *100
        df["GAP % (avg)"] = (df["Avg"] - df["BKS"] + 0.0001) / df["BKS"] *100
        # Reorder columns
        return df[["BKS", "Avg", "GAP % (avg)", "Best", "GAP % (best)", "Iters", "Time (s)", "Nr. Veh"]].round(2)
    else:
        df["BKS"] = df["Best"]
        df["GAP % (avg)"] = (df["Avg"] - df["BKS"] + 0.0001) / df["BKS"] *100
        return df[["BKS", "Avg", "GAP % (avg)", "Iters", "Time (s)", "Nr. Veh", "Nr. Veh. Best"]].round(2)
        

    
    

# Export fontaine Data

In [6]:
dat_path = 'C:\\Users\\manuf\\OneDrive\\Dokumente\\Universitaet\\Masterthesis\\data\\vrptw_gehring_homberger'
otype = "all_operators\\200"

df = format_data(dat_path, otype, bks=None)
df = df.round(2)

In [7]:
df

Unnamed: 0,BKS,Avg,GAP % (avg),Iters,Time (s),Nr. Veh,Nr. Veh. Best
C1_2_1,2704.57,2704.57,0.0,43103.8,43.1,20.0,20
C1_2_10,2630.77,2634.83,0.15,58472.6,58.47,19.0,19
C1_2_2,2700.65,2700.73,0.0,31942.2,31.94,20.0,20
C1_2_3,2681.94,2682.16,0.01,59981.0,59.98,20.0,20
C1_2_4,2641.29,2647.63,0.24,41854.8,41.85,18.8,19
C1_2_5,2702.05,2702.05,0.0,49252.6,49.25,20.0,20
C1_2_6,2701.04,2701.04,0.0,47683.4,47.68,20.0,20
C1_2_7,2701.04,2701.04,0.0,45640.2,45.64,20.0,20
C1_2_8,2690.27,2690.27,0.0,50491.0,50.49,20.0,20
C1_2_9,2645.47,2655.12,0.36,49141.6,49.14,19.2,19


In [26]:
df.to_csv(path.join(dat_path, f"summary_{otype}.csv"), sep='&', quoting=0, line_terminator="\\\\ \n")

# Further aggregate data to Cases

In [8]:
def aggregate_by_case(df, dat_path, len_del = 3):
    if "vrpldtt_freytag" in dat_path:
        df["case"] = df.index.str[:len_del] + df.index.str[3:]
    else:
        df["case"] = df.index.str[:len_del]
        

    df = df.groupby("case").mean()
    return df

In [9]:
df2 = aggregate_by_case(df, dat_path)
df2.loc["CS"] = df2.sum()
df2 = df2.round(2)
df2

Unnamed: 0_level_0,BKS,Avg,GAP % (avg),Iters,Time (s),Nr. Veh,Nr. Veh. Best
case,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
C1_,2679.91,2681.94,0.08,47756.32,47.75,19.7,19.7
C2_,1835.04,1842.89,0.44,27960.72,27.96,6.46,6.4
R1_,3618.18,3647.61,0.83,64025.22,64.03,19.58,19.5
R2_,2712.5,2752.91,1.53,38804.36,38.8,6.14,6.6
RC1,3187.0,3206.09,0.6,58761.9,58.76,19.06,19.0
RC2,2322.36,2359.35,1.6,34074.1,34.07,6.34,6.7
CS,16355.0,16490.79,5.08,271382.62,271.38,77.28,77.9


In [13]:
df2.to_csv(path.join(dat_path, f"summary_aggregation_{otype}.csv"), sep='&', quoting=0, line_terminator="\\\\ \n")

In [10]:
df2

Unnamed: 0_level_0,BKS,Avg,GAP % (avg),Best,GAP % (best),Iters,Time (s),Nr. Veh
case,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Fu1,26.37,26.37,0.01,26.37,0.01,17358.1,17.36,3.0
Fu2,33.75,33.75,0.02,33.75,0.02,11895.6,11.9,6.0
Fu3,39.87,39.9,0.05,39.9,0.05,14780.5,14.78,7.0
Fu4,25.85,25.86,0.03,25.86,0.03,21957.9,21.96,3.0
Fu5,32.85,32.87,0.07,32.87,0.07,13987.0,13.99,6.0
Fu6,38.01,38.01,0.02,38.01,0.02,13374.9,13.37,7.0
Ma1,26.72,26.72,0.0,26.72,0.0,17447.4,17.45,3.0
Ma2,36.18,36.19,0.05,36.19,0.05,14542.6,14.54,5.0
Ma3,45.01,45.01,0.0,45.01,0.0,19219.1,19.22,7.0
Ma4,26.07,26.07,0.01,26.07,0.01,24179.1,24.18,3.0


# 99 Rename files

In [32]:
# Short makro to rename all solutions
import os

def dat_dispenser(dpath):
    """
    Very lightweight data dispenser
    """
    # 2) Get all the data in the directory and subdirectories
    for f in listdir(dpath):
        yield f

hdir = "C:\\Users\\manuf\\OneDrive\\Dokumente\\Universitaet\\Masterthesis\\data\\vrpldtt_fontaine\\solution\\selected_operators"
os.chdir(hdir)

rename_dict = {"C10": "Fu",
              "C20": "Ma",
              "C30": "Pi",
              "C40": "Se",
              "C50": "Sy"}

def dat_dispenser(dpath):
    """
    Very lightweight data dispenser
    """
    # 2) Get all the data in the directory and subdirectories
    for f in os.listdir(dpath):
        yield f
        
for f in dat_dispenser(hdir):
    os.rename(f, f"{f[:2]}{rename_dict[f[2:5]]}{f[5]}{f[-4:]}")
    # os.rename(f, f[:3]+".csv")
    #print(f)

In [57]:
t = np.array([1,2,3])
t[t == 1]

array([1])

In [68]:
def y(x):
    for k in x:
        yield k

In [69]:
t = y([1,2,3])

In [75]:
t.__next__()

3

<_io.BufferedReader name='C:\\Users\\manuf\\OneDrive\\Dokumente\\Universitaet\\Masterthesis\\data\\vrpldtt_fontaine\\data\\Fu4.pkl'>


[[0.0,
  0.0,
  0.0008620692858462587,
  0.003168332733951878,
  0.004918092264308023,
  0.003532299746405286,
  0.00271630780756999,
  0.004754015355732087,
  0.012073772875398204,
  0.019353697452017624,
  0.03482069953650053,
  0.02305095996028779,
  0.03157719314210123,
  0.02297903745275459,
  0.016340535708880315,
  0.0338044913046077,
  0.02538553408008244,
  0.0020356276272524647,
  0.011744582153910025,
  0.0016216237537798983,
  0.0026627313330074365],
 [-0.0,
  0.0,
  0.001666668981486308,
  0.004747827992269705,
  0.006560777378738453,
  0.004441085143266461,
  0.0032727447995397436,
  0.005534065323385325,
  0.014109656278898063,
  0.022514295071609194,
  0.0427761771644301,
  0.028750496676850014,
  0.04627818732121069,
  0.030527034648398402,
  0.018056720142023485,
  0.04003203845127179,
  0.026696015676406268,
  0.0015267193365424245,
  0.008009965683397891,
  0.0009868425857852254,
  0.003061238833410063],
 [-0.0008620692858462587,
  -0.001666668981486308,
  0.0,
  0.

# Hypothesis 1: Not max speed with negative slope

In [8]:
import pickle

case = "Fu1"
hdir = "C:\\Users\\manuf\\OneDrive\\Dokumente\\Universitaet\\Masterthesis\\data\\vrpldtt_fontaine\\data"

with open(path.join(hdir, f"{case}.pkl"), "rb") as f:
    print(f)
    dat = pickle.load(f)

data = pd.read_csv(path.join("C:\\Users\\manuf\\OneDrive\\Dokumente\\Universitaet\\Masterthesis\\data\\vrpldtt_fontaine\\cases", f"{case}.csv"), index_col=0)
distance_matrix = data.iloc[:, 7:].values

<_io.BufferedReader name='C:\\Users\\manuf\\OneDrive\\Dokumente\\Universitaet\\Masterthesis\\data\\vrpldtt_fontaine\\data\\Fu1.pkl'>


In [9]:
veloctiy_cube = dat.time_cube.copy()
for lid, lbm in enumerate(dat.time_cube):
    for i, y in enumerate(lbm):
        for j, x in enumerate(y):
            veloctiy_cube[lid][i][j] = distance_matrix[i][j]/(x/60)

  """


In [10]:
diff = []
for lid, lbm in enumerate(veloctiy_cube):
     for i, y in enumerate(lbm):
        for j, x in enumerate(y):
            if (dat.slope_matrix[i][j] < 0) and (x < 24.8):
                diff.append((x, lid, i, j))

In [11]:
len(diff)

0