In [1]:
import pickle
from os import makedirs
from os import listdir
from os.path import isfile, join, exists

import pandas as pd

import ALNSv2
# from CompAnalysis.data_import import data_cordeau
# from CompAnalysis.data_import import data_solomun


# Root directory that contains all benchmark data sets.
# Every backslash is escaped explicitly: the former "\m" was an invalid escape
# sequence that only produced the intended path by accident.
# NOTE(review): absolute, machine-specific path - adjust before running elsewhere.
base_path = "C:\\Users\\manuf\\OneDrive\\Dokumente\\Universitaet\\Masterthesis\\data"

# Base functionality

In [2]:


# Get slope and distance matrix
def get_elevation_matrix(elevations):
    """
    Build the pairwise altitude-difference matrix for a list of node elevations.

    Entry [i][j] holds the altitude change on the way from node i to node j,
    computed as (elevations[j] - elevations[i]) / 1000. Entry [j][i] is the
    negated value and the diagonal stays 0.

    Annotation:
    The division by 1000 reports the differences in km (assuming the input is
    given in metres) so that they are in line with the km reporting of the distances.

    @param elevations: indexable sequence with one elevation per node
    @return:           square list of lists with len(elevations) rows and columns
    """
    node_count = len(elevations)
    matrix = [[0] * node_count for _ in range(node_count)]

    for origin in range(node_count):
        for target in range(origin + 1, node_count):
            # altitude difference when travelling origin -> target
            climb = (elevations[target] - elevations[origin]) / 1000
            matrix[origin][target] = climb
            # the opposite direction has the same magnitude with flipped sign
            matrix[target][origin] = -climb
    return matrix

# Pirmin build cases (CASES)

In [3]:
import pandas as pd
# Directory with the raw input files of the "vrpldtt_pirmin" data set
path = join(base_path, "vrpldtt_pirmin")

# Load the six demand sheets "customers (1)" .. "customers (6)".
# Passing all sheet names at once lets pandas open and parse the workbook a single
# time (it then returns a dict keyed by sheet name) instead of once per sheet.
sheet_names = [f"customers ({sheet_nr})" for sheet_nr in range(1, 7)]
demand_sheets = pd.read_excel(join(path, "demandFinal_Clean.xlsx"), sheet_name=sheet_names)

# Keep the sheets as a list ordered by sheet number; later cells iterate over `data`.
data = [demand_sheets[sheet_name] for sheet_name in sheet_names]

FileNotFoundError: [Errno 2] No such file or directory: 'C:\\Users\\manuf\\OneDrive\\Dokumente\\Universitaet\\Masterthesis\\data\\vrpldtt_pirmin\\demandFinal_Clean.xlsx'

Goal is to build the demand CSVs and combine all demand data with the location information

In [4]:
def fill_distance_data(location_data, first_col=7, last_col=28):
    """
    Complete the distance matrix that is embedded in `location_data` (in place).

    The matrix occupies the column positions first_col .. last_col - 1, and matrix
    column c belongs to the node stored in row c - first_col. Every value above
    the diagonal is copied to its mirrored position below the diagonal and the
    diagonal itself is set to 0.

    @param location_data: pandas DataFrame with at least last_col - first_col rows
                          and last_col columns; it is modified in place
    @param first_col:     position of the first distance column (default 7)
    @param last_col:      position one past the last distance column (default 28)
    @return:              the same (mutated) DataFrame
    """
    for col in range(first_col, last_col):
        row = col - first_col

        # distance of a node to itself; written once per node instead of once per pair
        location_data.iloc[row, col] = 0

        for other_col in range(col + 1, last_col):
            # mirror the upper-triangle entry into the lower triangle
            location_data.iloc[other_col - first_col, col] = location_data.iloc[row, other_col]

    return location_data

In [5]:
# Combine the location/distance sheet of every city with every demand sheet and
# export one case file per combination, named C<city><demand>.csv (e.g. C101.csv).
for city_id, distance_name in enumerate(["matrix-fukuoka.xlsx",
                                         "matrix-madrid.xlsx",
                                         "matrix-pittsburgh.xlsx",
                                         "matrix-seattle.xlsx",
                                         "matrix-sydney.xlsx"]):
    # The city sheet is the same for all demand sheets, so it is read once per city
    # instead of once per city/demand combination.
    city_data = pd.read_excel(join(path, distance_name), header=0)

    for data_id, demand_data in enumerate(data):
        # Work on a copy so the demand sheets do not overwrite each other's columns
        location_data = city_data.copy()
        location_data["demand"] = demand_data["demand"]
        location_data["tw a"] = demand_data["Twa"]
        location_data["tw b"] = demand_data["TWb"]
        location_data["s"] = demand_data["s"]

        del location_data["out of file"]

        # Prune the data to the right size (20 customers max; 21 rows, presumably
        # depot + 20 customers). The explicit copy gives fill_distance_data an
        # independent frame to edit in place; writing into a plain iloc slice may
        # trigger pandas' SettingWithCopyWarning.
        location_data = location_data.iloc[:21, :28].copy()
        location_data = fill_distance_data(location_data)

        # export data
        file_name = f"C{(city_id+1)*100+data_id+1}.csv"
        location_data.to_csv(join(path, file_name))

# Pirmin Parse results (BKS)

In [13]:
# Read the results workbook that holds one objective value per case.
results_workbook = join("C:\\Users\\manuf\\OneDrive\\Dokumente\\Universitaet\\Masterthesis\\archive\\resultsFinal_Clean.xlsx")
data = pd.read_excel(results_workbook)

# Case identifier = map name followed by the demand ID (as text)
demand_id_text = data["demand ID"].apply(str)
data["case"] = data["map"] + demand_id_text

# Write the objective value of every case into its own "<case>.txt" file.
bks_directory = "C:\\Users\\manuf\\OneDrive\\Dokumente\\Universitaet\\Masterthesis\\data"
for index in data.index:
    case_name = data.loc[index, 'case']
    target_file = join(bks_directory, f"{case_name}.txt")
    with open(target_file, "w") as file:
        file.write(str(data.loc[index, 'obj']))

# Pirmin Build data obj (DATA)

In [3]:
"""
This file contains the main macro to read the base data and build a valid data object

"""
from os.path import isfile, join, exists
import logging
import pandas as pd

from ALNSv2 import ALNSData

def build_data_object(path,
                      nr_vehicles=None,
                      nr_load_buckets=10,
                      weight_interval_size=-1,
                      vehicle_capacity = 150,
                      vehicle_weight=140):
    """
    Read a case CSV and assemble the corresponding ALNSData object.

    The CSV is read with its first column as index. Row 0 is treated as the single
    depot and all following rows as customers. The columns "elevation", "demand",
    "tw a", "tw b" and "s" are accessed by name; everything from column position 7
    onwards is taken as the distance matrix.

    @param path:                 path of the case CSV
    @param nr_vehicles:          fleet size; defaults to the number of customers
    @param nr_load_buckets:      forwarded to ALNSData as nr_load_buckets
    @param weight_interval_size: forwarded to ALNSData as load_bucket_size
    @param vehicle_capacity:     forwarded to ALNSData as vehicle_capacity
    @param vehicle_weight:       forwarded to ALNSData as vehicle_weight
    @return:                     the ALNSData object
    """

    def get_elevation_matrix(elevations):
        """
        Pairwise altitude differences between all nodes.

        Entry [i][j] is elevations[j] - elevations[i], entry [j][i] its negation,
        the diagonal is 0. No unit conversion is applied here, the result is in
        the unit of the input.
        NOTE(review): the module-level helper of the same name divides by 1000 -
        confirm which unit ALNSData expects.
        """
        node_count = len(elevations)
        matrix = [[0] * node_count for _ in range(node_count)]

        for origin in range(node_count):
            for target in range(origin + 1, node_count):
                # altitude difference when travelling origin -> target
                climb = elevations[target] - elevations[origin]
                matrix[origin][target] = climb
                matrix[target][origin] = -climb
        return matrix

    data = pd.read_csv(path, index_col=0)

    # Problem dimensions: a single depot followed by the customers
    nr_depots = 1
    nr_nodes = data.shape[0]
    nr_customers = nr_nodes - nr_depots

    # Standard setting: one vehicle per customer
    if nr_vehicles is None:
        nr_vehicles = nr_customers

    distance_matrix = data.iloc[:, 7:].values
    logging.info("Distance matrix successfully retrieved")

    # Network information
    elevation_matrix = get_elevation_matrix(data["elevation"].values)
    logging.info("Elevation matrix successfully built")

    # Process information: customer rows only, the depot rows are cut off
    def customer_values(column):
        return data[column].values[nr_depots:]

    demand_array = customer_values("demand")
    window_start = customer_values("tw a")
    window_end = customer_values("tw b")
    service_times = customer_values("s")

    # Build the data object.  It performs internal preprocessing!
    logging.info("General info retrieved. Preprocessing started.")

    data_object = ALNSData(
        nr_veh=nr_vehicles,
        nr_nodes=nr_nodes,
        nr_customers=nr_customers,
        demand=demand_array,
        service_times=service_times,
        start_window=window_start,
        end_window=window_end,
        elevation_m=elevation_matrix,
        distance_m=distance_matrix,
        load_bucket_size=weight_interval_size,
        nr_load_buckets=nr_load_buckets,
        vehicle_weight=vehicle_weight,
        vehicle_capacity=vehicle_capacity,
    )

    logging.info("Preprocessing done. Data object built")
    return data_object



# Build data object Solomon | Gehring & Homberger

In [3]:
from ALNSv2 import ALNSData

def build_data_object(file_path):
    """
    Build the data object from a text file
    with the syntax introduced by Solomon

    http://neo.lcc.uma.es/vrp/vrp-instances/capacitated-vrp-with-time-windows-instances/

    Layout the parser relies on (0-based line numbers):
      line 4   -> number of vehicles and vehicle capacity
      line 9   -> depot (only its coordinates are used)
      line 10+ -> customers: id, x, y, demand, window start, window end, service time
    Parsing stops at the first blank line at or after line 9.

    @param file_path: path of the instance file
    @return:          ALNSData object built from the parsed instance
    @raise ValueError / IndexError: if a parsed line does not hold the expected integers
    """
    fleet_row = 4   # line with "<number of vehicles> <capacity>"
    depot_row = 9   # first node line; all later node lines are customers

    # 1) Parse data
    coordinates = []
    demand_array = []
    service_times = []
    window_start = []
    window_end = []
    number_vehicles = 0
    vehicle_capacity = 0

    with open(file_path, 'r') as file:
        for row_id, row in enumerate(file):
            # Split on any run of whitespace (commas are accepted as separators as
            # well). This no longer depends on the exact number of blanks between
            # two columns and also copes with tabs.
            row_data = row.replace(",", " ").split()

            if row_id == fleet_row:
                number_vehicles, vehicle_capacity = int(row_data[0]), int(row_data[1])

            elif row_id >= depot_row:
                if not row_data:
                    # a blank line ends the node section
                    break

                coordinates.append((int(row_data[1]), int(row_data[2])))

                if row_id > depot_row:
                    # customer data
                    demand_array.append(int(row_data[3]))
                    window_start.append(int(row_data[4]))
                    window_end.append(int(row_data[5]))
                    service_times.append(int(row_data[6]))

    # Derived once after parsing instead of on every line
    nr_customers = len(demand_array)
    nr_nodes = nr_customers + 1

    # NOTE(review): get_distance_matrix is not defined in the visible part of this
    # notebook - it must be in scope before this function is called.
    time_cube = [get_distance_matrix(coordinates)] # We must give it a new artificial dimension

    # 2) Build data object
    data_object = ALNSData(nr_veh=number_vehicles,
                           nr_nodes=nr_nodes,
                           nr_customers=nr_customers,
                           demand=demand_array,
                           service_times=service_times,
                           start_window=window_start,
                           end_window=window_end,
                           time_c=time_cube,
                           vehicle_capacity=vehicle_capacity)
    return data_object



# Build cordeau

In [6]:
def parse_row(row):
    """
    Split one line of an instance file into its numeric fields.

    Fields may be separated by any run of whitespace (blanks, tabs, line breaks)
    or by commas. Blank lines yield an empty list.

    @param row: raw text line
    @return:    list with one float per field
    @raise ValueError: if a field cannot be converted to float
    """
    # str.split() without arguments collapses all whitespace, so tab separated
    # files no longer end up as a single unparsable token.
    return [float(token) for token in row.replace(",", " ").split()]

def build_data_object(file_path):
    """
    Parse data based on the cordeau syntax
    http://neo.lcc.uma.es/vrp/vrp-instances/description-for-files-of-cordeaus-instances/

    Annotation:
    Cordeau passes some irrelevant information because his templates
    are applicable for a wide range of problem types (e.g. PVRP, VRPTW, etc.)
    We just ignore some things (e.g. the problem type and the number of days)

    Layout the parser relies on (0-based line numbers):
      line 0  -> problem type, number of vehicles, number of customers, number of days
      line 1  -> maximum route duration, vehicle capacity
      line 2  -> depot (only its coordinates are used)
      line 3+ -> customers; fields 1/2 are the coordinates, field 3 the service time,
                 field 4 the demand and the last two fields the time window

    @param file_path: path of the instance file
    @return:          ALNSData object built from the parsed instance
    @raise ValueError: if the instance defines a maximum route duration > 0
    """
    coordinates = []
    service_times = []
    demand_array = []
    window_start = []
    window_end = []

    with open(file_path, "r") as file:
        for row_id, row in enumerate(file):
            p_row = parse_row(row)

            if row_id == 0:
                # general data row
                _p_type, number_vehicles, nr_customers, _nr_days = p_row

            elif row_id == 1:
                # route limits
                route_max_duration, vehicle_capacity = p_row

                if route_max_duration > 0:
                    raise ValueError("Max duration not considered in the current ALNS model")

            else:
                # Blank (or whitespace-only) lines carry no node data. They have to
                # be skipped before any field is accessed: the former guard sat
                # behind the coordinate lookup, so such a line raised an IndexError.
                if not p_row:
                    continue

                # depot and customers both contribute coordinates
                coordinates.append((p_row[1], p_row[2]))

                if row_id > 2:
                    # customer information
                    service_times.append(p_row[3])
                    demand_array.append(p_row[4])
                    window_start.append(p_row[-2])
                    window_end.append(p_row[-1])

    # NOTE(review): get_distance_matrix is not defined in the visible part of this
    # notebook - it must be in scope before this function is called.
    time_cube = [get_distance_matrix(coordinates)] # We must give it a new artificial dimension

    data_object = ALNSData(nr_veh=int(number_vehicles),
                           nr_nodes=int(nr_customers+1),
                           nr_customers=int(nr_customers),
                           demand=demand_array,
                           service_times=service_times,
                           start_window=window_start,
                           end_window=window_end,
                           time_c=time_cube,
                           vehicle_capacity=int(vehicle_capacity))
    return data_object

# Build data

In [5]:
def build_data(python_callable, directory, extention = ""):
    """
    Utility function to perform preprocessing on all case files
    in a given directory

    Accepts subfolders in "cases" and builds the same subfolder structure
    in the "data" subdirectory. Every file <name>.<ext> below "cases" is passed
    to python_callable and the result is pickled to <name>.pkl at the same
    relative position below "data" (<name> is the part before the first dot).

    @param python_callable: Function object that takes a file path
                            and returns a data object. Defines the
                            preprocessing routine

    @param directory:       Directory on which the preprocessing is
                            performed. Must have a "cases" subdirectory;
                            the "data" subdirectory is created if missing

    @param extention:       Path of the currently processed subfolder relative
                            to "cases" / "data". Used by the recursion, callers
                            normally keep the default
    """
    cases_path = join(directory, "cases", extention)
    data_path = join(directory, "data", extention)

    # Make sure the output folder exists before the first pickle is written.
    # This covers the top-level "data" folder as well as every mirrored subfolder.
    makedirs(data_path, exist_ok=True)

    sub_dirs = []

    # 1) Perform the preprocessing on all files of the current folder and
    # remember the subdirectories for later
    for entry in listdir(cases_path):
        entry_path = join(cases_path, entry)
        if isfile(entry_path):
            data_object = python_callable(entry_path)

            file_name_raw = entry.split(".")[0]
            with open(join(data_path, f"{file_name_raw}.pkl"), "wb") as file:
                pickle.dump(data_object, file)
        else:
            sub_dirs.append(entry)

    # 2) Recursively iterate over all subdirectories and
    # perform the preprocessing on all files in them as well
    while sub_dirs:
        # progress output: subdirectories that are still pending
        print(sub_dirs)
        new_subdir = sub_dirs.pop()
        build_data(python_callable, directory, join(extention, new_subdir))

In [7]:
# Preprocess all case files below <base_path>/vrptw_cordeau/cases.
# NOTE(review): build_data_object is defined several times in this notebook; this call
# uses whichever definition was executed last - presumably the Cordeau parser, confirm.
build_data(build_data_object, join(base_path, "vrptw_cordeau"))

In [5]:
# Preprocess all case files below <base_path>/vrptw_solomon/cases.
# NOTE(review): build_data_object is defined several times in this notebook; this call
# uses whichever definition was executed last - presumably the Solomon parser, confirm.
build_data(build_data_object, join(base_path, "vrptw_solomon"))

['solomon_100', 'solomon_25', 'solomon_50']
['solomon_100', 'solomon_25']
['solomon_100']


In [6]:
# Preprocess all case files below <base_path>/vrpldtt_fontaine/cases.
# NOTE(review): build_data_object is defined several times in this notebook and it is
# not visible here which definition is meant for this data set - confirm before running.
build_data(build_data_object, join(base_path, "vrpldtt_fontaine"))

In [6]:
# Preprocess all case files below <base_path>/vrpldtt_freytag/cases.
# NOTE(review): build_data_object is defined several times in this notebook and it is
# not visible here which definition is meant for this data set - confirm before running.
build_data(build_data_object, join(base_path, "vrpldtt_freytag"))

In [6]:
# Preprocess all case files below <base_path>/vrptw_gehring_homberger/cases.
# NOTE(review): build_data_object is defined several times in this notebook; this call
# uses whichever definition was executed last - presumably the Solomon-syntax parser, confirm.
build_data(build_data_object, join(base_path, "vrptw_gehring_homberger"))

['homberger_1000', 'homberger_200', 'homberger_400', 'homberger_600', 'homberger_800']
['homberger_1000', 'homberger_200', 'homberger_400', 'homberger_600']
['homberger_1000', 'homberger_200', 'homberger_400']
['homberger_1000', 'homberger_200']
['homberger_1000']


# Pickle object parts (distance / time)

In [5]:
# Load one preprocessed instance to inspect its parts.
data_path = "C:\\Users\\manuf\\OneDrive\\Dokumente\\Universitaet\\Masterthesis\\data\\vrpldtt_fontaine\\data"

# The context manager closes the file handle again; the bare open() call left it open.
# NOTE: pickle.load can execute arbitrary code - only load pickles created by this project.
with open(join(data_path, "c203.pkl"), 'rb') as pickle_file:
    sol1 = pickle.load(pickle_file)

Check the dimensionality of the object

In [6]:
# Collect entry [0][7] of every layer of the time cube and count the collected values,
# i.e. the number of layers the cube consists of.
layer_samples = [layer[0][7] for layer in sol1.time_cube]
len(layer_samples)

15

Check the time (compared to pirmin)

In [8]:
# Entry [0][7] of the first time-cube layer.
# NOTE(review): presumably the travel time from node 0 to node 7 - confirm the axis meaning.
sol1.time_cube[0][0][7]

2.0812030075187633