In [135]:

import os
import pandas as pd
import numpy as np
import re
import collections
import pickle
from ast import literal_eval
import random
import scipy
import math
# from concorde_class import Concorde
import datetime
import subprocess
from ortools.constraint_solver import pywrapcp
from ortools.constraint_solver import routing_enums_pb2

In [136]:
# Colab-only step: mount Google Drive at /content/drive. Every data and result
# path used further down in this notebook lives under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [137]:
# Global path configuration (Google Drive locations, valid after the drive is mounted).

# Root of the 'Data' folder provided by DP; the loader functions below build
# their '/Instances/...' and '/Volumes/' paths on top of it.
dir_loc = '/content/drive/MyDrive/Analytics Project/AOP_DP_Analytics/Data'

# Project root folder (parent of 'Data').
# NOTE(review): not referenced in the visible part of the notebook - confirm it is still needed.
path = '/content/drive/MyDrive/Analytics Project/AOP_DP_Analytics/'

In [138]:
# Global variables
# Name of the region being processed; it is used to locate the region's
# instance files and to name the output folder and pickle files.
REGION = 'Warmsen'

In [139]:
def generate_region_district(region_name):
  """Build the post-object -> district table for one region.

  Every ``.dat`` file in ``<dir_loc>/Instances/<region_name>/Districts`` is read
  as a tab-separated file whose first two lines are skipped. Only the first
  column (the post object id) is kept, and each row is tagged with the district
  name, i.e. the file name without its ``.dat`` extension.

  Parameters
  ----------
  region_name : str
      Name of the region folder below ``<dir_loc>/Instances``.

  Returns
  -------
  pandas.DataFrame
      Columns ``PostObjectID`` and ``district``, one row per data line of the
      district files, concatenated in file-name order.

  Raises
  ------
  ValueError
      Raised by ``pd.concat`` when the folder contains no ``.dat`` file.
  """
  district_path = dir_loc + '/Instances/' + region_name + '/Districts'
  column_names = ('PostObjectID', 'dum_1', 'dum_2', 'dum_3', 'dum_4', 'dum_5')
  region_districts_list = []

  # sorted() makes the row order independent of the file system's listing order
  for filename in sorted(os.listdir(district_path)):
    district_name, extension = os.path.splitext(filename)
    if extension != '.dat':
      # ignore anything that is not a district file (hidden files, checkpoints, ...)
      continue

    file_district = pd.read_csv(district_path + '/' + filename, sep='\t', skiprows=[0, 1],
                                names=column_names)
    file_district['district'] = district_name
    region_districts_list.append(file_district)

  region_districts_df = pd.concat(region_districts_list, ignore_index=True)

  # the dummy columns only exist so that the file can be parsed; drop them
  return region_districts_df[['PostObjectID', 'district']]

In [140]:
# create dataframes for a region everyday

def generate_region_volume(region_name):
  """Load the per-weekday volume tables of a region.

  For every file in ``<dir_loc>/Volumes/`` the volume data is joined with the
  region's post-object/route-position mapping, its post-point information and
  its district mapping.

  Parameters
  ----------
  region_name : str
      Name of the region folder below ``<dir_loc>/Instances``.

  Returns
  -------
  dict
      Maps the English weekday name ('Monday' ... 'Saturday') to the merged
      DataFrame built from the corresponding volume file.

  Raises
  ------
  KeyError
      If the last two characters of a volume file name (extension removed) are
      not one of the day codes ``mo, di, mi, do, fr, sa``.
  """
  # path of folder
  dir_region_instances = dir_loc + '/Instances/' + region_name
  # NOTE(review): every file in this folder is read regardless of region_name -
  # confirm that the folder only holds volume files of the requested region.
  dir_region_volume = dir_loc + '/Volumes/'

  # load post_object-to-route_pos_id and post_point-to-post_object files
  po_file = pd.read_csv(dir_region_instances + '/post_order_id_mapping.dat', sep='\t',
                        names=('PostObjectId', 'RoutePosID'))
  pp_file = pd.read_csv(dir_region_instances + '/post_point_information.dat', sep='\t',
                        names=('PostPointId', 'PostObjectId'))

  # each post point row holds a list literal of post object ids: parse it and
  # expand it to one row per (post point, post object) pair
  pp_file['PostObjectId'] = pp_file['PostObjectId'].apply(literal_eval)
  pp_file = pp_file.explode('PostObjectId', ignore_index=True)

  # complete post object list-district mapping
  region_district_df = generate_region_district(region_name=region_name)
  region_district_df = region_district_df.rename(columns={'PostObjectID': 'PostObjectId'})

  # day code at the end of the volume file name -> weekday name
  day_map = {'mo' : 'Monday',
             'di' : 'Tuesday',
             'mi' : 'Wednesday',
             'do' : 'Thursday',
             'fr' : 'Friday',
             'sa' : 'Saturday'}

  # store dataframes of a region, map with post object id
  region_vol_day_dict = {}
  for filename in sorted(os.listdir(dir_region_volume)):
    # the weekday is taken from the file name itself, so the name may have any length
    day_code = os.path.splitext(filename)[0][-2:]
    if day_code not in day_map:
      raise KeyError('cannot derive the weekday from volume file name %r' % filename)

    vol_df = pd.read_csv(dir_region_volume + filename, sep=';')
    vol_df = vol_df.rename(columns={'BRIEFE' : 'LETTERS',
                                    'PAKETE' : 'PACKAGES',
                                    'SONSTIGE' : 'OTHERS',
                                    'ROUTEPOS_ID' : 'RoutePosID'})

    # combining files to a complete table for a region
    vol_po_df = pd.merge(po_file, vol_df, on='RoutePosID', how='left')
    vol_po_df = pd.merge(pp_file, vol_po_df, on='PostObjectId', how='right')
    vol_po_df = pd.merge(region_district_df, vol_po_df, on='PostObjectId', how='right')

    region_vol_day_dict[day_map[day_code]] = vol_po_df

  return region_vol_day_dict

In [141]:
# generate instances

def generate_instances(region_vol_day, scenario_type, scenario_method, scenario_number, growth_factor):
  """Add the generated scenario columns to a day's volume table.

  For each of the ``LETTERS``, ``PACKAGES`` and ``OTHERS`` columns,
  ``scenario_type`` is called once per value (keyword arguments
  ``pos_delivery``, ``method`` and ``rate_pct``) and the results are stored in
  ``scenario_<scenario_number>_letter`` / ``_package`` / ``_others``. Their sum
  is stored in ``scenario_<scenario_number>_all``.

  The columns are written into ``region_vol_day`` itself (no copy is made) and
  that same DataFrame object is returned.
  """
  prefix = 'scenario_' + str(scenario_number)

  def draw(volume):
    # single place where the scenario generator is invoked
    return scenario_type(pos_delivery = volume, method = scenario_method, rate_pct = growth_factor)

  generated_cols = []
  for source_col, suffix in (('LETTERS', '_letter'), ('PACKAGES', '_package'), ('OTHERS', '_others')):
    target_col = prefix + suffix
    region_vol_day[target_col] = region_vol_day[source_col].apply(draw)
    generated_cols.append(target_col)

  letter_col, package_col, others_col = generated_cols
  region_vol_day[prefix + '_all'] = (region_vol_day[letter_col]
                                     + region_vol_day[package_col]
                                     + region_vol_day[others_col])

  return region_vol_day

In [142]:
# generate real instances based on poisson

def random_poisson_instances(pos_delivery, method, rate_pct):
  """Draw a scenario volume from a Poisson distribution.

  52 values are sampled from ``Poisson(pos_delivery * (1 + rate_pct / 100))``
  and a single value is derived from that sample.

  Parameters
  ----------
  pos_delivery : number
      Base volume; it is scaled by the growth rate to obtain the Poisson mean.
  method : {'random', 'mode'}
      'random' returns one value picked at random from the sample, 'mode'
      returns the most frequent value (the smallest one on ties).
  rate_pct : number
      Growth rate in percent, e.g. ``-50`` halves the mean.

  Returns
  -------
  numpy integer
      The selected sample value.

  Raises
  ------
  ValueError
      If ``method`` is neither 'random' nor 'mode'. An error string is no
      longer returned, because it would end up as a value in the scenario
      columns. numpy also raises ValueError for a negative or NaN mean.
  """
  # validate before doing any sampling work
  if method not in ('random', 'mode'):
    raise ValueError('only options : [random, mode]')

  # NOTE(review): 52 presumably stands for the weeks of a year - confirm
  n_draws = 52
  rate = rate_pct / 100
  rng = np.random.default_rng()
  poisson_dist = rng.poisson(lam = pos_delivery * (1 + rate), size = n_draws)

  if method == 'random':
    return random.choice(poisson_dist)

  # mode: np.unique returns the values sorted, and argmax picks the first
  # maximum, so ties resolve to the smallest value
  values, counts = np.unique(poisson_dist, return_counts = True)
  return values[np.argmax(counts)]

In [143]:
#Creating the complete dictionary of volumes for a region
warmsen_instances = {}

#INPUT Rate of mail change
# rate in percentage
rate = 50
rate_mapping = {'low' : -rate, 'medium' : 0, 'high' : rate}

# Read the region's volume files once. Re-reading them inside the loops
# (as before) started every scenario from a fresh table, so only the columns of
# the last scenario ('high') were kept for each day.
region_volumes = generate_region_volume(REGION)

# create instances for one region
for key, day_df in region_volumes.items():
  # create instances per day: every scenario adds its own
  # 'scenario_<name>_*' columns to the same day table
  for key_rate, growth in rate_mapping.items():
    day_df = generate_instances(region_vol_day = day_df,
                                scenario_type = random_poisson_instances,
                                scenario_method = 'random',
                                scenario_number = key_rate,
                                growth_factor = growth)
  warmsen_instances[key] = day_df
  

In [157]:
# get start and end node for a district
# delete df_day input

def start_end_points(region_name, district):
    """Return ``[start_point, end_point]`` of a district.

    Only the second line (line index 1) of
    ``<dir_loc>/Instances/<region_name>/Districts/<district>.dat`` is parsed.
    It is read as a tab-separated row of nine fields, of which the second and
    third are the start and the end point.
    """
    district_file = dir_loc + '/Instances/' + region_name + '/Districts/' + district + '.dat'
    field_names = ('dum_0', 'start_point', 'end_point', 'dum_1', 'dum_2', 'dum_3', 'dum_4', 'dum_5', 'dum_6')

    # skip every line except line index 1
    info_row = pd.read_csv(district_file,
                           sep='\t',
                           skiprows=lambda line_no: line_no != 1,
                           names=field_names)

    return [info_row[field][0] for field in ('start_point', 'end_point')]


In [149]:
# generate distance matrix and mapping for a scenario

def generate_distance_matrix_map(region_name, df_day_district, points, sce_col, district):
  """Build the distance matrix of one scenario and its node mapping.

  Parameters
  ----------
  region_name : str
      Region folder below ``<dir_loc>/Instances``.
  df_day_district : pandas.DataFrame
      Volume table of one day and district; must contain ``PostPointId`` and
      the column named by ``sce_col``.
  points : iterable
      Post points that are always kept as nodes, even with zero demand (the
      caller passes the district's start and end point).
  sce_col : str
      Name of the scenario column that holds the demand.
  district : str
      District name, used to locate ``distances_<district>.dat``.

  Returns
  -------
  (list of lists, dict)
      The distance matrix (rows ``pp_1``, columns ``pp_2``) and a dict mapping
      the matrix row position to the post point id.

  Raises
  ------
  KeyError
      If the district's start or end point is not part of the matrix.
  """
  # post points whose demand in this scenario is different from zero
  demand = df_day_district[['PostPointId', sce_col]]
  node_ids = demand.loc[demand[sce_col] != 0, 'PostPointId'].unique().tolist()

  # add start and end node if not in node list yet
  for point in points:
    if point not in node_ids:
      node_ids.append(point)

  # get distance file and dataframes
  distance_path = dir_loc + '/Instances/' + region_name + '/distances'
  district_distance = pd.read_csv(distance_path + '/distances_' + district + '.dat',
                                  names=['pp_1', 'pp_2', 'dist'])

  # remove unused postpoints, then reshape the edge list into a matrix
  keep = district_distance['pp_1'].isin(node_ids) & district_distance['pp_2'].isin(node_ids)
  distance_matrix_df = district_distance[keep].pivot(index='pp_1', columns='pp_2', values='dist')

  # The start and end node are re-read from the district file (as before) and
  # not taken from `points`, which keeps the parameter untouched.
  start_node, end_node = start_end_points(region_name=region_name, district=district)
  for node in (start_node, end_node):
    if node not in distance_matrix_df.index or node not in distance_matrix_df.columns:
      raise KeyError('start/end point %r is missing from the distance matrix' % (node,))

  # Set the distance between start and end node to zero in both directions.
  # A single .loc assignment is used because chained indexing (df[a][b] = 0)
  # is not guaranteed to write into the frame.
  # NOTE(review): presumably this makes closing the tour free for the solver - confirm.
  distance_matrix_df.loc[end_node, start_node] = 0
  distance_matrix_df.loc[start_node, end_node] = 0

  # distance matrix
  distance_matrix_array = distance_matrix_df.to_numpy().tolist()

  # node mapping: matrix row position -> post point id
  mapping = dict(enumerate(distance_matrix_df.index.values.tolist()))

  return distance_matrix_array, mapping

In [150]:
# heuristic google OR algo

class TSP:
    """Single-vehicle routing problem solved with the OR-Tools routing library.

    The distance matrix is rounded to integers on construction, node 0 is used
    as the depot, and the arc cost of the only vehicle is the matrix entry
    between the two nodes.
    """

    def __init__(self, distance_matrix):
        # integer version of the distance matrix (list of lists)
        self.dist_mat = self.round_up(distance_matrix)
        # problem description consumed by the routing objects below
        self.data = self.create_data_model()
        # index manager: translates between solver indices and node numbers
        node_count = len(self.data['distance_matrix'])
        self.manager = pywrapcp.RoutingIndexManager(node_count,
                                                    self.data['num_vehicles'],
                                                    self.data['depot'])
        # routing model
        self.routing = pywrapcp.RoutingModel(self.manager)
        # cost of each arc = rounded distance between its two nodes
        self.transit_callback_index = self.routing.RegisterTransitCallback(self.distance_callback)
        self.routing.SetArcCostEvaluatorOfAllVehicles(self.transit_callback_index)

    def round_up(self, lst):
        '''
        input : list of lists of numbers
        output : list of lists of ints

        Despite its name, every value is rounded with the built-in round(),
        i.e. to the nearest integer, and then converted to int.
        '''
        return [[int(round(value)) for value in row] for row in lst]

    def create_data_model(self):
        # problem data: the rounded matrix, one vehicle, depot at node 0
        return {
            'distance_matrix': self.dist_mat,
            'num_vehicles': 1,
            'depot': 0,
        }

    def distance_callback(self, from_index, to_index):
        # translate the solver indices to nodes and look the distance up
        to_node_lookup = self.manager.IndexToNode
        row = self.data['distance_matrix'][to_node_lookup(from_index)]
        return row[to_node_lookup(to_index)]

    def solve(self):
        '''
        Solve with the PATH_CHEAPEST_ARC first solution strategy.

        Returns (route, objective value), where route lists the visited nodes
        starting at the depot (the final end index is not appended), or None
        when the solver returns no solution.
        '''
        search_parameters = pywrapcp.DefaultRoutingSearchParameters()
        search_parameters.first_solution_strategy = routing_enums_pb2.FirstSolutionStrategy.PATH_CHEAPEST_ARC

        solution = self.routing.SolveWithParameters(search_parameters)
        if not solution:
            return None

        # follow the NextVar chain from the start of vehicle 0 to the end
        route = []
        current = self.routing.Start(0)
        while not self.routing.IsEnd(current):
            route.append(self.manager.IndexToNode(current))
            current = solution.Value(self.routing.NextVar(current))

        return route, solution.ObjectiveValue()

In [151]:
#FUNCTION TO GET SUBSET OF A COMPLETE TOUR AND ITS LENGTH KEEPING SEQUENCE BASED ON INSTANCES VOLUMES

#INPUTS:    Complete Tour (DP or Concord),
#           Distance Matrix for the instance (generated from instance generation function),
#           Mapping of PostPoints Needed (Dictionary obtained from instance generation function)
#OUTPUTS:   Tour Sequence Fixed, Tour Length



def new_tour_sequence_fixed(complete_tour, dm_instance, mapping_pp_needed):
    """Restrict a complete tour to the needed post points and measure it.

    Parameters
    ----------
    complete_tour : sequence
        Post point ids in visiting order.
    dm_instance : list of lists
        Distance matrix of the instance; position ``i`` corresponds to post
        point ``mapping_pp_needed[i]``.
    mapping_pp_needed : dict
        Maps matrix position -> post point id for the post points that have to
        be visited.

    Returns
    -------
    (list, number)
        The tour reduced to the needed post points (original order kept) and
        its length rounded to two decimals. The length is 0 when fewer than two
        post points remain.

    Raises
    ------
    KeyError
        If two consecutive post points have no entry in the distance matrix.
    """
    # a set gives constant-time membership tests while filtering the tour
    needed_post_points = set(mapping_pp_needed.values())

    # keep only the needed post points, maintaining the sequence
    tour_seq_fixed = [pp for pp in complete_tour if pp in needed_post_points]

    # label rows and columns of the distance matrix with post point ids
    df_dm_instance = pd.DataFrame(dm_instance).rename(index=mapping_pp_needed,
                                                      columns=mapping_pp_needed)

    def get_seq_tour_length(sequence_list, distance_df):
        # sum of the distances between consecutive post points of the sequence
        tour_length = 0
        for predecessor, successor in zip(sequence_list, sequence_list[1:]):
            try:
                # Same cell as the former distance_df[predecessor][successor]:
                # column = predecessor, row = successor.
                # NOTE(review): if rows are 'from' and columns are 'to', this
                # reads the reverse direction - only matters for asymmetric
                # matrices; confirm.
                distance_value = distance_df.at[successor, predecessor]
            except KeyError as exc:
                # fail loudly instead of adding a stale or undefined distance
                raise KeyError('No distance for PostPoint combination (%r, %r)'
                               % (predecessor, successor)) from exc
            tour_length += distance_value

        return round(tour_length, 2)

    # length of the new tour, which skips the post points that are not needed
    tour_seq_fixed_length = get_seq_tour_length(tour_seq_fixed, df_dm_instance)

    return tour_seq_fixed, tour_seq_fixed_length

In [153]:
# load the previously computed result files
# NOTE: pickle.load can execute arbitrary code stored in the file - only load
# pickles that were produced by this project.
with open("/content/drive/MyDrive/Analytics Project/Coding/Results/all_original_complete.p", "rb") as pickle_file:
    all_original = pickle.load(pickle_file)
with open("/content/drive/MyDrive/Analytics Project/Coding/Results/all_default_tours.p", "rb") as pickle_file:
    all_default_tours = pickle.load(pickle_file)


In [159]:
# code for running generate_distance_matrix_map and new_tour_sequence_fixed:
# for every day, district and scenario column the distance matrix and the node
# mapping are pickled into the region folder; for one district the default
# tour reduced to the scenario's post points is printed as well.

dir_region = '/content/drive/MyDrive/Analytics Project/Coding/' + REGION
# creates missing parent folders too and does not fail if the folder exists
os.makedirs(dir_region, exist_ok=True)

# the first 7 columns are the merged input columns; everything after them is a
# generated scenario column
N_INPUT_COLUMNS = 7
# district whose fixed-sequence tour is printed
TOUR_CHECK_DISTRICT = '31603-06'

# filter by day
for day in warmsen_instances.keys():
  df_day = warmsen_instances[day]
  # filter by district
  for district in df_day['district'].value_counts().index.tolist():
    df_day_district = df_day[df_day['district'] == district]

    scenario_list = df_day_district.columns.tolist()[N_INPUT_COLUMNS:]

    # get start and end node
    points = start_end_points(region_name = REGION, district = district)

    # running generate_distance_matrix_map
    for scenario in scenario_list:
      distance_matrix, mapping = generate_distance_matrix_map(region_name = REGION,
                                                              df_day_district = df_day_district,
                                                              points = points,
                                                              sce_col = scenario,
                                                              district = district)
      name_suffix = REGION + '_' + day + '_' + district + '_' + scenario + '_' + str(rate)
      distance_matrix_name = 'dm_' + name_suffix
      mapping_name = 'map_' + name_suffix

      # 'with' closes (and flushes) each file as soon as it has been written
      with open(dir_region + '/' + "%s.p"%distance_matrix_name, "wb") as pickle_file:
        pickle.dump(distance_matrix, pickle_file)
      with open(dir_region + '/' + "%s.p"%mapping_name, "wb") as pickle_file:
        pickle.dump(mapping, pickle_file)

      # running new_tour_sequence_fixed
      if district == TOUR_CHECK_DISTRICT:
        tour_seq_fixed, tour_seq_fixed_length = new_tour_sequence_fixed(
            complete_tour = all_default_tours[REGION][TOUR_CHECK_DISTRICT]['default_route'],
            dm_instance = distance_matrix,
            mapping_pp_needed = mapping)
        print('Tour Length: ', tour_seq_fixed_length)
        print('Tour: ', tour_seq_fixed)


Tour Length:  20929.46
Tour:  [0, 2230, 2228, 2224, 2222, 2225, 2221, 2211, 2212, 2213, 2208, 2207, 2210, 2419, 2420, 2421, 2418, 2416, 2413, 2412, 2411, 2410, 2409, 2266, 2264, 2262, 2263, 2261, 2267, 2584, 2585, 2349, 2350, 2351, 2352, 2346, 2347, 2354, 2353, 2389, 2391, 2608, 2611, 2610, 2609, 2612, 2204, 2205, 2206, 2282, 2283, 2316, 2317, 2318, 2319, 2436, 2439, 2440, 2441, 2437, 2435, 2302, 2303, 2305, 2306, 2617, 2618, 2620, 2621, 2622, 2615, 2614, 2613, 2623, 2326, 2327, 2328, 2330, 2331, 2332, 2323, 2324, 2203, 2334, 2335, 2336, 2333, 2337, 2477, 2478, 2476, 2479, 2481, 2482, 2484, 2485, 2487, 2489, 2491, 2493, 2214, 2215, 2216, 2217, 2218, 2321, 2322, 2494, 2588, 2591, 2592, 2593, 2590, 2594, 2586, 2310, 2312, 2308, 2386, 2387, 2388, 2385, 2291, 2292, 2293, 2295, 2287, 2285, 2288, 2284, 2297, 2298, 2289, 2497, 2498, 2495, 2504, 2505, 2501, 2507, 2500, 2508, 2509, 2510, 2511, 2513, 2512, 2514, 2515, 2517, 2519, 2520, 2521, 2522, 2524, 2525, 2526, 2527, 2528, 2530, 2533, 2534, 