### Script for generating the default route and its related information for all districts in a region

In [82]:
#Import needed libraries
import pandas as pd
import numpy as np
import random 
import os 
from ast import literal_eval
import pickle
import scipy

In [83]:
#Define directories (CHANGE THESE TO MATCH YOUR MACHINE BEFORE RUNNING OR TESTING)
#Base input path: the functions below read their input files from <path>/Data/Instances/<region>/...
path = r'C:\Users\julli\OneDrive\Documentos\Deutshland\RWTH Aachen University\DDS Course\Summer Semester 23\Analytics Project\Notebooks Testing\JB Branch\Volume to Tailored'

#Output path: get_region_all_default_routes creates a '<region>_Default_Routes' folder inside it and writes its files there
path_save = r'C:\Users\julli\OneDrive\Documentos\Deutshland\RWTH Aachen University\DDS Course\Summer Semester 23\Analytics Project\Notebooks Testing\JB Branch\Default Routes'


In [84]:

#FUNCTION TO GET DEFAULT TOUR AND ITS LENGTH BY DISTRICT NAME

#INPUTS:    Region, District name
#OUTPUTS:   Tour Sequence, Dictionary for Mapping, Tour Length, Number of repeated PostPoints

def _with_endpoints(sequence, start_point, end_point):
    """Return a copy of *sequence* that begins with start_point and ends with end_point."""
    route = list(sequence)
    # Only add an endpoint when it is not already in place (also covers an empty district).
    if not route or route[0] != start_point:
        route.insert(0, start_point)
    if route[-1] != end_point:
        route.append(end_point)
    return route


def _drop_consecutive_duplicates(sequence):
    """Collapse runs of equal neighbouring values into one entry, keeping the order."""
    return [point for index, point in enumerate(sequence)
            if index == 0 or point != sequence[index - 1]]


def _count_repeated(sequence):
    """Count the distinct values that occur more than once in *sequence*."""
    seen = set()
    repeated = set()
    for point in sequence:
        if point in seen:
            repeated.add(point)
        else:
            seen.add(point)
    return len(repeated)


def _tour_length(sequence, distance_df):
    """Sum the distances of all consecutive PostPoint pairs of *sequence*, rounded to 2 decimals."""
    # Build the (from, to) -> distance lookup once instead of filtering the
    # whole dataframe for every leg; the first row wins for duplicated pairs.
    unique_pairs = distance_df.drop_duplicates(subset=['PostPoint1', 'PostPoint2'], keep='first')
    lookup = dict(zip(
        zip(unique_pairs['PostPoint1'].tolist(), unique_pairs['PostPoint2'].tolist()),
        unique_pairs['Distance'].tolist(),
    ))

    total = 0
    for origin, destination in zip(sequence, sequence[1:]):
        try:
            total += lookup[(origin, destination)]
        except KeyError:
            raise ValueError(
                'ERROR: No distance for PostPoint combination ({}, {})'.format(origin, destination)
            ) from None
    return round(total, 2)


def get_default_route(region, district_name, base_path=None):
    """Build the default route of one district and compute its length.

    Reads, relative to the base directory:
      * Data/Instances/<region>/Districts/<district_name>.dat
        (tab separated; first line skipped, second line holds the start and
        end PostPoint in columns 1 and 2, remaining lines hold the
        PostObject ids in visiting order in column 0)
      * Data/Instances/<region>/post_point_information.dat
        (tab separated; PostPoint id and a list literal of PostObject ids)
      * Data/Instances/<region>/distances/distances_<district_name>.dat
        (comma separated; PostPoint1, PostPoint2, Distance)

    Args:
        region: Name of the region folder.
        district_name: District file name without the '.dat' extension.
        base_path: Directory containing the 'Data' folder. Defaults to the
            module-level ``path`` when omitted.

    Returns:
        A tuple ``(sequence, mapping, tour_length, repeated)`` where
        ``sequence`` is the list of PostPoint ids in visiting order (start and
        end point included, consecutive duplicates removed), ``mapping`` maps
        each position in ``sequence`` to its PostPoint id, ``tour_length`` is
        the summed distance rounded to 2 decimals, and ``repeated`` is the
        number of PostPoints appearing more than once in ``sequence`` (the
        start point counts when start and end point are the same).

    Raises:
        ValueError: If a PostObject has no PostPoint in the mapping file, or if
            the distance file has no entry for two consecutive PostPoints.
    """
    root = path if base_path is None else base_path
    region_dir = os.path.join(root, 'Data', 'Instances', region)

    district_po_path = os.path.join(region_dir, 'Districts', district_name + '.dat')
    district_pp_po_mapping_path = os.path.join(region_dir, 'post_point_information' + '.dat')
    district_pp_distances_path = os.path.join(region_dir, 'distances', 'distances_' + district_name + '.dat')

    # Distances between PostPoint pairs of the district
    df_pp_district_distances = pd.read_csv(
        district_pp_distances_path, sep=',', header=None,
        names=['PostPoint1', 'PostPoint2', 'Distance'])

    # PostObjects in their assigned visiting sequence
    df_district_po = pd.read_csv(
        district_po_path, sep='\t', skiprows=2, usecols=[0], header=None,
        names=['PostObjectId'])

    # Start and end PostPoint of the district (second line of the district file)
    df_start_end_pp = pd.read_csv(
        district_po_path, sep='\t', skiprows=1, nrows=1, usecols=[1, 2], header=None,
        names=['StartPostPoint', 'EndPostPoint'])
    # tolist() yields plain Python scalars, so the returned route stays free of numpy types
    start_point = df_start_end_pp['StartPostPoint'].tolist()[0]
    end_point = df_start_end_pp['EndPostPoint'].tolist()[0]

    # PostPoint -> PostObject mapping, one row per (PostPoint, PostObject) pair
    df_district_pp_po_mapping = pd.read_csv(
        district_pp_po_mapping_path, sep='\t', names=('PostPointId', 'PostObjectId'))
    df_district_pp_po_mapping['PostObjectId'] = df_district_pp_po_mapping['PostObjectId'].apply(literal_eval)
    df_district_pp_po_mapping = df_district_pp_po_mapping.explode('PostObjectId', ignore_index=True)

    # A left merge keeps the PostObject visiting order while attaching the PostPoint
    df_merged = pd.merge(df_district_po, df_district_pp_po_mapping, on='PostObjectId', how='left')

    # An unmapped PostObject would put NaN into the route and can never be
    # priced by the distance table, so fail early with a helpful message.
    unmapped = df_merged.loc[df_merged['PostPointId'].isna(), 'PostObjectId'].tolist()
    if unmapped:
        raise ValueError(
            'ERROR: No PostPoint found for PostObject(s) {} of district {}'.format(unmapped, district_name))

    # PostObjects in the same building visited consecutively need only one PostPoint visit
    sequence = _with_endpoints(df_merged['PostPointId'].tolist(), start_point, end_point)
    district_pp_sequence_filtered = _drop_consecutive_duplicates(sequence)

    # Number of PostPoints that are visited, left and visited again later
    repeated_pp_num = _count_repeated(district_pp_sequence_filtered)

    district_tour_length = _tour_length(district_pp_sequence_filtered, df_pp_district_distances)

    # Mapping of index position in the route -> PostPoint id
    mapping_district = dict(enumerate(district_pp_sequence_filtered))

    return district_pp_sequence_filtered, mapping_district, district_tour_length, repeated_pp_num


#Example run for a single district; as the last expression of the cell, the returned tuple is shown as the cell output
get_default_route(region = 'Warmsen', district_name = '31600-03')



([0,
  877,
  885,
  878,
  883,
  876,
  879,
  880,
  881,
  882,
  884,
  901,
  905,
  904,
  902,
  903,
  906,
  907,
  908,
  909,
  910,
  911,
  912,
  943,
  944,
  942,
  940,
  941,
  760,
  751,
  758,
  756,
  765,
  761,
  757,
  766,
  752,
  754,
  755,
  759,
  753,
  762,
  763,
  764,
  1099,
  1094,
  1092,
  1095,
  1100,
  1101,
  1102,
  1103,
  1104,
  1105,
  1106,
  1107,
  1108,
  1109,
  1110,
  1111,
  1112,
  1097,
  1093,
  1098,
  1096,
  1091,
  849,
  850,
  851,
  852,
  848,
  853,
  1117,
  1116,
  1114,
  1113,
  1115,
  1119,
  1118,
  925,
  918,
  929,
  928,
  927,
  926,
  930,
  932,
  924,
  922,
  921,
  920,
  935,
  934,
  931,
  919,
  933,
  923,
  767,
  8,
  768,
  857,
  858,
  856,
  855,
  859,
  854,
  860,
  861,
  862,
  863,
  864,
  865,
  866,
  867,
  958,
  959,
  960,
  771,
  772,
  773,
  774,
  775,
  776,
  777,
  778,
  779,
  780,
  781,
  770,
  782,
  783,
  784,
  769,
  785,
  786,
  787,
  788,
  789,
  871,
  

In [85]:
#FUNCTION to get the list of all districts on a specific region by looping existing entries in the folder
def get_district_list(region, base_path=None):
    """Return the sorted names of all districts of a region.

    The district names are the file names (without extension) of the '.dat'
    files found in ``<base>/Data/Instances/<region>/Districts``.

    Args:
        region: Name of the region folder.
        base_path: Directory containing the 'Data' folder. Defaults to the
            module-level ``path`` when omitted.

    Returns:
        Alphabetically sorted list of district names.
    """
    root = path if base_path is None else base_path
    districts_name_path = os.path.join(root, 'Data', 'Instances', region, 'Districts')

    districts_list = []
    for filename in os.listdir(districts_name_path):
        stem, extension = os.path.splitext(filename)
        # Skip sub-folders, hidden files and anything that is not a district
        # '.dat' file instead of blindly chopping the last four characters.
        if extension.lower() != '.dat':
            continue
        if not os.path.isfile(os.path.join(districts_name_path, filename)):
            continue
        districts_list.append(stem)

    # os.listdir returns entries in arbitrary order; sort for reproducible runs
    return sorted(districts_list)

     

In [86]:
#FUNCTION to generate all default route information for an entire region
def get_region_all_default_routes(region_chosen):
    """Generate and store the default route information of every district in a region.

    For each district returned by ``get_district_list`` the default route is
    computed with ``get_default_route`` and two files are written into
    ``<path_save>/<region_chosen>_Default_Routes``:
      * ``<district>_default_route.p``: pickled list with the route sequence
      * ``<district>default_route_info.txt``: tour length, number of repeated
        PostPoints and the position -> PostPoint mapping

    Args:
        region_chosen: Name of the region folder to process.
    """
    dir_region = os.path.join(path_save, region_chosen + '_Default_Routes')
    # Creates missing parent folders too and is a no-op when the folder exists
    os.makedirs(dir_region, exist_ok=True)

    for district in get_district_list(region=region_chosen):
        route_list, district_mapping, total_length, repetitions = get_default_route(
            region=region_chosen, district_name=district)

        # Save default route sequence list as pickle file; the context manager
        # guarantees the handle is flushed and closed.
        with open(os.path.join(dir_region, district + '_default_route.p'), 'wb') as route_file:
            pickle.dump(route_list, route_file)

        # Save total tour length, repeating nodes and mapping info as text.
        # NOTE(review): unlike the pickle file, this name has no underscore
        # between the district and the suffix; kept as-is so existing
        # consumers of these files keep working.
        with open(os.path.join(dir_region, district + 'default_route_info.txt'), 'w') as info_file:
            info_file.write('Total tour length in seconds: \n{}\n'.format(total_length))
            info_file.write('Total tour length in hours: \n{}\n'.format(round(total_length/3600,1)))
            info_file.write('Number of PostPoint visited more than once: \n{}\n'.format(repetitions))
            info_file.write('Mapping: \n{}\n'.format(district_mapping))



In [87]:
%%time
#Generate the output folder with the default-route files for every district of one region
#Change the region name passed below to process a different region
get_region_all_default_routes('Warmsen')

CPU times: total: 1.53 s
Wall time: 3.35 s
