In [2]:
%autosave 0

Autosave disabled


In [106]:
import os
import pandas as pd
import numpy as np
import re
import collections
import pickle
from ast import literal_eval
import random
import scipy

# Define Path of Files

In [4]:
from google.colab import drive
# Mount Google Drive at /content/drive so files stored on the drive can be
# read through regular file paths from this Colab runtime.
drive.mount('/content/drive')

Mounted at /content/drive


In [5]:
# Path for the 'Data' folder provided by DP.
# The Colab shared-drive location is the default; set the AOP_DP_DATA_DIR
# environment variable to point the notebook at another copy of the data
# without editing this cell.
dir_loc = os.environ.get(
    'AOP_DP_DATA_DIR',
    '/content/drive/Shareddrives/Private Unlimited Drive #1/DDS/Analytics Project/AOP_DP_Analytics/Data')

# Functions

## Volume Matrix Manipulation and Poisson Instances Generation


In [96]:
# create dataframes for a region everyday

def generate_region_volume(region_name, data_dir=None):
  """Build one merged volume table per weekday for a region.

  Reads ``post_order_id_mapping.dat`` and ``post_point_information.dat`` from
  ``<data_dir>/Instances/<region_name>`` and every weekday volume file found in
  ``<data_dir>/Volumes``, then joins them so that each row carries
  PostPointId, PostObjectId, RoutePosID and the LETTERS / PACKAGES / OTHERS
  volumes of that day.

  Parameters
  ----------
  region_name : str
      Name of the region sub-folder inside ``Instances``.
  data_dir : str, optional
      Root data folder. When omitted, the notebook-level ``dir_loc`` is used.

  Returns
  -------
  dict
      Maps an English weekday name ('Monday' ... 'Saturday') to the merged
      DataFrame of that day. If several files carry the same day code, the
      one that sorts last by file name wins.
  """
  base_dir = dir_loc if data_dir is None else data_dir
  dir_region_instances = os.path.join(base_dir, 'Instances', region_name)
  # NOTE(review): the Volumes folder is not filtered by region_name -- confirm
  # that it only holds the files belonging to the requested region.
  dir_region_volume = os.path.join(base_dir, 'Volumes')

  # load post_object-to-route_pos_id and post_point-to-post_object files
  po_file = pd.read_csv(os.path.join(dir_region_instances, 'post_order_id_mapping.dat'),
                        sep='\t', names=('PostObjectId', 'RoutePosID'))
  pp_file = pd.read_csv(os.path.join(dir_region_instances, 'post_point_information.dat'),
                        sep='\t', names=('PostPointId', 'PostObjectId'))

  # each post point stores a textual list of post object ids: parse it and
  # give every post object id its own row
  pp_file['PostObjectId'] = pp_file['PostObjectId'].apply(literal_eval)
  pp_file = pp_file.explode('PostObjectId', ignore_index=True)

  # two-letter day code at the end of the file name -> weekday name
  day_map = {'mo' : 'Monday',
             'di' : 'Tuesday',
             'mi' : 'Wednesday',
             'do' : 'Thursday',
             'fr' : 'Friday',
             'sa' : 'Saturday'}
  column_map = {'BRIEFE' : 'LETTERS',
                'PAKETE' : 'PACKAGES',
                'SONSTIGE' : 'OTHERS',
                'ROUTEPOS_ID' : 'RoutePosID'}

  region_vol_day_dict = {}
  # sorted() makes the processing order independent of the file system
  for filename in sorted(os.listdir(dir_region_volume)):
    vol_path = os.path.join(dir_region_volume, filename)
    # the day code is the last two characters before the extension; deriving
    # it from the file name itself works for names of any length
    day_code = os.path.splitext(filename)[0][-2:]
    if day_code not in day_map or not os.path.isfile(vol_path):
      # not a weekday volume file (hidden file, sub-folder, notes, ...)
      continue

    vol_df = pd.read_csv(vol_path, sep = ';').rename(columns = column_map)

    # combining files to a complete table for a region; the right join keeps
    # every post object of the region even when no post point refers to it
    vol_po_df = pd.merge(po_file, vol_df, on = 'RoutePosID', how = 'left')
    vol_po_df = pd.merge(pp_file, vol_po_df, on = 'PostObjectId', how = 'right')

    region_vol_day_dict[day_map[day_code]] = vol_po_df

  return region_vol_day_dict

In [100]:
# generate instances

def generate_instances(region_vol_day, scenario_type, scenario_method, scenario_number):
  """Append the generated scenario columns to a copy of a daily volume table.

  ``scenario_type`` is called once per row and per volume column as
  ``scenario_type(pos_delivery=<volume>, method=scenario_method)``. The
  results are stored in ``scenario_<n>_letter``, ``scenario_<n>_package`` and
  ``scenario_<n>_others``; ``scenario_<n>_all`` holds their row-wise sum.
  The input frame is left untouched.
  """
  prefix = 'scenario_' + str(scenario_number) + '_'

  def draw(volume):
    return scenario_type(pos_delivery = volume, method = scenario_method)

  # work on a copy so the caller's dataframe keeps its original columns
  instances = region_vol_day.copy()

  generated = []
  for source_col, suffix in (('LETTERS', 'letter'),
                             ('PACKAGES', 'package'),
                             ('OTHERS', 'others')):
    target_col = prefix + suffix
    instances[target_col] = instances[source_col].apply(draw)
    generated.append(target_col)

  letters_col, packages_col, others_col = generated
  instances[prefix + 'all'] = instances[letters_col] + instances[packages_col] + instances[others_col]

  return instances

In [101]:
# generate real instances based on poisson

def random_poisson_instances(pos_delivery, method, rng=None, n_samples=52):
  """Draw one delivery count from a Poisson distribution with mean ``pos_delivery``.

  ``n_samples`` Poisson values are generated and condensed into one value.

  Parameters
  ----------
  pos_delivery : float
      Poisson rate (``lam``) of the position.
  method : {'random', 'mode'}
      'random' returns one of the generated samples picked uniformly,
      'mode' returns the most frequent sample (the smallest one on ties).
  rng : numpy.random.Generator, optional
      Generator used for all draws. Pass a seeded generator for reproducible
      instances; a fresh unseeded one is created when omitted.
  n_samples : int, optional
      Number of Poisson samples to generate (default 52).
      # NOTE(review): presumably one draw per week of a year -- confirm.

  Returns
  -------
  numpy integer
      The selected sample, or NaN when ``pos_delivery`` is missing.

  Raises
  ------
  ValueError
      If ``method`` is neither 'random' nor 'mode'.
  """
  # fail loudly: returning the message would silently store text in the
  # scenario columns of the caller's dataframe
  if method not in ('random', 'mode'):
    raise ValueError('unknown method ' + repr(method) + '; only options : [random, mode]')

  # a missing volume stays missing instead of crashing the Poisson sampler
  if pd.isna(pos_delivery):
    return np.nan

  if rng is None:
    rng = np.random.default_rng()
  poisson_dist = rng.poisson(lam = pos_delivery, size = n_samples)

  if method == 'random':
    return rng.choice(poisson_dist)

  # np.unique returns the values sorted, and argmax picks the first maximum,
  # so ties resolve to the smallest value
  values, counts = np.unique(poisson_dist, return_counts = True)
  return values[np.argmax(counts)]

# Process Examples (Using Functions)

In [88]:
%%time
# Build the dictionary of merged volume tables (one per weekday) for Warmsen
region_vol_day_dict = generate_region_volume(region_name = "Warmsen")

CPU times: user 10.6 s, sys: 427 ms, total: 11 s
Wall time: 11.9 s


In [107]:
%%time
# Build the complete dictionary of generated instances for the region:
# weekday name -> volume table extended with the scenario_1_* columns
warmsen_instances = {}

# go through every weekday table of the region
for key in region_vol_day_dict.keys():
  # generate scenario 1 for this day, condensing the Poisson samples with the 'mode' method
  dummy = generate_instances(region_vol_day = region_vol_day_dict[key],
                           scenario_type = random_poisson_instances,
                           scenario_method = 'mode',
                           scenario_number = 1)
  warmsen_instances[key] = dummy

CPU times: user 16.8 s, sys: 395 ms, total: 17.2 s
Wall time: 17.5 s


In [108]:
# Inspect the generated instances of one weekday
warmsen_instances['Tuesday']

Unnamed: 0,PostPointId,PostObjectId,RoutePosID,LETTERS,PACKAGES,OTHERS,scenario_1_letter,scenario_1_package,scenario_1_others,scenario_1_all
0,16,16,AD14E08623FF9F5AD2293E7DEDB4F4B1,5.796667,0.690000,0.273333,7,0,0,7
1,17,17,F48CB0CAB0BC117AE030007F0100574C,5.796667,0.690000,0.273333,5,0,0,5
2,16,18,F48CB0CAB0C7117AE030007F0100574C,5.796667,0.690000,0.273333,7,0,0,7
3,18,19,F48CB0CAB0C8117AE030007F0100574C,0.000000,0.000000,0.000000,0,0,0,0
4,19,20,069AE286923C1800E040400A09131416,0.000000,0.000000,0.000000,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...
5534,5221,5550,F48CB0CADA37117AE030007F0100574C,0.770000,0.112143,0.334286,0,0,0,0
5535,5222,5551,F48CB0CADA38117AE030007F0100574C,1.540000,0.224286,0.668571,1,0,0,1
5536,5223,5552,F48CB0CADA3A117AE030007F0100574C,0.770000,0.112143,0.334286,0,0,0,0
5537,5224,5553,F48CB0CADA3B117AE030007F0100574C,0.770000,0.112143,0.334286,1,0,0,1
