In [1]:
# Directory from which homes.gpkg and confluence_areas.gpkg are read
demand_path = "input"
# Directory from which the slots CSV is read and to which the vrp_deliveries CSV is written
output_path = "output"

# Seed of the random generator used to assign a service group to each delivery
seed = 0
# Text inserted before ".csv" in the slots and deliveries file names
suffix = ""

In [2]:
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm
import itertools
import geopandas as gpd
import os



In [3]:
# Fail fast if any required input file is missing.
# The slots file name includes `suffix`, because that is the file actually
# read further down ("slots%s.csv"); checking plain "slots.csv" would
# validate the wrong file whenever a suffix is configured.
required_files = [
    "%s/confluence_areas.gpkg" % demand_path,
    "%s/vrp_network.gpkg" % output_path,
    "%s/vrp_nodes.gpkg" % output_path,
    "%s/services.gpkg" % output_path,
    "%s/slots%s.csv" % (output_path, suffix),
    "%s/homes.gpkg" % demand_path,
]

for required_file in required_files:
    assert os.path.exists(required_file), "Missing input file: %s" % required_file

# Relevant information

In [4]:
# Geometries from homes.gpkg; they are spatially joined against the Confluence areas below
df_spatial = gpd.read_file("%s/homes.gpkg" % demand_path)

In [5]:
# Semicolon-separated slots table; its household_id, x and y columns are used below
df_slots = pd.read_csv("%s/slots%s.csv" % (output_path, suffix), sep = ";")

# Filter Confluence households

In [6]:
# Identifiers of all households whose geometry lies within a Confluence area.
# `predicate` replaces the `op` keyword, which is deprecated since
# geopandas 0.10 and no longer accepted by geopandas 1.0.
confluence_households = gpd.sjoin(
    df_spatial,
    gpd.read_file("%s/confluence_areas.gpkg" % demand_path),
    predicate = "within"
)["household_id"].unique()

In [7]:
# Keep only the slots of households found within the Confluence areas
df_confluence = df_slots[df_slots["household_id"].isin(confluence_households)]

In [8]:
# VRP nodes, read from the configured output directory (the same location
# that is validated by the input checks) instead of a hard-coded "output/".
df_nodes = gpd.read_file("%s/vrp_nodes.gpkg" % output_path)

In [9]:
# Find household nodes
# Find household nodes
import sklearn
import sklearn.neighbors

# Spatial index over the (x, y) coordinates of all VRP nodes
node_coordinates = np.column_stack([
    df_nodes["geometry"].x.values,
    df_nodes["geometry"].y.values,
])
node_index = sklearn.neighbors.KDTree(node_coordinates)

# One row per household with its coordinates
df_households = df_slots.drop_duplicates("household_id")[["household_id", "x", "y"]]

# Look up the closest node of every household and store that node's id
household_coordinates = np.column_stack([
    df_households["x"].values,
    df_households["y"].values,
])
_, nearest_nodes = node_index.query(household_coordinates)

df_households["location_id"] = df_nodes.iloc[nearest_nodes.flatten()]["id"].values

In [10]:
# Attach the nearest node (location_id) to every Confluence slot
df_deliveries = df_confluence.merge(df_households[["household_id", "location_id"]])

# Write the deliveries as a semicolon-separated file
deliveries_path = "%s/vrp_deliveries%s.csv" % (output_path, suffix)
df_deliveries.to_csv(deliveries_path, sep = ";")

In [11]:
len(df_deliveries)

287

## By Operator

In [17]:
# Services are read from the configured output directory (the same location
# that is validated by the input checks) instead of a hard-coded "output/".
df_services = gpd.read_file("%s/services.gpkg" % output_path)

# Collapse to one row per group: weights are summed, geometry and entry are
# taken from the first row of the group.
df_services = df_services.groupby("group").aggregate({
    "weight": "sum",
    "geometry": "first",
    "entry": "first"
}).reset_index()

# Share of each group in the total weight
df_services["share"] = df_services["weight"] / df_services["weight"].sum()

In [20]:
df_services

Unnamed: 0,group,weight,geometry,entry,share
0,dhl,0.26,POINT (842704.337 6517559.429),gallieni,0.117647
1,dpd,0.67,POINT (849654.247 6511023.329),pasteur,0.303167
2,poste,0.63,POINT (841382.924 6517564.458),riboud,0.285068
3,tnt_fedex,0.36,POINT (842805.709 6512819.840),pasteur,0.162896
4,ups,0.29,POINT (858096.055 6522212.665),gallieni,0.131222


In [30]:
# Keep a single row per household (the first occurrence) as an independent copy
df_deliveries = df_deliveries.drop_duplicates("household_id").copy()

In [31]:
# Draw a service group for every delivery, proportionally to the group shares.
cdf = np.cumsum(df_services["share"])
names = df_services["group"]

random = np.random.RandomState(seed)
draws = random.random(len(df_deliveries))

# The index of the drawn group is the number of cumulative shares below the
# draw. Because of floating point rounding the last cumulative share can end
# up slightly below 1.0; a draw above it would yield an index one past the
# last group, so the index is clamped to the last valid position.
last_index = len(names) - 1
selection = [min(np.count_nonzero(cdf < r), last_index) for r in draws]

df_deliveries["group"] = names.iloc[selection].values

In [32]:
# Every delivery is offered twice: once in a window from 08:00 to 12:00 and
# once in a window from 13:00 to 16:00 (times in seconds after midnight).
df_morning = df_deliveries.assign(start_time = 8 * 3600, end_time = 12 * 3600)
df_evening = df_deliveries.assign(start_time = 13 * 3600, end_time = 16 * 3600)

# Stack both variants; the original row index is kept, so it repeats
df_deliveries = pd.concat([df_morning, df_evening])

In [33]:
# Write one semicolon-separated deliveries file per service group. The files
# go to the configured output directory, consistent with the vrp_deliveries
# export, instead of a hard-coded "output/".
for group in df_services["group"].unique():
    df_group = df_deliveries[df_deliveries["group"] == group]

    df_group.to_csv(
        "%s/deliveries_for_%s%s.csv" % (output_path, group, suffix), sep = ";"
    )

In [34]:
df_deliveries

Unnamed: 0,household_id,start_time,end_time,x,y,packages,location_id,shipper,group
0,1283558,28800,43200,841385.51476,6.517618e+06,1.0,5947881169,poste,poste
2,1289405,28800,43200,842083.35000,6.518032e+06,1.0,5947847646,poste,tnt_fedex
3,1292948,28800,43200,841602.17000,6.517852e+06,1.0,5324491691,poste,poste
6,1308705,28800,43200,841176.50000,6.517576e+06,1.0,6175361290,poste,poste
7,1322815,28800,43200,841416.78000,6.518074e+06,1.0,7017492270,ups,poste
...,...,...,...,...,...,...,...,...,...
207,1340401,46800,57600,841523.90000,6.517839e+06,1.0,6921021581,poste,poste
208,1667428,46800,57600,841382.60000,6.517588e+06,1.0,5947881169,poste,dhl
209,1310484,46800,57600,841896.20000,6.517896e+06,1.0,4574272249,ups,tnt_fedex
210,1975699,46800,57600,841664.51000,6.518090e+06,1.0,6970402894,ups,dhl
