# Amazon Last-Mile Route Challenge Data Analysis
by Guilherme Fernandes Alves, Oct 2022 

In [1]:
# Enable IPython's autoreload extension; mode 2 re-imports all modules before
# each cell runs, so edits to imported packages are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import os
from datetime import datetime

import numpy as np
import pandas as pd

import lmr_analyzer as lmr


# Testing lmr_analyzer library

Testing package object definition

In [3]:
# Build a single sample package so the attributes stored on it can be inspected.
example_pck = lmr.package(
    name="example_pck",
    dimensions=(25, 10, 7),  # (width, height, length) in cm
    status="delivered",
    weight=1.5,  # kg
    price=9.90,  # $
)

# Display the instance's attribute dictionary.
vars(example_pck)


{'name': 'example_pck',
 'dimensions': (25, 10, 7),
 'status': 'delivered',
 'weight': 1.5,
 'price': 9.9,
 'volume': 1750}

Testing stop object definitions

In [4]:
# Two pickup stops at the same coordinates, both carrying the sample package;
# they differ only in name and time window.
example_stp1 = lmr.stop(
    name="example_stp1",
    location=(0, 0),  # (lat, lon)
    location_type="pickup",
    time_window=(datetime(2020, 1, 1, 8, 0), datetime(2020, 1, 1, 8, 50)),
    packages=[example_pck],
    planned_service_time=0,
)
example_stp2 = lmr.stop(
    name="example_stp2",
    location=(0, 0),  # (lat, lon)
    location_type="pickup",
    time_window=(datetime(2020, 1, 1, 9, 0), datetime(2020, 1, 1, 9, 10)),
    packages=[example_pck],
    planned_service_time=0,
)

# Display the attribute dictionary of the first stop.
vars(example_stp1)


{'name': 'example_stp1',
 'location': (0, 0),
 'location_type': 'pickup',
 'time_window': (datetime.datetime(2020, 1, 1, 8, 0),
  datetime.datetime(2020, 1, 1, 8, 50)),
 'packages': [<lmr_analyzer.package.package at 0x2349064eb60>],
 'planned_service_time': 0,
 'packages_list': [<lmr_analyzer.package.package at 0x2349064eb60>],
 'delivery_time': datetime.timedelta(seconds=3000),
 'number_of_packages': 1,
 'number_of_delivered_packages': 1,
 'number_of_rejected_packages': 0,
 'number_of_attempted_packages': 0,
 'number_of_to_be_delivered_packages': 0,
 'total_volume_of_packages': 1750,
 'average_volume_of_packages': 1750.0,
 'total_volume_of_delivered_packages': [1750],
 'total_volume_of_rejected_packages': [],
 'total_volume_of_attempted_packages': [],
 'total_volume_of_to_be_delivered_packages': []}

Testing route object definition

In [5]:
# Assemble a route from the two sample stops.
r1 = lmr.route(name="r1", stops=[example_stp1, example_stp2])

# Set the actual stop sequence (by stop name), then call the route scoring step.
r1.set_actual_sequence(["example_stp1", "example_stp2"])
r1.evaluate_route_scores()

# Display the route's attribute dictionary.
vars(r1)


{'name': 'r1',
 'stops': {'example_stp1': <lmr_analyzer.stop.stop at 0x2349064c970>,
  'example_stp2': <lmr_analyzer.stop.stop at 0x2349064ea40>},
 'departure_time': None,
 'vehicle': None,
 'stops_names': ['example_stp1', 'example_stp2'],
 'number_of_stops': 2,
 'actual_sequence': [<lmr_analyzer.stop.stop at 0x2349064c970>,
  <lmr_analyzer.stop.stop at 0x2349064ea40>],
 'number_of_actual_stops': 2,
 'actual_sequence_names': ['example_stp1', 'example_stp2']}

In [9]:
# Wrap the sample route in an analysis object and display its attributes.
a1 = lmr.analysis(name="a1", routes=[r1])
vars(a1)


{'name': 'a1',
 'routes': [<lmr_analyzer.route.route at 0x2349064ca60>],
 'routes_dict': {'r1': <lmr_analyzer.route.route at 0x2349064ca60>}}

# Serializing the data

In [3]:
# Location of the challenge's "model_build_inputs" folder. The default is the
# original author's shared drive; set the LMR_DATA_ROOT environment variable to
# point at a local copy instead of editing this cell.
DEFAULT_ROOT_DIRECTORY = "G:/Drives compartilhados/TF Eng Civil PTR - Guilherme_Felipe - 2022/1. BASES/AMAZON-ROUTING-CHALLENGE-2021/almrrc2021/almrrc2021-data-training/model_build_inputs"
# `or` (rather than a .get default) also falls back when the variable is set
# but empty.
root_directory = os.environ.get("LMR_DATA_ROOT") or DEFAULT_ROOT_DIRECTORY

db = lmr.amz_serializer(root_directory=root_directory)

# Get some nicknames
packages_dict, routes_dict = db.packages_dict, db.routes_dict


package_data.json has been loaded in 27.90 seconds.
route_data.json has been loaded in 16.10 seconds.
actual_sequences.json has been loaded in 1.10 seconds.
We are ready to proceed. All files have been loaded in 45.10 seconds.


In [5]:
def print_info_by_city(routes_by_city=None):
    """Print how many routes each city has and its share of the total.

    Parameters
    ----------
    routes_by_city : dict, optional
        Mapping of city name to a sized collection of that city's routes.
        When omitted, the notebook-level ``routes_dict`` is used.
    """
    if routes_by_city is None:
        routes_by_city = routes_dict

    # Count once per city; avoids shadowing the builtin ``dict`` as the
    # original loop variable did.
    counts = {city: len(city_routes) for city, city_routes in routes_by_city.items()}
    total = sum(counts.values())

    for city, count in counts.items():
        print("Number of routes in {}: {}".format(city, count))
    print("Total number of routes: {}".format(total))

    # Print the percentage of each city
    for city, count in counts.items():
        # With no routes at all, report 0% instead of raising ZeroDivisionError.
        share = count / total * 100 if total else 0.0
        print("Percentage of routes in {}: {:.2f}%".format(city, share))


print_info_by_city()


Number of routes in Los Angeles: 2876
Number of routes in Seattle: 1079
Number of routes in Chicago: 1002
Number of routes in Boston: 929
Number of routes in Austin: 214
Total number of routes: 6100
Percentage of routes in Los Angeles: 47.15%
Percentage of routes in Seattle: 17.69%
Percentage of routes in Chicago: 16.43%
Percentage of routes in Boston: 15.23%
Percentage of routes in Austin: 3.51%


In [15]:
def export_routes_to_csv(
    city="Los Angeles", output_path="la_coordinates.csv", routes_by_city=None
):
    """Write the stop coordinates of every route of one city to a CSV file.

    One row is written per stop, following each route's ``actual_sequence``.
    The last two columns (distance to next stop, duration) are written as the
    placeholder ``-``.

    Parameters
    ----------
    city : str, optional
        Key of the city to export. Defaults to "Los Angeles".
    output_path : str, optional
        Destination file; it is overwritten if it exists. Defaults to
        "la_coordinates.csv".
    routes_by_city : dict, optional
        Mapping of city name to a dict of route objects. When omitted, the
        notebook-level ``routes_dict`` is used.
    """
    if routes_by_city is None:
        routes_by_city = routes_dict

    with open(output_path, "w") as f:
        # Write header
        f.write("route,stop,lat,lon,distance_to_next_stop(km),duration(min)\n")
        # Iterate over routes, then over the stops of each route
        for route in routes_by_city[city].values():
            for stop in route.actual_sequence:
                f.write(
                    "{},{},{},{},-,-\n".format(
                        route.name, stop.name, stop.location[0], stop.location[1]
                    )
                )


# export_routes_to_csv()


# Analyze the data

Calculate drive distance for a number of stops

In [8]:
# Gather every Los Angeles route into a single analysis object.
los_angeles = lmr.analysis(
    name="los_angeles",
    routes=[*routes_dict["Los Angeles"].values()],
)


In [28]:
def summarize_route_status_by_routes(analysis):
    """Summarize the status of the routes in the analysis. Each route will be
    represented by its centroid.

    Parameters
    ----------
    analysis : lmr.analysis
        An analysis object

    Returns
    -------
    dict
        Mapping of route name to a dict holding the centroid coordinates
        (``centroid_lat``, ``centroid_lon``) followed by the route's package
        and stop counters. Routes sharing a name overwrite each other.
        NOTE(review): the ``*_percentage`` values show up as fractions (e.g.
        0.992754) in the notebook output, not as 0-100 percentages.
    """
    # Route attributes copied verbatim into each summary row, in column order.
    status_fields = (
        "number_of_packages",
        "number_of_delivery_stops",
        "number_of_delivered_packages",
        "number_of_rejected_packages",
        "number_of_attempted_packages",
        "avg_packages_per_stop",
        "rejected_packages_percentage",
        "delivered_packages_percentage",
        "attempted_packages_percentage",
    )

    # First pass: have every route compute its status and centroid before any
    # of those attributes are read.
    for route in analysis.routes:
        route.evaluate_route_status()
        route.calculate_route_centroid()

    master = {}

    # Second pass: collect one summary row per route.
    for route in analysis.routes:
        centroid = route.actual_sequence_centroid
        summary = {"centroid_lat": centroid[0], "centroid_lon": centroid[1]}
        summary.update((field, getattr(route, field)) for field in status_fields)
        master[route.name] = summary

    return master


master = summarize_route_status_by_routes(los_angeles)


In [32]:
# One row per route (route name as the index), one column per summary field.
master_pd = pd.DataFrame.from_dict(data=master, orient="index")
master_pd.head(n=5)


Unnamed: 0,centroid_lat,centroid_lon,number_of_packages,number_of_delivery_stops,number_of_delivered_packages,number_of_rejected_packages,number_of_attempted_packages,avg_packages_per_stop,rejected_packages_percentage,delivered_packages_percentage,attempted_packages_percentage
RouteID_00143bdd-0a6b-49ec-bb35-36593d303e77,34.096104,-118.284632,276,119,274,0,2,2.319328,0.0,0.992754,0.007246
RouteID_001b4ee3-c4f2-467f-932b-c85524d1021f,33.82383,-118.055348,241,142,241,0,0,1.697183,0.0,1.0,0.0
RouteID_0021a2aa-780f-460d-b09a-f301709e2523,33.852298,-117.773443,219,155,218,0,1,1.412903,0.0,0.995434,0.004566
RouteID_00575ca4-8a63-49d2-96c8-9b347be5ba6c,34.105233,-118.307342,211,59,204,0,7,3.576271,0.0,0.966825,0.033175
RouteID_00595235-52e7-4b68-883c-70482701dea0,33.994934,-118.415288,274,129,273,0,1,2.124031,0.0,0.99635,0.00365


In [34]:
# Persist the per-route summary table as CSV. The path is relative, so the
# file is written to the current working directory.
master_pd.to_csv("master_pd_los_angeles_v1.csv")


In [None]:
# NOTE(review): this cell has no execution count (In [None]), i.e. it was not
# run in the saved notebook, so what analyze() produces is not shown here.
los_angeles.analyze()
