# Amazon Last-Mile Route Challenge Data Analysis
by Guilherme Fernandes Alves, Oct 2022 

In [1]:
# Enable IPython's autoreload extension so that edits to imported modules
# are picked up without restarting the kernel.
%load_ext autoreload
# Mode 2: reload all modules before executing each cell.
%autoreload 2

In [2]:
from datetime import datetime

import boto3
from botocore import UNSIGNED
from botocore.config import Config

import lmr_analyzer as lmr


# Testing lmr_analyzer library

Testing package object definition

In [3]:
# Sample package reused by the stop examples further down.
# Units: dimensions are (width, height, length) in cm, weight in kg, price in $.
example_pck = lmr.package(
    name="example_pck",
    dimensions=(25, 10, 7),
    status="delivered",
    weight=1.5,
    price=9.90,
)

# Display the attributes stored on the instance.
vars(example_pck)


{'name': 'example_pck',
 'dimensions': (25, 10, 7),
 'status': 'delivered',
 'weight': 1.5,
 'price': 9.9}

Testing stop object definitions

In [4]:
# Two sample stops that carry the same package; they differ only in their
# name and time window.
example_stp1 = lmr.stop(
    name="example_stp1",
    location=(0, 0),  # (lat, lon)
    location_type="pickup",
    time_window=(
        datetime(2020, 1, 1, 8, 0),
        datetime(2020, 1, 1, 8, 50),
    ),
    packages=[example_pck],
    planned_service_time=0,
)
example_stp2 = lmr.stop(
    name="example_stp2",
    location=(0, 0),  # (lat, lon)
    location_type="pickup",
    time_window=(
        datetime(2020, 1, 1, 9, 0),
        datetime(2020, 1, 1, 9, 10),
    ),
    packages=[example_pck],
    planned_service_time=0,
)

# Display the attributes stored on the first stop.
vars(example_stp1)


{'name': 'example_stp1',
 'location': (0, 0),
 'location_type': 'pickup',
 'time_window': (datetime.datetime(2020, 1, 1, 8, 0),
  datetime.datetime(2020, 1, 1, 8, 50)),
 'packages': [<lmr_analyzer.package.package at 0x15ac94d3050>],
 'planned_service_time': 0,
 'packages_list': [<lmr_analyzer.package.package at 0x15ac94d3050>],
 'status_list': ['delivered']}

Testing route object definition

In [5]:
# Build a route from the two sample stops, then register the order in which
# they were visited (stops are referenced by name).
r1 = lmr.route(name="r1", stops=[example_stp1, example_stp2])
r1.set_actual_sequence(["example_stp1", "example_stp2"])
# NOTE(review): route scoring (`r1.evaluate_route_scores()`) is left disabled
# here; confirm whether it should run as part of this example.

# Display the attributes stored on the route.
vars(r1)


{'name': 'r1',
 'stops': {'example_stp1': <lmr_analyzer.stop.stop at 0x15acaa30510>,
  'example_stp2': <lmr_analyzer.stop.stop at 0x15acaa32dd0>},
 'departure_time': None,
 'vehicle': None,
 'stops_names': ['example_stp1', 'example_stp2'],
 'number_of_stops': 2,
 'actual_sequence': [<lmr_analyzer.stop.stop at 0x15acaa30510>,
  <lmr_analyzer.stop.stop at 0x15acaa32dd0>],
 'number_of_actual_stops': 2,
 'actual_sequence_names': ['example_stp1', 'example_stp2']}

In [6]:
# Wrap the sample route in an analysis object and display its attributes.
a1 = lmr.analysis(name="a1", routes=[r1])
vars(a1)


{'name': 'a1',
 'routes': [<lmr_analyzer.route.route at 0x15ac91bb750>],
 'routes_dict': {'r1': <lmr_analyzer.route.route at 0x15ac91bb750>}}

# Serializing the data

## Connecting to the Amazon database

Create a boto3 S3 client that sends unsigned (anonymous) requests, so no AWS credentials are required

In [7]:
# S3 client configured to skip request signing, i.e. anonymous access.
# NOTE(review): `s3` is not referenced by the later cells shown in this
# notebook, which call the AWS CLI instead.
s3 = boto3.client(
    "s3",
    config=Config(signature_version=UNSIGNED),
)


List all the files available in the database.
Depending on your internet connection, this may take around 4 minutes.
It is a good idea to keep the files saved locally after downloading them.

In [8]:
# Shell escape: list the objects under the training `model_build_inputs/` prefix
# with the AWS CLI. `--no-sign-request` sends the request without credentials.
!aws s3 ls --no-sign-request "s3://amazon-last-mile-challenges/almrrc2021/almrrc2021-data-training/model_build_inputs/"

2022-06-03 04:51:31    9665078 actual_sequences.json
2022-06-03 04:51:32     414742 invalid_sequence_scores.json
2022-06-03 04:51:21  375437806 package_data.json
2022-06-03 04:51:33         74 readme.md
2022-06-03 04:51:21   78972162 route_data.json
2022-06-03 04:51:21 1817146363 travel_times.json


Downloading the files (roughly 2.3 GB in total, according to the listing above)

In [None]:
!aws s3 cp --no-sign-request "s3://amazon-last-mile-challenges/almrrc2021/almrrc2021-data-training/model_build_inputs/" ../../data/last_mile_data/amazon-last-mile-challenges/  --recursive 

## Run the serializing process

In [11]:
# Load the downloaded JSON files from the local data directory.
db = lmr.amz_serializer(
    root_directory="../../data/last_mile_data/amazon-last-mile-challenges/"
)

# Short aliases for the two lookup tables used in the rest of the notebook.
packages_dict = db.packages_dict
routes_dict = db.routes_dict


package_data.json has been loaded in 14.20 seconds.
route_data.json has been loaded in 10.65 seconds.
actual_sequences.json has been loaded in 0.95 seconds.
We are ready to proceed. All files have been loaded in 25.79 seconds.


In [12]:
# Print the number of routes per city and each city's share of the total.
db.print_info_by_city()


Number of routes in Los Angeles : 2876
Number of routes in Seattle     : 1079
Number of routes in Chicago     : 1002
Number of routes in Boston      : 929
Number of routes in Austin      : 214
Total number of routes:          6100

Percentage of routes in Los Angeles : 47.15%
Percentage of routes in Seattle     : 17.69%
Percentage of routes in Chicago     : 16.43%
Percentage of routes in Boston      : 15.23%
Percentage of routes in Austin      : 3.51%


# Create the distance matrix object

In [None]:
# Build an empty distance-matrix object, fill it from the Los Angeles
# driving-distances CSV, then compute and print its statistics.
# The call order matters: the file has to be loaded before the statistics
# are calculated and printed.
dist_matrix = lmr.distance_matrix()
dist_matrix.load_support_matrix_file(
    filename="../../data/driving_distances/los_angeles.csv"
)
dist_matrix.calculate_matrix_statistics()
dist_matrix.print_info()
# TODO: Add a __get__ method to the distance_matrix class to return the matrix as a numpy array
# NOTE(review): `__get__` is Python's descriptor hook; for NumPy conversion the
# intended method is probably `__array__` (or `__getitem__` for indexing) -- confirm.


# Analyze the data

In [None]:
# Analysis object restricted to the Los Angeles routes; `routes_dict` is the
# per-city mapping taken from the serializer.
los_angeles = lmr.analysis(
    name="los_angeles",
    # The route objects are passed as a plain list.
    routes=[*routes_dict["Los Angeles"].values()],
)


Circuity Factor analysis

In [None]:
# Compute the inputs for the circuity factor on the actual (not planned)
# sequences: euclidean distances first, then driving distances taken from
# the distance matrix loaded earlier, and finally the circuity factor itself.
los_angeles.calculate_euclidean_distances(planned=False, actual=True)
# NOTE(review): unlike the two neighbouring calls, no `actual=` flag is passed
# here -- confirm that the method's default matches the intent.
los_angeles.calculate_driving_distances(
    planned=False, actual_distance_matrix=dist_matrix.routes_matrix
)
los_angeles.calculate_circuity_factor(planned=False, actual=True)


In [None]:
# Presumably plots the circuity factors computed in the previous cell -- run
# that cell first (TODO confirm against lmr_analyzer).
los_angeles.plot_circuity_factor()


Packages status analysis

In [None]:
# Package-status step for the Los Angeles routes; the result is not displayed
# here and is presumably stored on `los_angeles` -- TODO confirm.
los_angeles.calculate_packages_status()


Centroid analysis

In [None]:
# Centroid step for the Los Angeles routes; presumably one centroid per route,
# stored on `los_angeles` -- TODO confirm.
los_angeles.calculate_centroids()


Calculate bbox areas

In [None]:
# Per-route bounding-box step; presumably a prerequisite for the overall
# bounding box computed in the next cell -- TODO confirm.
los_angeles.calculate_each_route_bbox()


In [None]:
# Overall bounding-box step across the Los Angeles routes; presumably relies on
# the per-route boxes having been calculated first -- TODO confirm.
los_angeles.find_overall_bbox()


Export data to CSV

In [None]:
# Build the per-route summary; as the last expression of the cell, any value
# returned is displayed. Presumably gathers the metrics calculated in the
# cells above -- TODO confirm.
los_angeles.summarize_by_routes()


In [None]:
# Export the per-route summary to CSV. The filename is hard-coded with a
# date/version tag; change it when re-exporting (presumably an existing file
# with the same name is overwritten -- TODO confirm).
los_angeles.export_summary_by_routes(filename="summary_by_routes_LA_18nov_v1.csv")
