# lmr_analyzer Example
by Guilherme Fernandes Alves, Oct 2022 

In [1]:
# Enable IPython's autoreload extension in mode 2: modules are re-imported
# before each cell executes, so edits to lmr_analyzer apply without a kernel restart.
%load_ext autoreload
%autoreload 2

In [2]:
from datetime import datetime

import boto3
from botocore import UNSIGNED
from botocore.config import Config

import lmr_analyzer as lmr

## Testing lmr_analyzer library

Testing package object definition

In [3]:
# Keyword arguments describing one small, already-delivered parcel.
package_spec = {
    "name": "example_pck",
    "dimensions": (25, 10, 7),  # width, height, length (cm)
    "status": "delivered",
    "weight": 1.5,  # kg
    "price": 9.90,  # $
}
example_pck = lmr.Package(**package_spec)

# Print the package attributes as a quick sanity check.
example_pck.print_info()

Package name: example_pck
Dimensions: (25, 10, 7)
Status: delivered
Weight: 1.5
Price: 9.9
Volume: 1750


Testing stop object definitions

In [4]:
def _make_example_stop(stop_name, window_opens, window_closes):
    """Build a pickup stop at (0, 0) that carries ``example_pck`` and has no service time."""
    return lmr.Stop(
        name=stop_name,
        location=(0, 0),  # lat, lon
        location_type="pickup",
        time_window=(window_opens, window_closes),
        packages=[example_pck],  # a fresh list for every stop
        planned_service_time=0,
    )


# Two stops on the same morning, differing only in name and time window.
example_stp1 = _make_example_stop(
    "example_stp1", datetime(2020, 1, 1, 8, 0), datetime(2020, 1, 1, 8, 50)
)
example_stp2 = _make_example_stop(
    "example_stp2", datetime(2020, 1, 1, 9, 0), datetime(2020, 1, 1, 9, 10)
)
example_stp1.__dict__

{'name': 'example_stp1',
 'location': (0, 0),
 'location_type': 'pickup',
 'time_window': (datetime.datetime(2020, 1, 1, 8, 0),
  datetime.datetime(2020, 1, 1, 8, 50)),
 'packages': [<lmr_analyzer.package.Package at 0x20114377650>],
 'planned_service_time': 0,
 'packages_list': [<lmr_analyzer.package.Package at 0x20114377650>],
 'status_list': ['delivered']}

Testing route object definition

In [5]:
# Stops in the order they were actually visited.
visited_stops = [example_stp1, example_stp2]

r1 = lmr.Route(name="r1", stops=list(visited_stops))
# The actual sequence is given by stop name, in visiting order.
r1.set_actual_sequence([stop.name for stop in visited_stops])
# Route scoring stays disabled in this example: r1.evaluate_route_scores()

vars(r1)

{'name': 'r1',
 'stops': {'example_stp1': <lmr_analyzer.stop.Stop at 0x2011560b680>,
  'example_stp2': <lmr_analyzer.stop.Stop at 0x201156618b0>},
 'departure_time': None,
 'vehicle': None,
 'stops_names': ['example_stp1', 'example_stp2'],
 'number_of_stops': 2,
 'actual_sequence': [<lmr_analyzer.stop.Stop at 0x2011560b680>,
  <lmr_analyzer.stop.Stop at 0x201156618b0>],
 'number_of_actual_stops': 2,
 'actual_sequence_names': ['example_stp1', 'example_stp2']}

In [6]:
# Wrap the single example route in an Analysis and inspect the resulting attributes.
example_routes = [r1]
a1 = lmr.Analysis(name="a1", routes=example_routes)
vars(a1)

{'name': 'a1',
 'routes': [<lmr_analyzer.route.Route at 0x20115477b00>],
 'routes_dict': {'r1': <lmr_analyzer.route.Route at 0x20115477b00>}}

## Serializing Amazon data

### Connecting to the Amazon database

Create a boto3 S3 client that sends unsigned (anonymous) requests, so no AWS credentials are required

In [7]:
# UNSIGNED disables request signing, which lets the client read the public bucket anonymously.
anonymous_config = Config(signature_version=UNSIGNED)
s3 = boto3.client("s3", config=anonymous_config)

List all the files available in the database.
Depending on your internet connection, this may take around 4 minutes.
It is a good idea to keep the files saved locally after downloading them.

In [8]:
# List the training input files in the public bucket; --no-sign-request sends the request unsigned.
!aws s3 ls --no-sign-request "s3://amazon-last-mile-challenges/almrrc2021/almrrc2021-data-training/model_build_inputs/"

2022-06-02 23:51:31    9665078 actual_sequences.json
2022-06-02 23:51:32     414742 invalid_sequence_scores.json
2022-06-02 23:51:21  375437806 package_data.json
2022-06-02 23:51:33         74 readme.md
2022-06-02 23:51:21   78972162 route_data.json
2022-06-02 23:51:21 1817146363 travel_times.json


Downloading the files

In [9]:
!aws s3 cp --no-sign-request "s3://amazon-last-mile-challenges/almrrc2021/almrrc2021-data-training/model_build_inputs/" ../../data/last_mile_data/amazon-last-mile-challenges/  --recursive

Completed 256.0 KiB/2.1 GiB (153.4 KiB/s) with 6 file(s) remaining
Completed 512.0 KiB/2.1 GiB (298.8 KiB/s) with 6 file(s) remaining
Completed 768.0 KiB/2.1 GiB (439.7 KiB/s) with 6 file(s) remaining
Completed 1.0 MiB/2.1 GiB (573.3 KiB/s) with 6 file(s) remaining  
Completed 1.2 MiB/2.1 GiB (708.5 KiB/s) with 6 file(s) remaining  
Completed 1.4 MiB/2.1 GiB (780.7 KiB/s) with 6 file(s) remaining  
download: s3://amazon-last-mile-challenges/almrrc2021/almrrc2021-data-training/model_build_inputs/invalid_sequence_scores.json to ..\..\data\last_mile_data\amazon-last-mile-challenges\invalid_sequence_scores.json
Completed 1.4 MiB/2.1 GiB (780.7 KiB/s) with 5 file(s) remaining
Completed 1.6 MiB/2.1 GiB (880.6 KiB/s) with 5 file(s) remaining
Completed 1.9 MiB/2.1 GiB (1010.2 KiB/s) with 5 file(s) remaining
Completed 2.1 MiB/2.1 GiB (1.1 MiB/s) with 5 file(s) remaining   
Completed 2.4 MiB/2.1 GiB (1.2 MiB/s) with 5 file(s) remaining   
Completed 2.6 MiB/2.1 GiB (1.3 MiB/s) with 5 file(s) rema

### Run the serializing process

In [10]:
# Point the serializer at the folder that holds the downloaded Amazon JSON files.
amazon_data_dir = "../../data/last_mile_data/amazon-last-mile-challenges/"
db = lmr.AmazonSerializer(root_directory=amazon_data_dir)

# Short aliases for the two dictionaries exposed by the serializer
packages_dict = db.packages_dict
routes_dict = db.routes_dict

package_data.json has been loaded in 6.40 seconds.
route_data.json has been loaded in 7.44 seconds.
actual_sequences.json has been loaded in 0.64 seconds.
We are ready to proceed. All files have been loaded in 14.48 seconds.


In [11]:
# Print how many routes were loaded for each city, and each city's share of the total.
db.print_info_by_city()

Number of routes in Los Angeles : 2876
Number of routes in Seattle     : 1079
Number of routes in Chicago     : 1002
Number of routes in Boston      : 929
Number of routes in Austin      : 214
Total number of routes:          6100

Percentage of routes in Los Angeles : 47.15%
Percentage of routes in Seattle     : 17.69%
Percentage of routes in Chicago     : 16.43%
Percentage of routes in Boston      : 15.23%
Percentage of routes in Austin      : 3.51%


## Create the DistanceMatrix object

In [19]:
# NOTE(review): this path climbs three directories ("../../../") while the Amazon data
# cells use two ("../../") - confirm the relative location is intended.
la_distances_csv = "../../../data/driving_distances/los_angeles.csv"

# Create the matrix object, fill it from the Los Angeles CSV, then compute and print its statistics.
dist_matrix = lmr.DistanceMatrix()
dist_matrix.load_support_matrix_file(filename=la_distances_csv)
dist_matrix.calculate_matrix_statistics()
dist_matrix.print_info()
# TODO: give DistanceMatrix an accessor that returns the matrix as a numpy array
# (e.g. `__array__` or a plain getter; `__get__` is the descriptor hook and is
# probably not what is wanted here).

Awesome, the distance matrix loaded successfully!
The routes matrix was also loaded and saved as an attribute.
Number of distances stored: 412440
Maximum distance:           133.346 km
Minimum distance:           0.000 km
Average distance:           0.586 km
Standard deviation:         2.968 km


## Analyze the data

In [21]:
# Gather every Los Angeles route object and analyze them together.
la_routes = list(routes_dict["Los Angeles"].values())
los_angeles = lmr.Analysis(name="los_angeles", routes=la_routes)

# Distance metrics are computed for the actual sequences only (planned=False).
# The call order is kept as-is; later steps presumably rely on earlier results.
los_angeles.calculate_euclidean_distances(planned=False, actual=True)
los_angeles.calculate_driving_distances(
    planned=False,
    actual_distance_matrix=dist_matrix.routes_matrix,
)
los_angeles.calculate_circuity_factor(planned=False, actual=True)

# Package status counts and spatial descriptors (centroids, bounding boxes).
los_angeles.calculate_packages_status()
los_angeles.calculate_centroids()
los_angeles.calculate_each_route_bbox()
los_angeles.find_overall_bbox()

# Despite the df_ prefix, later cells use this as a dict of per-route dicts.
df_sum_la = los_angeles.summarize_by_routes()



## Going beyond

In [22]:
def _sum_over_routes(summary, key):
    """Add up ``key`` across all per-route summary dicts, counting a missing key as 0."""
    return sum(route_summary.get(key, 0) for route_summary in summary.values())


# Totals across every route in the Los Angeles summary
packages = _sum_over_routes(df_sum_la, "Number of packages")
rejected = _sum_over_routes(df_sum_la, "Number of rejected packages")
failed_at = _sum_over_routes(df_sum_la, "Number of failed attempted packages")

# Express rejected / failed-attempt packages as a percentage of all packages.
# With zero packages the ratio is undefined, so report NaN instead of raising
# ZeroDivisionError.
if packages:
    rejected_p = 100 * rejected / packages
    failed_p = 100 * failed_at / packages
else:
    rejected_p = failed_p = float("nan")

print(f"Percentage of rejected packages: {rejected_p:.5}")
print(f"Percentage of failed attempted packages: {failed_p:.5}")

Percentage of rejected packages: 0.0026186
Percentage of failed attempted packages: 0.93309


Export data to CSV

In [23]:
# Preview one route's summary instead of calling summarize_by_routes() again and
# dumping every route into the cell output; `df_sum_la` already holds that result.
dict(list(df_sum_la.items())[:1])

{'RouteID_00143bdd-0a6b-49ec-bb35-36593d303e77': {'Name': 'RouteID_00143bdd-0a6b-49ec-bb35-36593d303e77',
  'Centroid Lat - mean - (deg)': np.float64(34.09610363559322),
  'Centroid Lon - mean - (deg)': np.float64(-118.28463171186442),
  'Centroid Lat - stdev - (deg)': np.float64(0.0035569612475200778),
  'Centroid Lon - stdev - (deg)': np.float64(0.004672390545374135),
  'Number of delivery stops': 118,
  'Number of depot stops': 1,
  'Number of packages': 276,
  'Number of delivered packages': 274,
  'Number of rejected packages': 0,
  'Number of failed attempted packages': 2,
  'Avg packages per stop': 2.3389830508474576,
  'Rejected packages (%)': 0.006258692628650904,
  'Delivered packages (%)': 0.9906428937200317,
  'Failed attempted packages (%)': 0.009330920348214701,
  'Bbox area - (km^2)': np.float64(291.9761331135552),
  'Bbox north - (deg)': 34.102251,
  'Bbox south - (deg)': 34.007369,
  'Bbox east - (deg)': -118.143927,
  'Bbox west - (deg)': -118.294127,
  'Total Euclide

In [None]:
# Export step, left disabled. Uncomment to run it; judging by the method name it
# presumably writes the per-route summary to "export.csv" - confirm before relying on it.
# los_angeles.export_summary_by_routes(filename="export.csv")