# Checking Container Dwell Times

This works with the CSV export of ConFlowGen.

Import libraries

In [None]:
import os
import pathlib
import ipywidgets as widgets
import pandas as pd
from IPython.display import Markdown
import matplotlib.pyplot as plt
from matplotlib import gridspec

Select input data

In [None]:
# Locate the ConFlowGen export directory relative to the working directory of
# this notebook (three levels up, then conflowgen/data/exports).
folder_of_this_jupyter_notebook = pathlib.Path.cwd()
export_folder = os.path.join(
    folder_of_this_jupyter_notebook,
    os.pardir,
    os.pardir,
    os.pardir,
    "conflowgen",
    "data",
    "exports"
)

# Every sub-directory of the export folder is one selectable export.
# os.listdir() returns its entries in arbitrary order, so the folders are
# sorted explicitly by modification time to really show the newest one first.
folders = sorted(
    (
        folder
        for folder in os.listdir(export_folder)
        if os.path.isdir(os.path.join(export_folder, folder))
    ),
    key=lambda folder: os.path.getmtime(os.path.join(export_folder, folder)),
    reverse=True
)

dropdown_field = widgets.Dropdown(
    options=folders,  # newest export first, see sorting above
    description='',
    layout={'width': 'max-content'}
)
dropdown_label = widgets.Label(value="Select the exported output: ")
display(widgets.HBox([dropdown_label, dropdown_field]))

In [None]:
# Directory of the export that is currently selected in the dropdown widget.
path_to_selected_exported_content = os.path.join(export_folder, dropdown_field.value)

print(f"Working with directory {path_to_selected_exported_content}")

## Load containers

In [None]:
# The vehicle reference columns are read with pandas' nullable "Int64" dtype so
# that missing references do not turn the whole column into floats.
vehicle_reference_columns = (
    "delivered_by_truck",
    "picked_up_by_truck",
    "delivered_by_large_scheduled_vehicle",
    "picked_up_by_large_scheduled_vehicle",
)

path_to_containers = os.path.join(path_to_selected_exported_content, "containers.csv")
print(f"Opening {path_to_containers}")
df_containers = pd.read_csv(
    path_to_containers,
    index_col="id",
    dtype={column: "Int64" for column in vehicle_reference_columns},
)

df_containers

Check the number of containers delivered by each large scheduled vehicle (deep sea vessels, feeders, barges, and trains).

In [None]:
# Per delivering vehicle id: number of non-null entries in every container column.
df_containers.groupby("delivered_by_large_scheduled_vehicle").count()

## Load scheduled vehicles

Load the vehicles to enrich the information regarding the arrival and departure of the containers.

In [None]:
# One CSV file per type of large scheduled vehicle inside the selected export.
path_to_deep_sea_vessels, path_to_feeders, path_to_barges, path_to_trains = (
    os.path.join(path_to_selected_exported_content, file_name)
    for file_name in ("deep_sea_vessels.csv", "feeders.csv", "barges.csv", "trains.csv")
)

# Vehicle type name -> path of its CSV file; later cells iterate over this mapping.
scheduled_vehicle_file_paths = dict(
    deep_sea_vessels=path_to_deep_sea_vessels,
    feeders=path_to_feeders,
    barges=path_to_barges,
    trains=path_to_trains,
)

# Stop right away if one of the expected files is missing from the export.
for name, path in scheduled_vehicle_file_paths.items():
    print(f"Check file exists for vehicle {name}.")
    assert os.path.isfile(path)

print("All files exist.")

In [None]:
# Remove vehicle types whose file holds no data (at most 4 bytes). The loop
# walks over a snapshot of the names because entries are removed from the
# mapping while iterating.
for name in tuple(scheduled_vehicle_file_paths):
    print(f"Check file size for vehicle {name}")
    path = scheduled_vehicle_file_paths[name]
    size_in_bytes = os.path.getsize(path)
    if size_in_bytes > 4:
        continue
    print("    This file is empty, ignoring it in the analysis from now on")
    scheduled_vehicle_file_paths.pop(name)

In [None]:
# Read every remaining vehicle file; the first CSV column becomes the index and
# "scheduled_arrival" is parsed as datetime.
scheduled_vehicle_dfs = {}
for name, path in scheduled_vehicle_file_paths.items():
    scheduled_vehicle_dfs[name] = pd.read_csv(path, index_col=0, parse_dates=["scheduled_arrival"])

# Tag every frame with its vehicle type and show it ordered by arrival time.
for name, df in scheduled_vehicle_dfs.items():
    display(Markdown(f"#### {name}"))
    df["vehicle_type"] = name
    display(df.sort_values("scheduled_arrival"))

In [None]:
# Stack the frames of all vehicle types into one frame ordered by its index.
df_large_scheduled_vehicle = pd.concat(list(scheduled_vehicle_dfs.values())).sort_index()
df_large_scheduled_vehicle.info()
df_large_scheduled_vehicle

Plot arrival pattern.

In [None]:
plt.figure(figsize=(15, 3))

# One marker per vehicle arrival:
#   x - scheduled arrival time
#   y - row of the vehicle type on the y-axis
#   z - marker area (moved capacity divided by 20)
# x, y, z and y_axis stay at notebook level because a later plotting cell reuses them.
x, y, z = [], [], []
y_axis = []  # (y position, vehicle type name) pairs, used as tick positions and labels

y_scaling_factor = 2

for i, (name, df) in enumerate(scheduled_vehicle_dfs.items()):
    y_position = i / y_scaling_factor
    y_axis.append((y_position, name))
    # Column-wise extension replaces the former row-by-row iterrows() loop;
    # an empty frame simply contributes nothing.
    x.extend(df["scheduled_arrival"].tolist())
    y.extend([y_position] * len(df))
    z.extend((df["moved_capacity"] / 20).tolist())

plt.xticks(rotation=45)
plt.yticks(*list(zip(*y_axis)))
plt.scatter(x, y, s=z, color='gray')
# Keep half a unit of margin around the rows. The upper limit follows the
# number of vehicle types; a fixed limit of 1.5 coincided with the row of the
# fourth vehicle type and cut its markers in half.
plt.ylim([-0.5, (len(y_axis) - 1) / y_scaling_factor + 0.5])
plt.show()

Transform data to check how many TEU are delivered and picked up by each vehicle.

In [None]:
# Vehicle id -> accumulated TEU of all containers that vehicle delivers / picks up.
vehicle_to_teu_to_deliver = {}
vehicle_to_teu_to_pickup = {}

# (column naming the mode of transport, column holding the vehicle id, target mapping)
teu_bookkeeping = (
    ("delivered_by", "delivered_by_large_scheduled_vehicle", vehicle_to_teu_to_deliver),
    ("picked_up_by", "picked_up_by_large_scheduled_vehicle", vehicle_to_teu_to_pickup),
)

for _, container in df_containers.iterrows():
    # The container length is converted to TEU by dividing by 20.
    teu = container["length"] / 20
    assert 1 <= teu <= 2.5

    for transport_column, vehicle_column, teu_per_vehicle in teu_bookkeeping:
        # Containers moved by truck are not booked on a large scheduled vehicle.
        if container[transport_column] == "truck":
            continue
        vehicle = container[vehicle_column]
        teu_per_vehicle[vehicle] = teu_per_vehicle.get(vehicle, 0) + teu

vehicle_to_teu_to_deliver, vehicle_to_teu_to_pickup

Add transformed data to vehicles.

In [None]:
# The series are aligned on the vehicle id index; vehicles without any
# container in the respective direction end up with NaN.
s_delivery = pd.Series(vehicle_to_teu_to_deliver)
s_pickup = pd.Series(vehicle_to_teu_to_pickup)
df_large_scheduled_vehicle = df_large_scheduled_vehicle.assign(
    capacity_delivery=s_delivery,
    capacity_pickup=s_pickup,
)
df_large_scheduled_vehicle

In [None]:
# Neither the delivered nor the picked-up TEU of a vehicle may exceed its
# capacity; vehicles without containers count as 0 TEU.
for large_scheduled_vehicle_id in df_large_scheduled_vehicle.index:
    capacity_in_teu = df_large_scheduled_vehicle.loc[large_scheduled_vehicle_id, "capacity_in_teu"]
    for teu_per_vehicle in (vehicle_to_teu_to_deliver, vehicle_to_teu_to_pickup):
        moved_teu = teu_per_vehicle.get(large_scheduled_vehicle_id, 0)
        assert moved_teu <= capacity_in_teu, (
            f"{moved_teu} is more than {capacity_in_teu} for vehicle "
            f"with id {large_scheduled_vehicle_id}"
        )

## Load trucks

In [None]:
# The truck file is expected in the same export directory as the vehicle files.
path_to_trucks = os.path.join(path_to_selected_exported_content, "trucks.csv")
assert os.path.isfile(path_to_trucks)

In [None]:
# Timestamp columns of the truck export, grouped by direction of the move.
pickup_time_columns = [
    "planned_container_pickup_time_prior_berthing",
    "realized_container_pickup_time",
]
delivery_time_columns = [
    "planned_container_delivery_time_at_window_start",
    "realized_container_delivery_time",
]

df_truck = pd.read_csv(
    path_to_trucks,
    index_col=0,
    parse_dates=pickup_time_columns + delivery_time_columns,
)
df_truck

In [None]:
# Consistency checks on the truck data: each mask marks the offending rows and
# must not contain a single True.
picks_up_without_time = df_truck["picks_up_container"] & df_truck["realized_container_pickup_time"].isna()
assert not picks_up_without_time.any(), \
       "If a truck picks up a container, it should always have a realized container pickup time"

delivers_without_time = df_truck["delivers_container"] & df_truck["realized_container_delivery_time"].isna()
assert not delivers_without_time.any(), \
       "If a truck deliver a container, it should always have a realized container delivery time"

does_nothing = ~(df_truck["delivers_container"] | df_truck["picks_up_container"])
assert not does_nothing.any(), \
       "There is no truck that neither delivers or picks up a container"

In [None]:
# Hourly number of trucks that deliver / pick up a container, derived from the
# truck export and indexed by the realized delivery / pickup time.
# NOTE(review): the notebook used `container_deliveries_by_truck`,
# `container_pickups` and `arrivals` without defining them in any cell, which
# fails with a NameError on "Restart & Run All". The two frames below are
# reconstructed from how they are used (DatetimeIndex, numeric
# "delivers_container" / "picks_up_container" columns, y-label "Number trucks
# per hour") - please confirm that this is the intended aggregation.
one_hour = pd.Timedelta(hours=1)
container_deliveries_by_truck = (
    df_truck.loc[df_truck["delivers_container"], ["realized_container_delivery_time", "delivers_container"]]
    .set_index("realized_container_delivery_time")
    .astype(int)
    .resample(one_hour)
    .sum()
)
container_pickups = (
    df_truck.loc[df_truck["picks_up_container"], ["realized_container_pickup_time", "picks_up_container"]]
    .set_index("realized_container_pickup_time")
    .astype(int)
    .resample(one_hour)
    .sum()
)

fig = plt.figure(figsize=(10, 5))
# set height ratios for subplots
gs = gridspec.GridSpec(2, 1, height_ratios=[2, 1])

# the upper subplot: truck deliveries per hour
ax1 = plt.subplot(gs[0])

plt.title("Relationship of vessels and truck arrivals")

ax1.set_ylabel("Number trucks per hour")
# x_compat=True makes pandas use plain matplotlib date units, so the x-axis can
# be shared with the matplotlib-only scatter plot below.
ax12 = container_deliveries_by_truck["delivers_container"].plot(ax=ax1, color="dimgray", x_compat=True)
# Fixed display window; adjust it when the export covers other dates.
ax12.set_xlim([pd.Timestamp("2021-06-15"), pd.Timestamp("2021-08-15")])

# the lower subplot: arrivals of the large scheduled vehicles
ax2 = plt.subplot(gs[1], sharex=ax12)
# Small dot at every arrival (stands in for the undefined `arrivals` frame),
# overlaid with a marker whose area reflects the moved capacity. x, y, z and
# y_axis come from the arrival pattern cell further up.
ax2.plot(x, y, color='gray', marker=".", linestyle="None")
ax2.scatter(x, y, s=z, color='gray')

ticks, labels = list(zip(*y_axis))
ax2.set_yticks(ticks)
ax2.set_yticklabels([label.capitalize().replace("_", " ") for label in labels])
ax2.set_ylim([-0.5, 2])

ax2.set_xlabel("")
plt.show()

In [None]:
# Mean value per hour of the day, once for pickups and once for deliveries.
# `container_pickups` and `container_deliveries_by_truck` must already exist
# as frames with a DatetimeIndex (presumably hourly truck counts).
hour_of_day_profiles = (
    (
        container_pickups,
        "picks_up_container",
        "Container pickups at each hour of the day",
        "Realized container pickup time",
    ),
    (
        container_deliveries_by_truck,
        "delivers_container",
        "Container deliveries at each hour of the day",
        "Realized container delivery time",
    ),
)

for trucks_over_time, column, title, x_label in hour_of_day_profiles:
    # Take the axes of the figure that was just created. The delivery plot
    # used to reuse the axes of the pickup figure, which had already been
    # shown, so it never received its hourly ticks.
    ax = trucks_over_time.groupby(trucks_over_time.index.hour).mean()[column].plot()
    ax.set_title(title)
    ax.xaxis.set_ticks(range(24))
    ax.set_xlabel(x_label)
    plt.show()

These plots show the mean number of container pickups and deliveries by truck for each day of the week (weekday index 0 = Monday).

In [None]:
days = ('Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday')

# Mean value per weekday, once for pickups and once for deliveries.
# `container_pickups` and `container_deliveries_by_truck` must already exist
# as frames with a DatetimeIndex (presumably hourly truck counts).
weekday_profiles = (
    (container_pickups, "picks_up_container", "Container pickups at each day of the week"),
    (container_deliveries_by_truck, "delivers_container", "Container deliveries at each day of the week"),
)

for trucks_over_time, column, title in weekday_profiles:
    mean_per_weekday = (
        trucks_over_time.groupby(trucks_over_time.index.weekday).mean()[column]
        # Always provide all seven weekdays (0 = Monday ... 6 = Sunday). If a
        # weekday is missing from the data, the seven labels would otherwise
        # not match the number of bars.
        .reindex(range(len(days)))
    )
    ax = mean_per_weekday.plot.bar()
    ax.set_xlabel("")
    ax.set_xticklabels(days)
    ax.set_title(title)
    plt.show()

In [None]:
# Keep only containers that reference a large scheduled vehicle on both ends.
has_vehicle_on_both_ends = (
    df_containers["picked_up_by_large_scheduled_vehicle"].notna()
    & df_containers["delivered_by_large_scheduled_vehicle"].notna()
)

# Attach the vehicle data twice. A suffix is only added to columns whose name
# is already taken: the columns of the picking-up vehicle keep their plain
# names unless they clash with a container column, and the columns of the
# delivering vehicle, which clash with the ones added first, get "_delivered_by".
delivered_and_picked_up_by_large_vessels_df = (
    df_containers.loc[has_vehicle_on_both_ends]
    .join(df_large_scheduled_vehicle, on="picked_up_by_large_scheduled_vehicle", rsuffix="_picked_up")
    .join(df_large_scheduled_vehicle, on="delivered_by_large_scheduled_vehicle", rsuffix="_delivered_by")
)

delivered_and_picked_up_by_large_vessels_df

In [None]:
# "scheduled_arrival" stems from the join on the picking-up vehicle and
# "scheduled_arrival_delivered_by" from the join on the delivering vehicle
# (assuming the container export has no "scheduled_arrival" column of its own).
arrival_of_picking_up_vehicle = delivered_and_picked_up_by_large_vessels_df["scheduled_arrival"]
arrival_of_delivering_vehicle = delivered_and_picked_up_by_large_vessels_df["scheduled_arrival_delivered_by"]

dwell_time = arrival_of_picking_up_vehicle - arrival_of_delivering_vehicle
dwell_time.describe()

In [None]:
# Express the dwell time in hours by dividing by a one-hour Timedelta.
# astype("timedelta64[h]") raises a TypeError since pandas 2.0, which only
# supports the resolutions s, ms, us and ns. The division works in old and new
# pandas versions and keeps fractional hours instead of flooring them.
dwell_time_in_hours = dwell_time / pd.Timedelta(hours=1)
dwell_time_in_hours.plot.hist(bins=30, color="gray")
plt.xlabel("Hours between delivery and onward transportation (except trucks)")
plt.ylabel("Number container")
plt.show()