# Checking container properties

This file inspects the CSV output of ConFlowGen.
Here, it checks whether the synthetic output corresponds to the initial input distributions.
In addition, the onward transportation is inspected.
It assumes the default values as they are initially seeded into a new database.

In [None]:
import os
import pathlib
import ipywidgets as widgets
import pandas as pd
from IPython.display import Markdown
import matplotlib.pyplot as plt

## Read in the files

In [None]:
# Locate the export directory relative to the notebook's current working directory.
folder_of_this_jupyter_notebook = pathlib.Path.cwd()
export_folder = os.path.join(
    folder_of_this_jupyter_notebook,
    os.pardir,
    os.pardir,
    os.pardir,
    "conflowgen",
    "data",
    "exports"
)

# Every sub-directory of the export folder is offered as one selectable export.
# os.listdir() returns its entries in arbitrary order, so sort them explicitly
# to get a deterministic, platform-independent ordering.
folders = sorted(
    folder
    for folder in os.listdir(export_folder)
    if os.path.isdir(os.path.join(export_folder, folder))
)

dropdown_field = widgets.Dropdown(
    # Descending name order.
    # NOTE(review): this shows the newest export first only if the folder names
    # sort chronologically (e.g. because they carry a timestamp) - confirm.
    options=list(reversed(folders)),
    description='',
    layout={'width': 'max-content'}
)
dropdown_label = widgets.Label(value="Select the exported output: ")
display(widgets.HBox([dropdown_label, dropdown_field]))

In [None]:
# Resolve the export picked in the dropdown and the container table stored inside it.
path_to_selected_exported_content = os.path.join(export_folder, dropdown_field.value)
path_to_containers = os.path.join(path_to_selected_exported_content, "containers.csv")

print(f"Opening {path_to_containers}")

In [None]:
# These columns are read with pandas' nullable integer dtype ("Int64") so that
# missing entries stay <NA> instead of forcing the column to float.
nullable_integer_columns = [
    "delivered_by_truck",
    "picked_up_by_truck",
    "delivered_with_large_scheduled_vehicle",
    "picked_up_by_large_scheduled_vehicle",
]

df = pd.read_csv(
    path_to_containers,
    index_col="id",
    dtype={column: "Int64" for column in nullable_integer_columns},
)

df

In [None]:
# Print a summary of the loaded table: columns, dtypes, and non-null counts.
df.info()

Replace the underscores in the column names with spaces so that the labels in the visualizations read more naturally.

In [None]:
# Column names are plain text, so request a literal (non-regex) replacement explicitly;
# the default of `regex` differs between pandas versions.
df.columns = df.columns.str.replace("_", " ", regex=False)

## Length distribution

In [None]:
# Share of each container length among all containers in the export.
length_counts = df["length"].value_counts()
ax = length_counts.plot.pie(label='')
ax.set_title("Container lengths")
plt.show()

In [None]:
# The export must not contain any container of length -1 or 45.
container_lengths = df["length"]
assert not (container_lengths == -1).any()
assert not (container_lengths == 45).any()

## Weight distribution

In [None]:
# Weight groups are the distinct weights of the non-empty containers of one length.
is_not_empty = df["storage requirement"] != "empty"

weights_20_foot = df.loc[(df["length"] == 20) & is_not_empty, "weight"]
weight_groups_20_foot = sorted(weights_20_foot.unique())
number_weight_groups_20_foot_containers = len(weight_groups_20_foot)
display(Markdown(f"Weight groups for 20' containers: {weight_groups_20_foot}"))
display(Markdown(f"Number of weight groups for 20' containers: {number_weight_groups_20_foot_containers}"))

weights_40_foot = df.loc[(df["length"] == 40) & is_not_empty, "weight"]
weight_groups_40_foot = sorted(weights_40_foot.unique())
number_weight_groups_40_foot_containers = len(weight_groups_40_foot)
display(Markdown(f"Weight groups for 40' containers: {weight_groups_40_foot}"))
display(Markdown(f"Number of weight groups for 40' containers: {number_weight_groups_40_foot_containers}"))

In [None]:
# One entry per container length: (length in feet, plot title, number of histogram bins).
histogram_settings = (
    (20, "20' containers", number_weight_groups_20_foot_containers),
    (40, "40' containers", number_weight_groups_40_foot_containers),
)
is_not_empty = df["storage requirement"] != "empty"

# NOTE(review): the bin counts are reused for the plots that include empty containers,
# although they were derived from the non-empty containers only - confirm this is intended.
for heading, restrict_to_non_empty in (
    ("### Without empty containers", True),
    ("### With empty containers", False),
):
    display(Markdown(heading))

    for container_length, title, number_of_bins in histogram_settings:
        selection = df["length"] == container_length
        if restrict_to_non_empty:
            selection = selection & is_not_empty
        df.loc[selection, "weight"].plot.hist(bins=number_of_bins)
        plt.title(title)
        plt.show()

## Check foreign key integrity

In [None]:
# A container delivered by truck must reference the truck that delivered it.
delivered_by_truck = df["delivered by"] == "truck"
delivering_truck_is_missing = delivered_by_truck & df["delivered by truck"].isna()
assert not delivering_truck_is_missing.any()

In [None]:
# A container not delivered by truck must reference a large scheduled vehicle instead.
delivered_by_other_vehicle = df["delivered by"] != "truck"
delivering_vehicle_is_missing = (
    delivered_by_other_vehicle & df["delivered by large scheduled vehicle"].isna()
)
assert not delivering_vehicle_is_missing.any()

In [None]:
# A container picked up by truck must reference the truck that picks it up.
picked_up_by_truck = df["picked up by"] == "truck"
picking_up_truck_is_missing = picked_up_by_truck & df["picked up by truck"].isna()
assert not picking_up_truck_is_missing.any()

In [None]:
# A container not picked up by truck must reference a large scheduled vehicle instead.
picked_up_by_other_vehicle = df["picked up by"] != "truck"
picking_up_vehicle_is_missing = (
    picked_up_by_other_vehicle & df["picked up by large scheduled vehicle"].isna()
)
assert not picking_up_vehicle_is_missing.any()

## Storage requirement

In [None]:
# Share of each storage requirement among all containers in the export.
storage_requirement_counts = df["storage requirement"].value_counts()
ax = storage_requirement_counts.plot.pie(label='')
ax.set_title("Storage requirements")
plt.show()

In [None]:
# Summary statistics of the weights of empty containers, separately per length.
is_empty_container = df["storage requirement"] == "empty"

display(Markdown("Weight distribution of 20' empty containers"))
display(df.loc[is_empty_container & (df["length"] == 20), "weight"].describe())
display(Markdown("Weight distribution of 40' empty containers"))
df.loc[is_empty_container & (df["length"] == 40), "weight"].describe()

## Containers and the vehicle type for delivering and picking up

In [None]:
def make_autopct(values):
    """Create an ``autopct`` callback that labels pie wedges with share and count.

    Args:
        values: The absolute counts behind the pie chart. Any iterable of numbers
            is accepted; it is summed exactly once, so one-shot iterators work too.

    Returns:
        A function that maps a wedge's percentage to a two-line label: the
        percentage with two decimals, followed by the absolute count in
        parentheses. The count is the percentage converted back to an absolute
        number using the total of ``values`` and rounded to an integer.
    """
    # Sum once up front: the callback is invoked for each wedge, and re-summing on
    # every call would yield 0 from the second call on if `values` were an iterator.
    total = sum(values)

    def my_autopct(pct):
        val = int(round(pct * total / 100.0))
        return f'{pct:.2f}%\n({val:d})'

    return my_autopct

In [None]:
# Count the containers per delivering vehicle type once and reuse the counts
# for both the wedge sizes and the absolute numbers in the labels.
delivered_by_counts = df["delivered by"].value_counts()
delivered_by_counts.plot.pie(label='', autopct=make_autopct(delivered_by_counts.values))
plt.title("Containers delivered by vehicle type")
plt.show()

In [None]:
# Count the containers per picking-up vehicle type once and reuse the counts
# for both the wedge sizes and the absolute numbers in the labels.
picked_up_by_counts = df["picked up by"].value_counts()
picked_up_by_counts.plot.pie(label='', autopct=make_autopct(picked_up_by_counts.values))
plt.title("Containers picked up by vehicle type")
plt.show()

Frequency of containers being picked up by vehicle type m if they were delivered by a vehicle of type n before.

In [None]:
for vehicle_type in df["delivered by"].unique():
    vehicle_type_text_repr = vehicle_type.replace("_", " ")
    df_delivered_with_vehicle_type = df[df["delivered by"] == vehicle_type]

    # Which vehicle types pick up the containers delivered by this vehicle type.
    picked_up_by_counts = df_delivered_with_vehicle_type["picked up by"].value_counts()
    picked_up_by_counts.plot.pie(
        label='',
        autopct=make_autopct(picked_up_by_counts.values)
    )
    plt.title("When delivered by " + vehicle_type_text_repr + ", containers are picked up by these vehicle types")
    plt.show()

    # Of those containers, look only at the ones that leave by truck.
    df_picked_up_by_truck = df_delivered_with_vehicle_type[
        df_delivered_with_vehicle_type["picked up by"] == "truck"
    ]
    if len(df_picked_up_by_truck) == 0:
        display(Markdown(f"No container was delivered by {vehicle_type_text_repr} and picked up by truck"))
    else:
        # Break the truck pickups down by the value of the "emergency pickup" column.
        emergency_pickup_counts = df_picked_up_by_truck["emergency pickup"].value_counts()
        emergency_pickup_counts.plot.pie(
            label="",
            autopct=make_autopct(emergency_pickup_counts.values)
        )
        plt.title("This amount of containers was picked up by a truck because otherwise "
                  "the maximum dwell time would have been exceeded")
        plt.show()

In [None]:
for vehicle_type in df["picked up by"].unique():
    # Show the vehicle type with spaces instead of underscores in the plot title.
    vehicle_type_text_repr = vehicle_type.replace("_", " ")
    df_picked_up_by_vehicle_type = df[df["picked up by"] == vehicle_type]

    # The absolute numbers in the labels must come from the same counts that are plotted,
    # i.e. from the containers picked up by this vehicle type.
    delivered_by_counts = df_picked_up_by_vehicle_type["delivered by"].value_counts()
    delivered_by_counts.plot.pie(
        label='',
        autopct=make_autopct(delivered_by_counts.values)
    )
    plt.title("When picked up by " + vehicle_type_text_repr + ", containers are delivered by these vehicle types")
    plt.show()