# Checking the Input Distributions

Run all imports.

In [None]:
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
import pandas as pd
import datetime

# the default interface
from conflowgen import DatabaseChooser
from conflowgen import ContainerLength
from conflowgen import ContainerLengthDistributionManager

# using internals that are not part of the public API
from conflowgen.domain_models.distribution_repositories.container_weight_distribution_repository import ContainerWeightDistributionRepository
from conflowgen.domain_models.distribution_repositories.truck_arrival_distribution_repository import TruckArrivalDistributionRepository

Connect to database.

In [None]:
# Open the demo database if it is among the SQLite databases the chooser
# lists; otherwise only report that it is missing.
demo_file_name = "demo_deham_cta.sqlite"
database_chooser = DatabaseChooser()
available_databases = database_chooser.list_all_sqlite_databases()
if demo_file_name not in available_databases:
    print("Database is missing, nothing to do here")
else:
    database_chooser.load_existing_sqlite_database(demo_file_name)

Load all distributions.

In [None]:
# The container length distribution is read through the manager class imported
# from the package's default interface.
length_distribution_manager = ContainerLengthDistributionManager()
length_distribution = length_distribution_manager.get_container_lengths()

# The weight and truck arrival distributions are read from the repository
# classes, which are internals and not part of the API.
weight_distribution_repository = ContainerWeightDistributionRepository()
weight_distribution = weight_distribution_repository.get_distribution()

truck_arrival_distribution_repository = TruckArrivalDistributionRepository()
truck_arrival_distribution = truck_arrival_distribution_repository.get_distribution()

Plot the container length distribution.

In [None]:
# Keep only the entries whose value is greater than zero so that the pie chart
# does not get slices (and labels) for them.
positive_shares = {}
for container_length, share in length_distribution.items():
    if share > 0:
        positive_shares[container_length] = share
length_distribution = positive_shares

# One slice per remaining entry, labelled with the string form of its key.
slice_sizes = list(length_distribution.values())
slice_labels = [str(name) for name in length_distribution.keys()]
plt.pie(slice_sizes, labels=slice_labels)
plt.axis('equal')  # equal axis scaling keeps the pie circular
plt.xlabel("")
plt.show()

Plot the weight distribution for twenty foot containers.

In [None]:
# Bar chart of the weight distribution stored for twenty foot containers.
# The entries are sorted by key and split into x positions and bar heights;
# the heights are multiplied by 100 to match the percent axis label.
twenty_feet_entries = sorted(weight_distribution[ContainerLength.twenty_feet].items())
weights, shares = zip(*twenty_feet_entries)
plt.bar(weights, [share * 100 for share in shares])

# German labels -- overwritten by the English calls below, which run last.
plt.xlabel("Gewicht (in Tonnen)")
plt.ylabel("Anteil (in Prozent)")
plt.title("20-Fuß-Container")

# English labels
plt.xlabel("Weight (in metric tonnes)")
plt.ylabel("Share (in percentage)")
plt.title("20 foot container")

plt.show()

Plot the weight distribution for forty foot containers.

In [None]:
# Bar chart of the weight distribution stored for forty foot containers.
# The entries are sorted by key and split into x positions and bar heights;
# the heights are multiplied by 100 to match the percent axis label.
x, y = zip(*sorted(weight_distribution[ContainerLength.forty_feet].items()))
plt.bar(x, [share * 100 for share in y])

# German labels -- overwritten by the English calls below, which run last.
plt.xlabel("Gewicht (in Tonnen)")
plt.ylabel("Anteil (in Prozent)")
plt.title("40-Fuß-Container")

# English labels
plt.xlabel("Weight (in metric tonnes)")
plt.ylabel("Share (percentage)")
# This cell plots ContainerLength.forty_feet, so the title must say 40 foot
# (it previously repeated the title of the twenty foot plot).
plt.title("40 foot container")

plt.show()

Plot the truck arrivals over one week.

In [None]:
# Line plot of the truck arrival distribution over one week.
# The repository is instantiated here in the same way as in the cell that
# loads all distributions, so both cells access it identically.
truck_arrival_distribution = TruckArrivalDistributionRepository().get_distribution()
hour_in_week, fraction = zip(*sorted(truck_arrival_distribution.items()))

# Map hours to 1-based day positions: hour 0 -> 1.0, hour 24 -> 2.0, ...
# The tick labels below put Monday at 1, so hour 0 is presumably Monday 00:00.
weekday_in_week = [hour / 24 + 1 for hour in hour_in_week]
percentage = [share * 100 for share in fraction]

fig, ax = plt.subplots(figsize=(15, 3))
plt.plot(weekday_in_week, percentage)
# The seventh day occupies [7, 8) on this axis; the upper limit must be 8,
# otherwise everything after the "Sunday" tick is cut off.
plt.xlim([1, 8])
ax.xaxis.grid(True, which='minor', color='gray')
ax.xaxis.grid(True, which='major', color='k')
# One minor grid line per hour (1/24 of a day), one major line per day.
ax.xaxis.set_minor_locator(mticker.MultipleLocator(1/24))

# German labels -- overwritten by the English calls below, which run last.
plt.title("Ankunftsraten (gemittelt)")
ax.set_xticks(list(range(1, 8)))
ax.set_xticklabels(
    ["Montag", "Dienstag", "Mittwoch", "Donnerstag", "Freitag", "Samstag", "Sonntag"]
)
plt.xlabel("Wochentag")
plt.ylabel("Anteil (in Prozent)")

# English labels
plt.title("Arrival rates (averaged)")
ax.set_xticks(list(range(1, 8)))
ax.set_xticklabels(
    ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]
)
plt.xlabel("Week day")
plt.ylabel("Share (in percent)")

plt.show()

Summarize truck arrivals for one average weekday.

In [None]:
# Tabulate the truck arrival distribution: one row per (key, value) pair.
df = pd.DataFrame(list(truck_arrival_distribution.items()))
df.columns = ["hour in week", "fraction"]

# Midnight at the start of the current week's Monday (naive local time).
# It serves as the reference point that the hour offsets are added to.
today = datetime.date.today()
monday_date = today - datetime.timedelta(days=today.weekday())
this_monday = datetime.datetime.combine(monday_date, datetime.time.min)


def _hour_to_timestamp(hour_in_week):
    """Return the timestamp that lies `hour_in_week` hours after `this_monday`."""
    return this_monday + datetime.timedelta(hours=hour_in_week)


# Add a "date" column so that pandas' datetime accessors (e.g. `.dt.hour`)
# can be used on the table.
df["date"] = df["hour in week"].apply(_hour_to_timestamp)
df

In [None]:
# Average the fraction per hour of day (0-23) over all rows that share that
# hour. Selecting the "fraction" column before aggregating avoids averaging the
# other columns. The result is multiplied by 100 because the y-axis is
# labelled as a percentage (as is done for the weekly plot).
hourly_percentage = df.groupby(df.date.dt.hour)["fraction"].mean() * 100
ax = hourly_percentage.plot()
# One major tick per hour; together with the x-limits below this yields 0..23.
ax.xaxis.set_major_locator(mticker.MultipleLocator(1))

# German labels -- overwritten by the English calls below, which run last.
plt.title("Ankunftsraten (gemittelt)")
plt.xlabel("Stunde")
plt.ylabel("Anteil (in Prozent)")

# English labels
plt.title("Arrival rates (average)")
plt.xlabel("Hour")
plt.ylabel("Share (percentage)")

plt.xlim([0, 23])
plt.show()