To install a package over pip so that it works in the notebook you have to run: 
```bash
$ source /opt/tljh/user/bin/activate
$ pip install <package name>
```
and then restart the ipykernal.

Imports

In [2]:
import ferryFile
import port
import numpy as np
import heapq
import scipy
from matplotlib import pyplot as plt 
import matplotlib_inline
import itertools
from typing import Dict, Set
import os
import json
import time
import datetime
import pandas as pd

Load the trip times from the scraped data

In [3]:
def get_date(file_name: str) -> str:
    """Time is in the format of YYYY-MM-DD-HH-MM-SS"""
    return ("-".join(file_name.split("_")[1].split("-")[0:6])).split(".")[0]

def get_week_day(time: str) -> str:
    """Returns 0-6 for monday through sunday for the given date. 
    Expects the time in YYYY-MM-DD-HH-MM-SS"""

    return datetime.datetime.strptime(time, "%Y-%m-%d-%H-%M-%S").weekday()


def load_trip_times_and_ports_from_files(directory: str = None) -> Set: 
    """Returns a set of the ports and dictionary of trip times in minutes with the keys being the route codes + the index of the day of the week, 
    based off the sailing durations in the files."""
    if directory == None: 
        directory = "../BC_Ferries_API_DATA/"

    ports = set()
    trip_times = {}
    
        
    for file in os.listdir(directory):
        with open(directory + file, "r") as f:
            data = json.load(f)
            possible_keys = ['routes', 'capacityRoutes', 'nonCapacityRoutes']
            #print(get_date(file))
            #print(get_week_day(get_date(file)))
            week_day = str(get_week_day(get_date(file)))

            if str(type(data)) == "<class 'str'>":
                #print(f"Type of data is string: {data}")
                #print("Continuing.")
                continue

            if 'routes' in data.keys():
                for route in data['routes']:
                    from_port = route['fromTerminalCode']
                    to_port = route['toTerminalCode']
                    ports.add(from_port)
                    ports.add(to_port)

                    route_code = route['routeCode']
                    assert route_code == from_port + to_port, "Sanity check that the route code is from_port+to_port"

                    route_code = route_code 
                    
                    trip_time = route['sailingDuration']
                    if trip_time == "":
                        #print("Trip time is empty. Continuing")
                        continue
                    if ":" in trip_time: 
                        #then the format is %H:%M
                        hours, minutes = trip_time.split(":")
                    else:
                        #then the format is "%Hh %%Mm"
                        hours, minutes = trip_time.split(" ")
                        hours = hours.strip("h")
                        minutes = minutes.strip("m")

                        

                    trip_time_in_minutes = 60 * int(hours) + int(minutes)

                    # Since the scraped data has multiple routs from one port to another, 
                    # we are only going to pay attention to the direct route, ie take the min
                    if route_code in trip_times.keys():
                        if trip_times[route_code] > trip_time_in_minutes:
                            trip_times[route_code] = trip_time_in_minutes
                    else:
                        trip_times[route_code] = trip_time_in_minutes
                
                    

    return ports, trip_times


PORT_CODES, TRIP_TIMES = load_trip_times_and_ports_from_files()
print(TRIP_TIMES)

{'SWBTSA': 95, 'SWBSGI': 40, 'HSBNAN': 100, 'TSASWB': 95, 'HSBLNG': 40, 'NANHSB': 100, 'TSASGI': 83, 'SWBFUL': 35, 'DUKTSA': 120, 'TSADUK': 120, 'LNGHSB': 40, 'HSBBOW': 20, 'MILBTW': 25, 'PBBPPH': 390, 'PBBSHW': 45, 'CMXPWR': 90, 'PPHPBB': 360, 'PPHBEC': 1095, 'PPHKLE': 690, 'PPHPOF': 800, 'PPHPPR': 1019, 'PPHSHW': 580, 'ERLSLT': 50, 'FULSWB': 35, 'ALRMCN': 40, 'PWRCMX': 90, 'SLTERL': 50, 'BTWMIL': 25, 'PBBPPR': 614, 'POFBEC': 250, 'SHWPBB': 44, 'SHWBEC': 485, 'SHWPOF': 190, 'KLEPPR': 570, 'PBBBEC': 560, 'PBBKLE': 200, 'PBBPOF': 265, 'POFPBB': 239, 'POFPPH': 780, 'BECPBB': 554, 'BECPOF': 255, 'BECPPH': 1095, 'BECSHW': 480, 'POFSHW': 165, 'SHWPPH': 585, 'KLEPPH': 660, 'PPRPBB': 680, 'KLEPBB': 210, 'PPRKLE': 560, 'PPRPPH': 1110}


Constants and Objects

In [4]:
MAX_SIMULATION_TIME = 1000


MEAN_TRIP_TIME = 20
SD_TRIP_TIME = 1.5
MEAN_LOADING_UNLOADING_TIME = 10
SD_LOADING_UNLOADING_TIME = 1


FERRIES = []
SEEDS = [234,4234,324,325543,2,34,546,74,567,568,568,89,364575,342,45,3456473,546,3456473]
SEEDS = set(SEEDS)

# Programmatically create the port objects for each of the port codes
for port_code in PORT_CODES:
    globals()[port_code] = port.Port() 
    globals()[port_code].port_code = port_code


To generate the cdf we can use np.norm.cdf(val, loc, scale) where location is the mean and scale is the standard deviation. 
To find the values for a certain percent we can use ppf from scipy stats

In [5]:
def generate_scheduled_departure_and_arrival_times(arrival, departure, trip_length, wanted_on_time_percent: float = 0.85) -> None:
    trip_distribution = scipy.stats.norm(loc=MEAN_TRIP_TIME, scale=SD_TRIP_TIME)
    loading_unloading_distribution = scipy.stats.norm(loc=MEAN_LOADING_UNLOADING_TIME, scale=SD_LOADING_UNLOADING_TIME)

    current_time = 0
    for i in range(1, trip_length):
        # Add the time to get to the next port
        current_time += trip_distribution.ppf(wanted_on_time_percent)

        # Set the expected arrival time
        arrival[i] = current_time

        # Add the time to load and unload at that port
        current_time += loading_unloading_distribution.ppf(wanted_on_time_percent)

        #Set the expected departure time
        departure[i] = current_time

    for i in range(len(arrival)):
        if type(arrival[i]) not in [int, float]:
            arrival[i] = arrival[i].item()
    for i in range(len(departure)):
        if type(departure[i]) not in [int, float]:
            departure[i] = departure[i].item()
    

In [6]:
expected_arrival = [ 0 for i in range(4)]
expected_departure = [0 for i in range(4)]

generate_scheduled_departure_and_arrival_times(expected_arrival, expected_departure, 4, 0.70)

In [7]:
time_to_count_as_late = 0
lates_for_each = {}
for ppf_value in range(1, 100, 1):
    if (ppf_value % 10 == 0):
        print(f"PPF value: {ppf_value}")
    ppf_value = ppf_value / 100
    lates_for_each[ppf_value] = []

    expected_arrival = [ 0 for i in range(4)]
    expected_departure = [0 for i in range(4)]
    generate_scheduled_departure_and_arrival_times(expected_arrival, expected_departure, 4, ppf_value)
    
    for seed_value in range(100):
        np.random.seed(seed_value)
        # Most of the trips take the same amount of time independent of the ferry 
        # Makes every port to every other port take 20ish minutes. 
        # Times are made to be the same independent of direction.
        TRIP_TIMES = {}
        for port in PORT_CODES:
            for other_port in [i for i in PORT_CODES if i != port]:
                if other_port + port in TRIP_TIMES.keys():
                    TRIP_TIMES[port+other_port] = TRIP_TIMES[other_port+port]
                else:
                    TRIP_TIMES[port + other_port] = np.random.normal(MEAN_TRIP_TIME, SD_TRIP_TIME)
    
        # Set the loading and unloading times
        # Port + Current_time for every 5 step increment
        LOADING_AND_UNLOADING_TIMES = {}
        for port_code in PORT_CODES:
            for time in range(0, MAX_SIMULATION_TIME, 5):
                LOADING_AND_UNLOADING_TIMES[globals()[port_code] + time] = np.random.normal(MEAN_LOADING_UNLOADING_TIME, SD_LOADING_UNLOADING_TIME)
        

        
        #Create a ferry object
        QNW = ferryFile.Ferry()
        QNW.ferry_name = "Queen of New Westminster"
        QNW.ferry_code = "QNW"
        QNW.ferry_capacity = 150
        QNW.ferry_route = [TSA, SWB, TSA]
        QNW.ferry_trip_time = TRIP_TIMES
        QNW.set_expected_departure_times(expected_departure)
        QNW.set_expected_arrival_times(expected_arrival)
        QNW.loading_unloading_time = LOADING_AND_UNLOADING_TIMES
        QNW.trips_required = 1
        
        QNW.trips_completed = 0
        QNW.ferry_current_port_index = 0

        QNW.print_stats_at_end = False

        #QNW.validate_arrival_and_departure_times()
    
        current_time =0
        event_queue = []
        heapq.heapify(event_queue)
        heapq.heappush(event_queue, (current_time, QNW))
        
    
        current_event = heapq.heappop(event_queue)
        while current_event[1].next_function != None:
            current_time = current_event[0]
    
            next_event_time = current_event[1].next_function(current_time)
            heapq.heappush(event_queue, (next_event_time, current_event[1]))
    
            current_event = heapq.heappop(event_queue)

        lates_for_each[ppf_value].append(QNW.total_times_late(time_to_count_as_late))




#print(lates_for_each)
for key in lates_for_each.keys():
    data = lates_for_each[key]
    times_late_to_arrive = [i[0] for i in data]
    times_late_to_depart = [i[1] for i in data]

    times_late_to_arrive = sum(times_late_to_arrive) / len(times_late_to_arrive)
    times_late_to_depart = sum(times_late_to_depart) / len(times_late_to_depart)

    lates_for_each[key] = [times_late_to_arrive, times_late_to_depart]
#print(lates_for_each)

plt.bar(lates_for_each.keys(), [lates_for_each[key][0] for key in lates_for_each.keys()], 0.01)

PPF value: 10
PPF value: 20


KeyboardInterrupt: 

Use pandas to load data from csv - Keep in mind to run this locally, and not upload the files to github or the jupyter server!

In [2]:
#These are some pandas examples
data_file = None

#Load data from file
#all_data = pd.read_csv(file)
data = {"date_key": ["2024-04-0" + str(i) for i in range(1,10)]}
for i in range(10,28):
    data["date_key"].append("2024-04-" + str(i))
all_data = pd.DataFrame(data = data)

#Seperate data by day of the week
all_data['day_of_the_week'] = [datetime.datetime.strptime(i, "%Y-%m-%d").weekday() for i in all_data['date_key']]

#Add route code
all_data["route_id"] = np.random.choice([5,9], all_data.shape[0])

all_data.loc[all_data["route_id"] == 5]


Unnamed: 0,date_key,day_of_the_week,route_id
1,2024-04-02,1,5
2,2024-04-03,2,5
3,2024-04-04,3,5
4,2024-04-05,4,5
5,2024-04-06,5,5
6,2024-04-07,6,5
7,2024-04-08,0,5
8,2024-04-09,1,5
9,2024-04-10,2,5
10,2024-04-11,3,5


In [3]:
#Gets the year for the data from the file name
def get_year(file_name: str) -> str:
    print(file_name)
    file_name = file_name.split("fy")[1].strip(".csv").strip("fy")
    if "_" in file_name:
        return file_name.split("_")[0]
    return file_name

In [4]:
#Adds a date key to the data  to make it easier to filter
def add_date_key(data: pd.DataFrame) -> pd.DataFrame:
    #If the data set is analyze or event then there is a "date_key" column exclusive of the end date
    if "date_key" in data.columns: 
        #Date is in the format of YYYY-MM-DD
        data["date_key"] = pd.to_datetime(data["date_key"])

        date = (data.loc[data["sailing_id"] == "LONG202304071800"]["date_key"].values[0])
        print(type(date))
        print(date.astype(str)[:10])
        

    #If it is injest then there is a "Sched Dept Ts" column
    elif "Sched Dept Ts" in data.columns: 
        #Date is the format of YYYY-MM-DD-HH-MM-SS
        data["date_key"] = pd.to_datetime(data["Sched Dept Ts"])

    #Throw an error to show that we have a data set without the proper columns
    else: 
        print("A data set was passed that did not follow any of the formats")
        assert(False)
        

def filter_by_dates(data: pd.DataFrame, start_date: str | np.datetime64, end_date: str | np.datetime64) -> pd.DataFrame:
    # Filter the data set by the start and end date and return the new data set
    if type(start_date) is str:
        start_date = datetime.datetime.strptime(start_date, "%Y-%m-%d")
    if type(end_date) is str:
        end_date = datetime.datetime.strptime(end_date, "%Y-%m-%d")
    


    assert(start_date <= end_date), "Start date must be before end date"

    new_data = data.loc[data["date_key"] >= start_date]
    new_data = new_data.loc[new_data["date_key"] < end_date]

    return new_data



In [5]:
from dotenv import load_dotenv
assert(load_dotenv())

ANALYZE_FOLDER = os.getenv("SAILING_ANALYZE_FOLDER")
INGEST_FOLDER = os.getenv("SAILING_INGEST_FOLDER")
EVENT_FOLDER = os.getenv("SAILING_EVENT_FOLDER")

files = os.listdir(ANALYZE_FOLDER) 
all_analyze_data = pd.concat(map(pd.read_csv, [ANALYZE_FOLDER + "/" + file for file in files]))
all_analyze_data.dropna(how='all', inplace=True, ignore_index=True)

def convert_to_epoch(date: str):
    dt_obj = datetime.datetime.strptime(date, "%Y-%m-%d %H:%M")
    return int(dt_obj.timestamp())

all_analyze_data["arrival_actual_datetime"] = all_analyze_data["arrival_actual_datetime"].apply(convert_to_epoch)
all_analyze_data["departure_actual_datetime"] = all_analyze_data["departure_actual_datetime"].apply(convert_to_epoch)

all_analyze_data["sailing_time"] = (all_analyze_data["arrival_actual_datetime"] - all_analyze_data["departure_actual_datetime"])/60
add_date_key(all_analyze_data)
assert(all_analyze_data.loc[all_analyze_data["sailing_time"] <= 0].empty) #Sanity check that none of the boats are time travellers

  all_analyze_data = pd.concat(map(pd.read_csv, [ANALYZE_FOLDER + "/" + file for file in files]))
  all_analyze_data = pd.concat(map(pd.read_csv, [ANALYZE_FOLDER + "/" + file for file in files]))
  all_analyze_data = pd.concat(map(pd.read_csv, [ANALYZE_FOLDER + "/" + file for file in files]))


<class 'numpy.datetime64'>
2023-04-07


In [None]:
type(all_analyze_data["date_key"].values[0])

In [None]:
#Get the inport time for the vessel
def get_in_port_time(data: pd.DataFrame, sailing_id: str) -> int:
    #Get the information for the sailing id and data from the day
    sailing_info = data.loc[data["sailing_id"] == sailing_id]
    assert len(sailing_info.index) == 1, "There should only be one sailing id in the data set"
    data_for_the_day = filter_by_dates(data, sailing_info["date_key"].values[0], sailing_info["date_key"].values[0] + np.timedelta64(1, 'D'))
    
    arrival_port_id = sailing_info["arr_port_id"].values[0]

    #Filter by port
    data_for_the_day = data_for_the_day.loc[data_for_the_day["dep_port_id"] == arrival_port_id]
    if len(data_for_the_day.index) == 0:
        print("No data for the day")
        return -1
    #Filter by time
    sailing_arrival = sailing_info["arrival_actual_datetime"].values[0]
    data_for_the_day = data_for_the_day.loc[data_for_the_day["departure_actual_datetime"] > sailing_arrival]
    if len(data_for_the_day.index) == 0:
        print("No data for the day")
        return -1
    #Get what should have been the arrival
    time_wanted = data_for_the_day["departure_actual_datetime"].min()
    arrival = data_for_the_day.loc[data_for_the_day["departure_actual_datetime"] == time_wanted]


    arri_time = datetime.datetime(1970, 1, 1) + datetime.timedelta(seconds=int(sailing_info["arrival_actual_datetime"].values[0].astype(str)))
    dept_time = datetime.datetime(1970, 1, 1) + datetime.timedelta(seconds=int(arrival["departure_actual_datetime"].values[0].astype(str)))



    # Compute difference
    time_diff = dept_time - arri_time
    return time_diff.total_seconds() / 60

    



for index, row in all_analyze_data.iterrows():
    sailing_id = row["sailing_id"]
    row["in_port_time_at_destination"] = get_in_port_time(all_analyze_data, sailing_id)
    print(f"Sailing ID: {sailing_id} In Port Time(m): {row["in_port_time_at_destination"]}")

Sailing ID: SWB202207310555 In Port Time(m): 9.0
Sailing ID: SWB202207310620 In Port Time(m): 4.0
Sailing ID: SWB202207310915 In Port Time(m): 8.0
Sailing ID: SWB202207311000 In Port Time(m): 3.0
Sailing ID: SWB202207311410 In Port Time(m): 14.0
Sailing ID: SWB202207311420 In Port Time(m): 14.0
Sailing ID: LONG202211251800 In Port Time(m): 12.0
Sailing ID: LONG202212021800 In Port Time(m): 14.0
Sailing ID: LONG202212091800 In Port Time(m): 10.0
Sailing ID: LONG202212161800 In Port Time(m): 4.0
Sailing ID: LONG202212301800 In Port Time(m): 9.0
Sailing ID: LONG202301061800 In Port Time(m): 8.0
Sailing ID: LONG202301131800 In Port Time(m): 21.0
Sailing ID: LONG202301201800 In Port Time(m): 2.0
Sailing ID: LONG202301271800 In Port Time(m): 8.0
Sailing ID: SAT202204021650 In Port Time(m): 8.0
Sailing ID: SAT202204161650 In Port Time(m): 10.0
Sailing ID: SAT202204231650 In Port Time(m): 10.0
Sailing ID: SAT202204301650 In Port Time(m): 8.0
Sailing ID: SAT202205071650 In Port Time(m): 11.0
Sa

KeyboardInterrupt: 

In [8]:
x = all_analyze_data.loc[all_analyze_data["sailing_id"] == "OB202304071815"]
x["date_key"]


24755   2023-04-07
Name: date_key, dtype: datetime64[ns]

In [None]:
files = os.listdir(EVENT_FOLDER)
eventData = pd.concat(map(pd.read_csv, [EVENT_FOLDER + "/" + file for file in files])) #Read all the files and concatenate them into one dataframe
eventData.dropna(how="all", inplace=True, ignore_index=True)

arr_ports = [int(i) for i in  eventData["arr_port_id"].unique()]
dep_ports = [int(i) for i in  eventData["dep_port_id"].unique()]

ports = set(arr_ports + dep_ports)

portID_to_port_map = {}
for port in ports: 
    #Check arrivals 
    port_abb_arr = eventData.loc[eventData["arr_port_id"] == port]["arr_port_abb"].unique()

    #Check departures
    port_abb_dep = eventData.loc[eventData["dep_port_id"] == port]["dep_port_abb"].unique()

    #Verify that one the abbreviation was found for atleast 1 and if it was found for only one assign it to both.
    if len(port_abb_dep) == 0 and len(port_abb_arr) == 0: 
        print(f"Could not find the abbreviation for {port}.")
        continue
    elif len(port_abb_dep) == 0: 
        port_abb_dep = port_abb_arr
    elif len(port_abb_arr) == 0:
        port_abb_arr = port_abb_dep

    port_abb_arr = port_abb_arr[0]
    port_abb_dep = port_abb_dep[0]
    
    
    assert(port_abb_dep == port_abb_arr)

    if port in portID_to_port_map.keys():
        assert(portID_to_port_map[port] == port_abb_arr) #Sanity check
    else: 
        portID_to_port_map[port] = port_abb_arr

    
def portID_to_abb(portID:int | str) -> str: 
    if type(portID) is str: 
        portID = int(portID)

    return portID_to_port_map[portID]

portID_to_port_map


In [None]:
FIGURES_DIR = "figures"

#Seperate data by route
Route5_data = all_analyze_data.loc[(all_analyze_data["route_id"] == 5)]
Route9_data = all_analyze_data.loc[all_analyze_data["route_id"] == 9]

In [None]:
def get_port_from_route(route_data, route_leg_id, arrival: bool = False, departure: bool = False):
    if arrival and departure:
        raise ValueError("Both arrival and departure cannot be true")
    if not arrival and not departure:
        raise ValueError("Either arrival or departure must be true")
    if arrival:
        key = "arr_port_id"
    else:
        assert(departure)
        key = "dep_port_id"


    port_id = route_data[route_data["route_leg_id"] == route_leg_id][key].unique()
    assert(len(port_id) == 1)
    port_id = port_id[0]
    return port_id

In [None]:
#Make histograms for each of the legs with all of the vessels to see the general distribution of sailing times
def make_histograms_for_legs(route_data: pd.DataFrame) -> None:
    route_vessels = route_data["vessel_id"].unique()
    route_legs = route_data["route_leg_id"].unique()


    for route_leg_id in route_legs: 
        #Get the arrival and departure port ids
        arrival_port_id = portID_to_abb(str(int(get_port_from_route(route_data, route_leg_id, arrival=True))))
        departure_port_id = portID_to_abb(str(int(get_port_from_route(route_data, route_leg_id, departure=True))))

        route_figure = plt.figure()
        plt.title(f"Route {route_leg_id.split('-')[0]}, Leg {arrival_port_id} to {departure_port_id}")
        plt.xlabel("Sailing Time (m)")
        plt.ylabel("Frequency")

        #Get the min x and max x for the histogram
        min_x = route_data.loc[route_data["route_leg_id"] == route_leg_id, "sailing_time"].min()
        max_x = route_data.loc[route_data["route_leg_id"] == route_leg_id, "sailing_time"].max()

        
        for vessel in route_vessels: 
            #verify the vessel has sailed on the route otherwise skip
            if route_data.loc[(route_data["vessel_id"] == vessel) & (route_data["route_leg_id"] == route_leg_id)].empty:
                continue

            #plot the histogram
            route_data.loc[(route_data["vessel_id"] == vessel) & (route_data["route_leg_id"] == route_leg_id)]["sailing_time"].hist(bins=25, alpha=0.3, label=f"Vessel {vessel}", range=[min_x, max_x])

        #format the plot
        plt.grid(False)
        plt.legend()
        plt.savefig(f"{FIGURES_DIR}/R{route_leg_id.split('-')[0]}_{departure_port_id + arrival_port_id}_histogram.png")
        plt.show()


make_histograms_for_legs(Route5_data)

In [None]:
#Make plots for the sailing times on each route for each vessel to check if the timing is consistent across vessels.
def make_subplot_histograms_for_legs(route_data: pd.DataFrame) -> None:
    route_vessels = route_data["vessel_id"].unique()
    route_legs = route_data["route_leg_id"].unique()

    for route_leg_id in route_legs: 
        #Get the arrival, departure port ids and the route id
        arrival_port_id = portID_to_abb(str(int(get_port_from_route(route_data, route_leg_id, arrival=True))))
        departure_port_id = portID_to_abb(str(int(get_port_from_route(route_data, route_leg_id, departure=True))))
        route_id = route_leg_id.split("-")[0]

        #Create the shared plot
        route_figure = plt.figure()
        plt.suptitle(f"Route {route_id}, Leg {departure_port_id} to {arrival_port_id}")

        #Get the min x and max x for the histogram
        min_x = route_data.loc[route_data["route_leg_id"] == route_leg_id, "sailing_time"].min()
        max_x = route_data.loc[route_data["route_leg_id"] == route_leg_id, "sailing_time"].max()

        for vessel in route_vessels: 
            #verify the vessel has sailed on the route
            if route_data.loc[(route_data["vessel_id"] == vessel) & (route_data["route_leg_id"] == route_leg_id)].empty:
                continue


            #Determine the subplot location
            unique_vessels_for_route = route_data.loc[route_data["route_leg_id"] == route_leg_id, "vessel_id"].nunique()
            index_array_for_vessel_on_route = route_data.loc[route_data["route_leg_id"] == route_leg_id, "vessel_id"].unique()
            plt.subplot(1 if unique_vessels_for_route <= 3 else 2, 
                        unique_vessels_for_route if unique_vessels_for_route <= 3 else 3, 
                        np.where(index_array_for_vessel_on_route == vessel)[0][0] + 1)
            
            #Plot the data
            route_data.loc[(route_data["vessel_id"] == vessel) & (route_data["route_leg_id"] == route_leg_id)]["sailing_time"].hist(
                bins=20, alpha=0.5, label=f"Vessel {vessel}", range=[min_x, max_x])

            #Set the title, labels, and grid
            plt.title(f"Vessel {vessel}")
            plt.xlabel("Sailing Time (m)")
            plt.ylabel("Frequency")
            plt.grid(False)
            
        
        plt.tight_layout()
        plt.savefig(f"{FIGURES_DIR}/Route{route_id}_SubPlots_{departure_port_id+arrival_port_id}.png")
        plt.show()


make_subplot_histograms_for_legs(Route5_data)

In [None]:
all_analyze_data.loc[(all_analyze_data["route_id"] == 9) & (all_analyze_data["route_leg_id"].apply(lambda x: x.split("-")[0]) == "5")]
all_analyze_data.loc[(all_analyze_data["route_id"] == 5) & (all_analyze_data["route_leg_id"].apply(lambda x: x.split("-")[0]) == "9")]


##Simple sim without ferries mattering

In [None]:
route = []