# Process the original schedule file to match the format expected by the show-up function

In [None]:
# to auto-reload the imports
# if we change something in our functions
%load_ext autoreload
%autoreload 2

%load_ext nb_black

# import the libraries required to do the work
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from matplotlib.ticker import FuncFormatter
import pandas as pd
import seaborn as sns
import datetime
from scipy.stats import norm
from scipy.interpolate import interp1d
import os
from tqdm.notebook import tqdm
import re

from src.utils.sharepoint import get_T1_ren_6kPax_schedule
from decouple import AutoConfig
from pathlib import Path

In [None]:
# get the schedule from Sharepoint
get_T1_ren_6kPax_schedule()

# locate the .env file, then resolve the schedule path it declares
# (could be made as a function for notebooks)
DOTENV_FILE_PATH = Path(os.getcwd(), "../../../data/secret/.env")
config = AutoConfig(search_path=DOTENV_FILE_PATH)

# the configured path is relative to the folder three levels above the notebook
path_relative = config("T1_ren_6kPax_schedule_path")
path_data = Path(os.getcwd(), "..", "..", "..", path_relative)

In [None]:
# insert a space between the letters of a flight number and what follows them
def insert_space_after_letters(test_str: str) -> str:
    """Return ``test_str`` with a space added after each run of letters.

    A space is inserted only when the run of letters is directly followed by
    a character that is neither a letter nor whitespace, so a flight number
    that is already spaced ("JX 821") or that ends with letters ("JX") is
    returned unchanged instead of gaining a double or trailing space.

    Args:
        test_str: flight number such as "JX821" or "NS*****".

    Returns:
        The flight number with the separating space, e.g. "JX 821".
    """
    # the lookahead excludes letters so the regex engine cannot backtrack
    # into the middle of a letter run and split "JX" into "J X"
    return re.sub(r"([A-Za-z]+)(?=[^A-Za-z\s])", r"\1 ", test_str)

In [None]:
# import the schedule from the excel file produced by Aero department
# both sheets are requested in one call so the workbook is opened only once;
# with a list of sheet names read_excel returns a dict keyed by sheet name
_schedule_sheets = pd.read_excel(
    path_data,
    header=0,
    sheet_name=["ARR", "DEP"],
)

data_arr = _schedule_sheets["ARR"]
data_dep = _schedule_sheets["DEP"]

In [None]:
# edit data to match application format
data_arr_processed = data_arr.copy()
data_arr_processed["A/D"] = "A"
data_arr_processed["T1/T2(MM/9C/7C/TW)"] = "T1"
data_arr_processed["Int'l Regions"] = "unknown"

dct_name_change = {
    "貨客区分": "Category(P/C/O)",
    "DI区分": "Sector",
    "航空機識別": "Flight Number",  # <- to be split with space between letters and digits
    "座席数": "SEATS FC",
    "搭乗者数": "Pax_SUM FC",
    "到着予定日": "Flight Date",
    "STA": "Scheduled Time",
}

data_arr_processed.rename(columns=dct_name_change, inplace=True)

data_arr_processed["Flight Number"] = data_arr_processed["Flight Number"].apply(
    insert_space_after_letters
)

In [None]:
# problem : no Seats and Pax number for arrival flights...
# keep only relevant columns
# data_arr_processed = data_arr_processed[
#    [
#        "A/D",
#        "T1/T2(MM/9C/7C/TW)",
#        "Int'l Regions",
#        "Category(P/C/O)",
#        "Sector",
#        "Flight Number",
#        "SEATS FC",
#        "Pax_SUM FC",
#        "Flight Date",
#        "Scheduled Time",
#    ]
# ]

In [None]:
# reshape the departure sheet into the application format
dct_name_change = {
    "貨客区分": "Category(P/C/O)",
    "DI区分": "Sector",
    "航空機識別": "Flight Number",  # <- to be split with space between letters and digits
    "座席数": "SEATS FC",
    "搭乗者数": "PAX_SUM FC",
    "出発予定日": "Flight Date",
    "STD": "Scheduled Time",
}

# rename on a fresh frame, then append the constant columns of this sheet
data_dep_processed = data_dep.rename(columns=dct_name_change).assign(
    **{
        "A/D": "D",
        "T1/T2(MM/9C/7C/TW)": "T1",
        "Intl Regions": "unknown",
    }
)

# separate the airline letters from the rest of each flight number
data_dep_processed["Flight Number"] = data_dep_processed["Flight Number"].map(
    insert_space_after_letters
)

In [None]:
# keep only relevant columns
# the explicit copy makes the result an independent frame, so the .loc
# assignments below do not trigger a SettingWithCopyWarning
data_dep_processed = data_dep_processed[
    [
        "A/D",
        "T1/T2(MM/9C/7C/TW)",
        "Intl Regions",
        "Category(P/C/O)",
        "Sector",
        "Flight Number",
        "SEATS FC",
        "PAX_SUM FC",
        "Flight Date",
        "Scheduled Time",
    ]
].copy()

# correct input mistake (?): cells holding "-" are replaced by 0
for column in ("PAX_SUM FC", "SEATS FC"):
    mask_replace = data_dep_processed[column] == "-"
    data_dep_processed.loc[mask_replace, column] = 0


In [None]:
# we should add a step to merge the two in one
# as we do not have seats and Pax for T1 schedule, let's forget about it for now

In [None]:
# destination of the processed schedule, relative to the notebook folder
output_path = Path(
    os.getcwd(),
    "../../../data/processed/Schedule (30th terminal peak, 6000 pax)_PROCESSED.xlsx",
)

# the context manager saves and closes the workbook on exit
with pd.ExcelWriter(output_path, engine="xlsxwriter") as writer:
    data_dep_processed.to_excel(writer, sheet_name="schedule")

# Rewrite the show-up function

In [None]:
"""
The input schedule must follow the template format.
Its sheet must be named "schedule".
"""

In [None]:
# filter values for this check; they were previously undefined at notebook
# level (NameError) and mirror the arguments of the show_up_from_schedule calls
direction, sector, terminal, date_str = "D", "I", "T1", "2017-03-19"

# same row selection as the one applied inside show_up_from_schedule
test_mask = (
    (data_dep_processed["A/D"] == direction)
    & (data_dep_processed["Sector"] == sector)
    & (data_dep_processed["Category(P/C/O)"] == "P")
    & (data_dep_processed["T1/T2(MM/9C/7C/TW)"] == terminal)
    & (data_dep_processed["Flight Date"] == pd.Timestamp(date_str))
)

In [None]:
# inspect the distinct values present in the SEATS FC column
data_dep_processed["SEATS FC"].unique()

In [None]:
# passenger show-up at the terminal for the flights matching the filters;
# the first returned element (the bare list of times) is discarded
_, df_Pax = show_up_from_schedule(
    path_to_schedule=output_path,
    direction="D",
    sector="I",
    terminal="T1",
    system="terminal",
    date_str="2017-03-19",
    custom_showup=False,
    custom_counter_rule=False,
)

In [None]:
# check-in counters for the same filters; with system="check-in" the
# function returns a single DataFrame instead of a (list, DataFrame) pair
df_Counters = show_up_from_schedule(
    path_to_schedule=output_path,
    direction="D",
    sector="I",
    terminal="T1",
    system="check-in",
    date_str="2017-03-19",
    custom_showup=False,
    custom_counter_rule=False,
)

In [None]:
# display the path of the processed schedule
output_path

In [None]:
# every row of df_Pax carries a unit weight; summing it per 5-minute bin and
# multiplying by 12 scales each bin to an hourly figure
df_Pax["N"] = 1
plot = (
    df_Pax.set_index("time", drop=False)
    .loc[:, ["N"]]
    .resample("5min")
    .sum()
    * 12
)
plot.plot()

In [None]:
# plot the "total" column of the counters table
df_Counters["total"].plot()

In [None]:
def _normal_survival(loc, scale):
    """Return the function x -> 1 - norm.cdf(x, loc, scale)."""
    return lambda x: 1 - norm.cdf(x, loc=loc, scale=scale)


def _read_profiles(
    path_show_up, sheet_name, header, time_column, columns, drop_rows=None
):
    """Read one profile sheet and interpolate the requested columns.

    Returns ``(x, profiles)`` where ``x`` is ``time_column`` as a float array
    and ``profiles`` maps each key of ``columns`` to the linear interpolation
    of the corresponding sheet column over ``x``.
    """
    sheet = pd.read_excel(path_show_up, sheet_name=sheet_name, header=header)
    if drop_rows:
        sheet = sheet.drop(drop_rows, axis=0)
    sheet = sheet.reset_index(drop=True)

    x = sheet[time_column].to_numpy(dtype=float)
    profiles = {
        key: interp1d(x, sheet[column].to_numpy(dtype=float), kind="linear")
        for key, column in columns.items()
    }
    return x, profiles


def _inverse_profile(profile, x, n_points=None):
    """Return the linear interpolation mapping ``profile(x)`` back to ``x``.

    Only the first ``n_points`` knots are used when ``n_points`` is given.
    """
    return interp1d(profile(x)[:n_points], x[:n_points], kind="linear")


def _departure_picker(filtered_data, airline_code, inverse, time_bands):
    """Return a function giving the inverse profile to use for flight row i.

    ``time_bands`` is a list of ``(start, end, key)``; a flight scheduled in
    ``[start, end)`` gets ``inverse[key]``.  Other flights are classified by
    region, then by airline code.
    """
    bands = [
        (pd.to_datetime(start), pd.to_datetime(end), key)
        for start, end, key in time_bands
    ]
    fsc_codes = airline_code[airline_code["FSC / LCC"] == "FSC"][
        "airline code"
    ].to_numpy(dtype="str")

    def pick(i):
        scheduled = filtered_data.loc[i, "Scheduled Time"]
        for start, end, key in bands:
            if start <= scheduled < end:
                return inverse[key]
        if filtered_data.loc[i, "Intl Regions"] == "China":
            return inverse["CHINA"]
        if filtered_data.loc[i, "Flight Number"][0:2] in fsc_codes:
            # NOTE(review): airlines flagged "FSC" receive the LCC profile and
            # all other airlines the FSC profile.  This looks inverted but is
            # kept as found -- confirm against the profile definitions.
            return inverse["LCC"]
        return inverse["FSC"]

    return pick


def _aircraft_picker(filtered_data, inverse):
    """Return a function choosing the code C or code E inverse for row i."""

    def pick(i):
        if filtered_data.loc[i, "Aircraft_Narrow/Wide"] == "Narrow body":
            return inverse["C"]
        return inverse["E"]

    return pick


def _minutes_to_datetime(t):
    """Convert minutes after midnight into a datetime on 2020-10-13.

    Values outside [0, 1440) wrap around onto that same day.
    """
    return datetime.datetime(
        year=2020,
        month=10,
        day=13,
        hour=int((t % (24 * 60)) / 60),
        minute=int(t % 60),
        second=int(t % 1 * 60),
    )


def _spread_passengers(filtered_data, pick_inverse):
    """Generate one timestamp per passenger of every flight.

    For each flight, ``PAX_SUM FC`` evenly spaced probabilities between
    0.0001 and 0.995 are sent through the inverse profile returned by
    ``pick_inverse(i)``; the result is subtracted from the scheduled time
    expressed in minutes after midnight.

    Returns the three parallel lists (times, flight numbers, scheduled times).
    """
    list_time_Pax = []
    list_flights = []
    list_ST = []
    for i in range(len(filtered_data)):
        n_flight_pax = int(filtered_data.loc[i, "PAX_SUM FC"])
        scheduled = filtered_data.loc[i, "Scheduled Time"]
        flight_number = filtered_data.loc[i, "Flight Number"]
        y = np.linspace(0.0001, 0.995, n_flight_pax)

        minutes = scheduled.hour * 60 + scheduled.minute - pick_inverse(i)(y)
        for t in minutes:
            list_time_Pax.append(_minutes_to_datetime(t))
            list_flights.append(flight_number)
            list_ST.append(scheduled)
    return list_time_Pax, list_flights, list_ST


def _check_in_counters(
    data, start_time, onecounter_time, base_n_counter, seats_per_add_counter
):
    """Build the table of open check-in counters per airline and 5-min slot.

    ``data`` is the filtered schedule; an "Airline Code" column is added to it.
    The result has one row per 5-minute slot of the day, one column per
    airline code and a "total" column.
    """
    # fix a known input mistake, then take the airline code before the space
    data["Flight Number"] = data["Flight Number"].replace(["NS*****"], "NS *****")
    # n is passed by keyword: it is keyword-only since pandas 2.0
    data["Airline Code"] = data["Flight Number"].str.split(" ", n=1, expand=True)[0]

    # slots are counted backwards from the scheduled time, hence negative
    onecounter_slot = -int(((onecounter_time) * 60) // 5)
    start_slot = -int(((start_time) * 60) // 5)

    # seats departing per airline and per 5-minute slot, all slots start at 0
    dico = {code: [0 for _ in range(int(24 * 60 / 5))] for code in data["Airline Code"]}

    # loop over the airlines ...
    for code in data["Airline Code"].unique():
        # ... and over the flights of each airline
        for flight_number in data[(data["Airline Code"] == code)]["Flight Number"]:
            # scheduled time rounded down to its 5-minute slot
            time = data[data["Flight Number"] == flight_number]["Scheduled Time"].iloc[
                0
            ]
            STD_5interval = (time.hour * 60 + time.minute) // 5

            # add the seats of the flight to the slot of its scheduled time
            dico[code][STD_5interval] = (
                dico[code][STD_5interval]
                + data[
                    (data["Scheduled Time"] == time)
                    & (data["Flight Number"] == flight_number)
                ]["SEATS FC"].iloc[0]
            )

    df_Seats = pd.DataFrame.from_dict(dico)

    # work on 3 consecutive days (previous, current, next) to avoid errors for
    # flights close to midnight; the current day starts at row `offset`
    offset = 288
    df_Counters_3d = pd.DataFrame(
        0.0, index=range(3 * offset), columns=df_Seats.columns
    )

    # First we add the seats from `start_time` hours before STD
    # up to `onecounter_time` hours before STD
    for col in range(len(df_Seats.columns)):
        for i in range(len(df_Seats.index)):
            # a non-zero cell means flights depart in that slot
            if df_Seats.iloc[i, col] != 0:
                for j in range(start_slot, onecounter_slot):
                    df_Counters_3d.iloc[i + offset + j, col] = (
                        df_Counters_3d.iloc[i + offset + j, col]
                        + df_Seats.iloc[i, col]
                    )

    # the table now holds seats: convert them to counters with the rule
    # (the 200-seat threshold is hard-coded)
    for col in range(len(df_Counters_3d.columns)):
        for i in range(len(df_Counters_3d.index)):
            if 0 < df_Counters_3d.iloc[i, col]:
                df_Counters_3d.iloc[i, col] = max(
                    base_n_counter,
                    base_n_counter
                    + ((df_Counters_3d.iloc[i, col] - 200) // seats_per_add_counter),
                )

    # Then the last period before STD: a single counter, and only where no
    # other flight of the airline already has counters open
    for col in range(len(df_Seats.columns)):
        for i in range(len(df_Seats.index)):
            if df_Seats.iloc[i, col] != 0:
                for j in range(onecounter_slot, 1):
                    if df_Counters_3d.iloc[i + offset + j, col] == 0:
                        df_Counters_3d.iloc[i + offset + j, col] = 1

    # fold the three days back onto a single day
    df_Counters_final = pd.DataFrame(
        0.0, index=df_Seats.index, columns=df_Seats.columns
    )
    for i in range(len(df_Counters_final.index)):
        df_Counters_final.iloc[i, :] = (
            df_Counters_3d.iloc[i, :]
            + df_Counters_3d.iloc[i + offset, :]
            + df_Counters_3d.iloc[i + 2 * offset, :]
        )
    df_Counters_final["total"] = df_Counters_final.sum(axis=1)
    return df_Counters_final


def show_up_from_schedule(
    path_to_schedule: Path,
    direction: str = "D",
    sector: str = "I",
    terminal: str = "T1",
    system: str = "terminal",
    date_str: str = "2017-03-19",
    custom_showup: bool = False,
    custom_counter_rule: bool = False,
    **kwargs,
):
    """Derive passenger show-up times or check-in counters from a schedule.

    The schedule is read from the sheet "schedule" of ``path_to_schedule`` and
    filtered on passenger flights ("Category(P/C/O)" == "P") matching
    ``direction``, ``sector``, ``terminal`` and ``date_str``.  Show-up profiles
    and the airline code table are read from the workbook designated by the
    ``ADRM_param_full_path`` entry of the .env configuration.

    Args:
        path_to_schedule: Excel file holding the processed schedule.
        direction: value expected in column "A/D".
        sector: value expected in column "Sector".
        terminal: value expected in column "T1/T2(MM/9C/7C/TW)".
        system: one of "check-in", "terminal", "security", "CTG", "boarding"
            or "arrivals".
        date_str: flight date to keep, parsed with ``pd.Timestamp``.
        custom_showup: when true, ``loc_X`` / ``scale_X`` keyword arguments
            are required for X in FSC, LCC, CHINA, EARLY; they replace the
            terminal profiles by ``1 - norm.cdf`` curves (only the "terminal"
            system uses them).
        custom_counter_rule: when true, ``start_time``, ``onecounter_time``,
            ``base_n_counter`` and ``seats_per_add_counter`` keyword arguments
            replace the default check-in rule (2.5, 0.75, 4, 60).
        **kwargs: the values described above, plus ``CTG_type`` which is
            required when ``system == "CTG"``.

    Returns:
        For "check-in": a DataFrame of counters per 5-minute slot and airline
        code, with a "total" column.  Otherwise a tuple
        ``(list_time_Pax, df_Pax)`` where ``df_Pax`` has the columns
        "Flight Number", "time" and "Scheduled Time", one row per passenger.
        All times are placed on the dummy day 2020-10-13.

    Raises:
        ValueError: unknown ``system``, or "CTG" requested without
            ``CTG_type``.
        KeyError: a custom flag is set but one of its keyword arguments is
            missing.
    """
    known_systems = ("check-in", "terminal", "security", "CTG", "boarding", "arrivals")
    if system not in known_systems:
        raise ValueError(
            "unknown system {!r}, expected one of {}".format(system, known_systems)
        )

    # ============================ preparatory work ============================
    # get env variables (path to the show-up profiles workbook)
    DOTENV_FILE_PATH = Path(os.getcwd()) / "../../../data/secret/.env"
    config = AutoConfig(search_path=DOTENV_FILE_PATH)

    path_show_up = (
        Path(os.getcwd()) / ".." / ".." / ".." / config("ADRM_param_full_path")
    )

    # import the airline_code table (tells which airline codes are FSC)
    airline_code = pd.read_excel(
        path_show_up,
        sheet_name=r"airline_code",
        header=0,
    )

    # if custom showup, collect the (mean, standard deviation) of each group
    if custom_showup:
        custom_params = {
            group: (kwargs["loc_" + group], kwargs["scale_" + group])
            for group in ("FSC", "LCC", "CHINA", "EARLY")
        }

    # import the schedule
    data = pd.read_excel(
        path_to_schedule,
        sheet_name=r"schedule",
        header=0,
    )

    # put every scheduled time on the same dummy day to get full timestamps
    data["Scheduled Time"] = "2020-10-13 " + data["Scheduled Time"].astype(str)
    data["Scheduled Time"] = pd.to_datetime(data["Scheduled Time"])

    data["Flight Number"] = data["Flight Number"].replace(["JX821"], "JX 821")

    # ================================ filter =================================
    filtered_data = data[
        (
            (data["A/D"] == direction)
            & (data["Sector"] == sector)
            & (data["Category(P/C/O)"] == "P")
            & (data["T1/T2(MM/9C/7C/TW)"] == terminal)
            & (data["Flight Date"] == pd.Timestamp(date_str))
        )
    ]
    filtered_data = filtered_data.reset_index()

    # =============================== Counters ================================
    if system == "check-in":
        rule = {
            "start_time": 2.5,  # hours before STD for check-in opening
            "onecounter_time": 0.75,  # hours before STD with only one counter
            "base_n_counter": 4,
            "seats_per_add_counter": 60,
        }
        # in case we change the check-in counter allocation rule
        if custom_counter_rule:
            rule = {name: kwargs[name] for name in rule}
        return _check_in_counters(filtered_data, **rule)

    # =============================== Show-up =================================
    early_band = ("2020-10-13 02:00:00", "2020-10-13 08:00:00", "EARLY")
    morning_band = ("2020-10-13 08:00:00", "2020-10-13 12:00:00", "MORNING")

    if system == "terminal":
        x, profiles = _read_profiles(
            path_show_up,
            sheet_name=r"terminal",
            header=1,
            time_column="time before STD",
            columns={
                group: "cumulative distribution " + group
                for group in ("FSC", "LCC", "EARLY", "CHINA")
            },
            drop_rows=[0, 1],
        )
        if custom_showup:
            profiles = {
                group: _normal_survival(*custom_params[group]) for group in profiles
            }
        inverse = {group: _inverse_profile(f, x) for group, f in profiles.items()}
        pick_inverse = _departure_picker(
            filtered_data, airline_code, inverse, [early_band]
        )

    elif system == "security":
        x, profiles = _read_profiles(
            path_show_up,
            sheet_name=r"PRS",
            header=1,
            time_column="time before STD",
            columns={
                group: "cumulative distribution " + group
                for group in ("FSC", "LCC", "EARLY", "CHINA", "MORNING")
            },
            drop_rows=[0, 1],
        )
        inverse = {group: _inverse_profile(f, x) for group, f in profiles.items()}
        pick_inverse = _departure_picker(
            filtered_data, airline_code, inverse, [early_band, morning_band]
        )

    elif system == "CTG":
        if "CTG_type" not in kwargs:
            raise ValueError('system="CTG" requires a CTG_type keyword argument')
        CTG_type = kwargs["CTG_type"]
        x, profiles = _read_profiles(
            path_show_up,
            sheet_name=r"CTG",
            header=1,
            time_column="time before STD",
            columns={
                "C": "cumulative distribution code C type {}".format(CTG_type),
                "E": "cumulative distribution code E type {}".format(CTG_type),
            },
            drop_rows=[0, 1, 2],
        )
        inverse = {code: _inverse_profile(f, x) for code, f in profiles.items()}
        pick_inverse = _aircraft_picker(filtered_data, inverse)

    elif system == "boarding":
        x, profiles = _read_profiles(
            path_show_up,
            sheet_name=r"boarding",
            header=0,
            time_column="time before STD",
            columns={
                "C": "cumulative distribution code C",
                "E": "cumulative distribution code E",
            },
        )
        # only the first knots of each profile are inverted
        inverse = {
            "C": _inverse_profile(profiles["C"], x, n_points=10),
            "E": _inverse_profile(profiles["E"], x, n_points=12),
        }
        pick_inverse = _aircraft_picker(filtered_data, inverse)

    else:  # system == "arrivals" (deboarding)
        x, profiles = _read_profiles(
            path_show_up,
            sheet_name=r"deboarding",
            header=1,
            time_column="time after STA",
            columns={
                "C": "cumulative distribution code C",
                "E": "cumulative distribution code E",
            },
        )
        # only the first knots of each profile are inverted
        inverse = {
            "C": _inverse_profile(profiles["C"], x, n_points=3),
            "E": _inverse_profile(profiles["E"], x, n_points=4),
        }
        # NOTE(review): the profile is indexed by "time after STA" yet the
        # value is subtracted from the scheduled time like for departures.
        # Kept as found -- confirm the sign convention of the sheet.
        pick_inverse = _aircraft_picker(filtered_data, inverse)

    list_time_Pax, list_flights, list_ST = _spread_passengers(
        filtered_data, pick_inverse
    )

    dct_Pax = {
        "Flight Number": list_flights,
        "time": list_time_Pax,
        "Scheduled Time": list_ST,
    }
    df_Pax = pd.DataFrame(dct_Pax)
    return list_time_Pax, df_Pax

# Tests

In [None]:
from src.utils.profiles_from_schedule import generate_dep_Pax_Counters

In [None]:
# run the packaged helper on the processed schedule; it returns two objects
# (presumably the passenger and counter tables -- confirm in src.utils)
df_Pax, df_Counters = generate_dep_Pax_Counters(path_to_schedule=output_path)