# Fix pathing

In [1]:
import sys


# Extend the import search path so the project modules imported in the cells
# below can be found (presumably they live two directories up - confirm).
# The entry is relative, so it resolves against the kernel's working directory.
# The guard keeps sys.path free of duplicate entries when this cell is re-run.
if "../.." not in sys.path:
    sys.path.append("../..")


In [2]:
import constants

import os


# Re-root the project path four directory levels above the value that the
# `constants` module provides.
# The untouched value is cached on the module the first time this cell runs, so
# re-running the cell recomputes from that cached value instead of climbing four
# more levels on every execution.
if not hasattr(constants, "_NOTEBOOK_ORIGINAL_PROJECT_DIRECTORY_PATH"):
    constants._NOTEBOOK_ORIGINAL_PROJECT_DIRECTORY_PATH = constants.PROJECT_DIRECTORY_PATH

constants.PROJECT_DIRECTORY_PATH = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(
    constants._NOTEBOOK_ORIGINAL_PROJECT_DIRECTORY_PATH
))))


# Imports

In [3]:
import plotter
import datahandler

import matplotlib.pyplot
import numpy as np
import pandas as pd
import seaborn as sns
import IPython.display


# Constants

In [4]:
# Name of the simulator output folder analysed by this notebook
# (looks like a timestamped run identifier - confirm against the simulator).
FOLDER_NAME = "2024_05_06_20_55_47_NONE"

# Directory holding the result CSVs: <parent of project directory>/Simulator/data/<FOLDER_NAME>.
FOLDER_PATH = os.path.join(
    os.path.dirname(constants.PROJECT_DIRECTORY_PATH),
    "Simulator",
    "data",
    FOLDER_NAME,
)


In [5]:
# Instantiate the OUS V2 preprocessor and run it.
# NOTE(review): presumably this performs the cleaning/processing/enhancing
# stages reported by the progress bars below - confirm in `datahandler`.
data_preprocessor = datahandler.DataPreprocessorOUS_V2()
data_preprocessor.execute()

# Build a loader for the same dataset. Note that the preprocessor *class* is
# passed here, not the `data_preprocessor` instance created above.
# NOTE(review): the meaning of the three positional flags given to `execute`
# is defined in `datahandler` and is not visible from this notebook.
data_loader = datahandler.DataLoader(datahandler.DataPreprocessorOUS_V2)
data_loader.execute(False, False, True)


Cleaning dataset: 100%|██████████| 2/2 [00:00<00:00, 3998.38it/s]


Processing dataset: 100%|██████████| 2/2 [00:00<00:00, 4006.02it/s]
Enhancing dataset: 100%|██████████| 2/2 [00:00<?, ?it/s]
Loading dataset: 100%|██████████| 2/2 [00:02<00:00,  1.49s/it]


# Methods

In [6]:
def load_csv(fileName, folder_path=None):
    """Load one result CSV and add a ``total_response_time`` column.

    Parameters
    ----------
    fileName : str
        Name of the file without the ``.csv`` extension.
    folder_path : str, optional
        Directory that contains the file. When omitted, the notebook-level
        ``FOLDER_PATH`` is used, which is the original behaviour.

    Returns
    -------
    pd.DataFrame
        The CSV contents plus ``total_response_time``, the row-wise sum of the
        four duration columns listed in ``response_time_cols``. The unit is
        whatever unit those columns are stored in.

    Raises
    ------
    FileNotFoundError
        If the CSV file does not exist.
    KeyError
        If any of the four duration columns is missing from the file.
    """
    if folder_path is None:
        folder_path = FOLDER_PATH

    df = pd.read_csv(os.path.join(folder_path, fileName + ".csv"))

    # Durations that together make up the response time.
    response_time_cols = [
        'duration_incident_creation',
        'duration_resource_appointment',
        'duration_resource_preparing_departure',
        'duration_dispatching_to_scene'
    ]
    # DataFrame.sum skips missing values by default, so a NaN duration counts as 0.
    df['total_response_time'] = df[response_time_cols].sum(axis=1)

    return df


In [7]:
def print_results(df: pd.DataFrame):
    """Display response-time statistics per (triage, urban) group.

    For every combination of ``triage_impression_during_call`` and ``urban``
    found in ``df`` the table shows the share of incidents whose
    ``total_response_time`` is strictly below the group's target, plus the mean
    and median response time converted from seconds to minutes.

    Groups without a target (any triage other than 'A' or 'H') have no
    compliance value; they are rendered as "N/A" instead of "nan%".

    Parameters
    ----------
    df : pd.DataFrame
        Must contain the columns ``triage_impression_during_call``, ``urban``
        and ``total_response_time`` (seconds). The frame is not modified.

    Returns
    -------
    None
        The styled table is rendered through ``IPython.display.display``.
    """
    # Response-time targets in seconds, keyed by (triage, urban).
    criteria = {
        ('A', True): 12 * 60,
        ('A', False): 25 * 60,
        ('H', True): 30 * 60,
        ('H', False): 40 * 60
    }

    # Fixed column order; also keeps the frame well-formed when `df` has no rows.
    columns = ['Triage', 'Urban', 'Compliance', 'Mean (min)', 'Median (min)']

    rows = []
    for (triage, urban), group in df.groupby(['triage_impression_during_call', 'urban']):
        response_time = group['total_response_time']
        limit = criteria.get((triage, urban))
        # NaN (not None) keeps the column numeric when a group has no target.
        compliance = (response_time < limit).mean() if limit is not None else float('nan')
        rows.append({
            'Triage': triage,
            'Urban': urban,
            'Compliance': compliance,
            'Mean (min)': response_time.mean() / 60,
            'Median (min)': response_time.median() / 60,
        })

    stats = pd.DataFrame(rows, columns=columns)

    # Map urban values to Yes/No
    stats['Urban'] = stats['Urban'].map({True: 'Yes', False: 'No'})

    # Urban rows first ('Yes' sorts after 'No', hence descending), then by triage.
    stats = stats.sort_values(by=["Urban", "Triage"], ascending=[False, True])

    # Display the DataFrame; na_rep replaces missing values with a readable marker.
    formatted_stats = stats.style.format(
        {
            'Mean (min)': "{:.2f}",
            'Median (min)': "{:.2f}",
            'Compliance': "{:.2%}"
        },
        na_rep="N/A"
    ).hide(axis='index')
    IPython.display.display(formatted_stats)


In [8]:
def boxplot_time_at_steps_modified(
    dataframe: pd.DataFrame,
    triage_impression: str = None
):
    """Draw a horizontal box plot of the time spent in each incident step.

    Durations are divided by 60 and plotted on an axis labelled in minutes.
    Within each step only strictly positive durations are plotted, and
    outliers (fliers) are hidden.

    Parameters
    ----------
    dataframe : pd.DataFrame
        Frame with the seven ``duration_*`` columns listed in ``step_columns``.
        It is copied, never modified.
    triage_impression : str, optional
        When given, only rows whose ``triage_impression_during_call`` equals
        this value are plotted and the value is appended to the title.
    """
    # Display label -> duration column, in the order the steps are listed.
    step_columns = {
        "Creating Incident": "duration_incident_creation",
        "Appointing Resource": "duration_resource_appointment",
        "Resource to Start Task": "duration_resource_preparing_departure",
        "Dispatching to Scene": "duration_dispatching_to_scene",
        "At Scene": "duration_at_scene",
        "Dispatching to Hospital": "duration_dispatching_to_hospital",
        "At Hospital": "duration_at_hospital"
    }

    figure_title = "Time Taken At Each Step of the Incident"
    if triage_impression is None:
        # No filter requested: work on a copy of the full frame.
        subset = dataframe.copy()
    else:
        # Keep only the requested triage level, leaving the caller's frame untouched.
        matches_triage = dataframe["triage_impression_during_call"] == triage_impression
        subset = dataframe[matches_triage].copy()
        figure_title += f" ({triage_impression})"

    # One series per step: positive durations only, divided by 60.
    durations_per_step = []
    for column in step_columns.values():
        is_positive = subset[column] > 0
        durations_per_step.append(subset[column][is_positive] / 60)

    # Both lists are reversed before plotting, so the first step gets the last position.
    reversed_durations = durations_per_step[::-1]
    reversed_labels = list(step_columns.keys())[::-1]

    matplotlib.pyplot.figure(figsize=(8, 4))
    matplotlib.pyplot.boxplot(
        reversed_durations,
        labels=reversed_labels,
        vert=False,
        patch_artist=True,
        showfliers=False
    )
    matplotlib.pyplot.title(figure_title)
    matplotlib.pyplot.xlabel("Time in Minutes")
    matplotlib.pyplot.xticks()
    matplotlib.pyplot.show()


In [9]:
def boxplot_time_at_steps(
    historic_dataframe: pd.DataFrame,
    simulated_dataframe: pd.DataFrame,
    bounds: tuple[str, str] = None,
    triage_impressions: list[str] = None,
):
    """Compare historic and simulated step durations in one horizontal box plot.

    For every incident step and every requested triage level two boxes are
    drawn: the historic data (opaque) followed by the simulated data (same
    colour, alpha 0.65). Durations are shown in minutes.

    Parameters
    ----------
    historic_dataframe : pd.DataFrame
        Historic incidents with the ``time_*`` columns referenced in
        ``historic_steps`` and ``triage_impression_during_call``. The time
        columns must be datetime-typed because they are subtracted and read
        through ``.dt``. The frame is not modified.
    simulated_dataframe : pd.DataFrame
        Simulated incidents that already contain the ``duration_*`` columns;
        they are divided by 60 here, i.e. treated as seconds. The frame is not
        modified.
    bounds : tuple[str, str], optional
        Inclusive ``(start, end)`` window, parsed with ``pd.to_datetime`` and
        applied to the historic ``time_call_received`` column only.
    triage_impressions : list[str], optional
        Triage levels to plot. Defaults to ``["A", "H", "V1"]``.
    """
    # Resolve the default here rather than in the signature, so no mutable list
    # is shared between calls.
    if triage_impressions is None:
        triage_impressions = ["A", "H", "V1"]

    # Filter historic data by time frame first and copy afterwards: the new
    # duration columns are then written to an independent frame instead of a
    # slice of the caller's data (avoids SettingWithCopyWarning).
    historic_df = historic_dataframe
    if bounds is not None:
        start_bound, end_bound = pd.to_datetime(bounds[0]), pd.to_datetime(bounds[1])
        within_bounds = (
            (historic_df['time_call_received'] >= start_bound)
            & (historic_df['time_call_received'] <= end_bound)
        )
        historic_df = historic_df[within_bounds]
    historic_df = historic_df.copy()
    simulated_df = simulated_dataframe.copy()

    # Step -> (start column, end column[, fallback end column]).
    # The fallback is used for rows where the regular end timestamp is missing.
    historic_steps = {
        "duration_incident_creation": ("time_call_received", "time_incident_created"),
        "duration_resource_appointment": ("time_incident_created", "time_resource_appointed"),
        "duration_resource_preparing_departure": ("time_resource_appointed", "time_ambulance_dispatch_to_scene"),
        "duration_dispatching_to_scene": ("time_ambulance_dispatch_to_scene", "time_ambulance_arrived_at_scene"),
        "duration_at_scene": ("time_ambulance_arrived_at_scene", "time_ambulance_dispatch_to_hospital", "time_ambulance_available"),
        "duration_dispatching_to_hospital": ("time_ambulance_dispatch_to_hospital", "time_ambulance_arrived_at_hospital"),
        "duration_at_hospital": ("time_ambulance_arrived_at_hospital", "time_ambulance_available")
    }

    # Historic durations in minutes, derived from the timestamps.
    for step, times in historic_steps.items():
        step_end = historic_df[times[1]]
        if len(times) == 3:
            step_end = step_end.fillna(historic_df[times[2]])
        historic_df[step] = (step_end - historic_df[times[0]]).dt.total_seconds() / 60

    # Simulated durations: convert seconds to minutes.
    for step in historic_steps.keys():
        simulated_df[step] /= 60

    # A zero hospital duration in the simulated data is treated as "not applicable"
    # and replaced by NaN, so it is dropped before plotting.
    hospital_steps = ["duration_dispatching_to_hospital", "duration_at_hospital"]
    simulated_df[hospital_steps] = simulated_df[hospital_steps].replace(0, np.nan)

    # plot data
    matplotlib.pyplot.figure(figsize=(8, 8))

    # RGB per triage level; every other level falls back to green.
    triage_rgb = {
        "A": [1.00, 0.37, 0.28],
        "H": [0.12, 0.56, 1.00],
    }
    fallback_rgb = [0.20, 0.80, 0.20]

    data = []
    positions = []
    labels = []
    colors = []

    position = 0
    for step in historic_steps.keys():
        boxes_in_group = 0
        for triage in triage_impressions:
            # Historic box first (opaque), simulated box second (semi-transparent).
            for df, alpha in ((historic_df, 1.00), (simulated_df, 0.65)):
                data.append(df[df['triage_impression_during_call'] == triage][step].dropna())
                colors.append(triage_rgb.get(triage, fallback_rgb) + [alpha])
                positions.append(position)
                position += 0.75
                boxes_in_group += 1
        # One tick label per box: the step name on the first box of the group,
        # empty strings on the rest.
        if boxes_in_group:
            labels.append(f"{step}")
            labels.extend([""] * (boxes_in_group - 1))
        # Extra gap between consecutive step groups.
        position += 1

    # Data and labels are reversed while positions stay ascending, so the first
    # step is drawn at the highest position.
    bplot = matplotlib.pyplot.boxplot(
        data[::-1],
        labels=labels[::-1],
        positions=positions,
        vert=False,
        patch_artist=True,
        showfliers=True
    )

    # Colours are reversed the same way so they stay aligned with the boxes.
    for patch, color in zip(bplot["boxes"], colors[::-1]):
        patch.set_facecolor(color)

    matplotlib.pyplot.title("Time Taken At Each Step of the Incident")
    matplotlib.pyplot.xlabel("Time in Minutes")
    matplotlib.pyplot.xticks()
    matplotlib.pyplot.show()


In [10]:
def table_results(filter_urban: bool = None, strategies = None):
    """Build a styled table of overall compliance per strategy and setting combination.

    One result file is loaded (through ``load_csv``) for every combination of
    strategy, triage prioritisation, response restriction and scheduled breaks.
    For each file the overall compliance is the share of triage 'A' and 'H'
    incidents whose ``total_response_time`` (seconds) is strictly below the
    target for their (triage, urban) group.

    Parameters
    ----------
    filter_urban : bool, optional
        ``True`` counts only urban incidents, ``False`` only non-urban ones,
        ``None`` (default) counts both.
    strategies : list[str], optional
        Strategy names as they appear in the file names. Defaults to
        ``["closest", "random"]``.

    Returns
    -------
    pandas Styler
        Pivot table (rows: schedule breaks / prioritize triage / response
        restricted, columns: strategy) formatted as percentages with a
        background gradient.
    """
    # Resolve the default here rather than in the signature, so no mutable list
    # is shared between calls.
    if strategies is None:
        strategies = ["closest", "random"]

    # Response-time targets in seconds, keyed by (triage, urban).
    criteria = {
        ('A', True): 12 * 60,  # 12 minutes for triage 'A' in urban areas
        ('A', False): 25 * 60, # 25 minutes for triage 'A' in rural areas
        ('H', True): 30 * 60,  # 30 minutes for triage 'H' in urban areas
        ('H', False): 40 * 60  # 40 minutes for triage 'H' in rural areas
    }

    def calculate_compliance(df):
        """Return the pooled compliance rate for ``df``, or None without countable cases."""
        if df is None:
            return None

        total_compliant_cases = 0
        total_cases = 0

        # Pool compliant and total counts over every (triage, urban) group in scope.
        for (triage, urban), limit in criteria.items():
            if filter_urban is not None and urban != filter_urban:
                continue
            filtered_df = df[(df['triage_impression_during_call'] == triage) & (df['urban'] == urban)]
            total_compliant_cases += (filtered_df['total_response_time'] < limit).sum()
            total_cases += len(filtered_df)

        if total_cases > 0:
            return total_compliant_cases / total_cases
        return None  # No cases to evaluate

    def as_flag(enabled):
        """Spell a boolean the way the result file names do."""
        return 'true' if enabled else 'false'

    # Settings combinations
    prioritize_triages = [False, True]
    response_restricteds = [False, True]
    schedule_breaks = [False, True]

    results = []

    for strategy in strategies:
        for prioritize_triage in prioritize_triages:
            for response_restricted in response_restricteds:
                for schedule_break in schedule_breaks:
                    filename = (
                        f"events_strategy={strategy}"
                        f"_prioritizeTriage={as_flag(prioritize_triage)}"
                        f"_responseRestricted={as_flag(response_restricted)}"
                        f"_scheduleBreaks={as_flag(schedule_break)}"
                    )
                    compliance = calculate_compliance(load_csv(filename))
                    if compliance is not None:
                        results.append({
                            "Strategy": strategy,
                            "Prioritize Triage": prioritize_triage,
                            "Response Restricted": response_restricted,
                            "Schedule Breaks": schedule_break,
                            "Compliance": compliance
                        })

    # Create DataFrame
    results_df = pd.DataFrame(results)
    # Pivot Table for visualization
    pivot_table = results_df.pivot_table(index=["Schedule Breaks", "Prioritize Triage", "Response Restricted"],
                                        columns=["Strategy"],
                                        values="Compliance")
    # Colour map built from seaborn's light "green" palette
    cm = sns.light_palette("green", as_cmap=True, n_colors=8)
    styled_pivot = pivot_table.style.background_gradient(cmap=cm).format("{:.2%}")

    return styled_pivot


# Main

In [11]:
# Overall compliance with urban and non-urban incidents pooled (no urban filter).
table_results()


Unnamed: 0_level_0,Unnamed: 1_level_0,Strategy,closest,random
Schedule Breaks,Prioritize Triage,Response Restricted,Unnamed: 3_level_1,Unnamed: 4_level_1
False,False,False,80.88%,22.06%
False,False,True,79.41%,23.53%
False,True,False,80.15%,22.79%
False,True,True,79.41%,22.06%
True,False,False,80.15%,25.00%
True,False,True,75.74%,22.06%
True,True,False,79.41%,26.47%
True,True,True,77.21%,21.32%


In [12]:
# Overall compliance counting only incidents with urban == True.
table_results(filter_urban=True)


Unnamed: 0_level_0,Unnamed: 1_level_0,Strategy,closest,random
Schedule Breaks,Prioritize Triage,Response Restricted,Unnamed: 3_level_1,Unnamed: 4_level_1
False,False,False,79.34%,23.97%
False,False,True,77.69%,23.97%
False,True,False,78.51%,23.97%
False,True,True,77.69%,22.31%
True,False,False,78.51%,27.27%
True,False,True,73.55%,21.49%
True,True,False,77.69%,27.27%
True,True,True,75.21%,22.31%


In [13]:
# Overall compliance counting only incidents with urban == False.
table_results(filter_urban=False)


Unnamed: 0_level_0,Unnamed: 1_level_0,Strategy,closest,random
Schedule Breaks,Prioritize Triage,Response Restricted,Unnamed: 3_level_1,Unnamed: 4_level_1
False,False,False,93.33%,6.67%
False,False,True,93.33%,20.00%
False,True,False,93.33%,13.33%
False,True,True,93.33%,20.00%
True,False,False,93.33%,6.67%
True,False,True,93.33%,26.67%
True,True,False,93.33%,20.00%
True,True,True,93.33%,13.33%


In [14]:
# Per-group breakdown: closest strategy; prioritizeTriage off, responseRestricted off, scheduleBreaks off.
print_results(load_csv("events_strategy=closest_prioritizeTriage=false_responseRestricted=false_scheduleBreaks=false"))


Triage,Urban,Compliance,Mean (min),Median (min)
A,Yes,68.00%,10.06,9.93
H,Yes,87.32%,21.01,17.62
V1,Yes,nan%,59.23,51.9
A,No,75.00%,20.35,21.36
H,No,100.00%,25.83,27.52
V1,No,nan%,90.57,84.51


In [15]:
# Per-group breakdown: closest strategy; prioritizeTriage off, responseRestricted off, scheduleBreaks on.
print_results(load_csv("events_strategy=closest_prioritizeTriage=false_responseRestricted=false_scheduleBreaks=true"))


Triage,Urban,Compliance,Mean (min),Median (min)
A,Yes,70.00%,10.38,10.23
H,Yes,84.51%,21.39,18.42
V1,Yes,nan%,59.86,51.35
A,No,75.00%,20.38,21.62
H,No,100.00%,26.29,27.6
V1,No,nan%,90.8,84.89


In [16]:
# Per-group breakdown: closest strategy; prioritizeTriage off, responseRestricted on, scheduleBreaks off.
print_results(load_csv("events_strategy=closest_prioritizeTriage=false_responseRestricted=true_scheduleBreaks=false"))


Triage,Urban,Compliance,Mean (min),Median (min)
A,Yes,64.00%,11.18,10.82
H,Yes,87.32%,20.86,18.65
V1,Yes,nan%,59.49,53.93
A,No,75.00%,19.14,19.11
H,No,100.00%,25.57,23.38
V1,No,nan%,90.61,84.53


In [17]:
# Per-group breakdown: closest strategy; prioritizeTriage off, responseRestricted on, scheduleBreaks on.
print_results(load_csv("events_strategy=closest_prioritizeTriage=false_responseRestricted=true_scheduleBreaks=true"))


Triage,Urban,Compliance,Mean (min),Median (min)
A,Yes,58.00%,11.9,11.32
H,Yes,84.51%,21.35,18.62
V1,Yes,nan%,59.34,52.12
A,No,75.00%,18.59,18.29
H,No,100.00%,26.04,27.6
V1,No,nan%,90.69,84.71


In [18]:
# Per-group breakdown: closest strategy; prioritizeTriage on, responseRestricted off, scheduleBreaks off.
print_results(load_csv("events_strategy=closest_prioritizeTriage=true_responseRestricted=false_scheduleBreaks=false"))


Triage,Urban,Compliance,Mean (min),Median (min)
A,Yes,68.00%,10.3,10.05
H,Yes,85.92%,21.16,18.5
V1,Yes,nan%,59.6,52.55
A,No,75.00%,17.82,16.89
H,No,100.00%,26.32,27.65
V1,No,nan%,90.58,84.6


In [19]:
# Per-group breakdown: closest strategy; prioritizeTriage on, responseRestricted off, scheduleBreaks on.
print_results(load_csv("events_strategy=closest_prioritizeTriage=true_responseRestricted=false_scheduleBreaks=true"))


Triage,Urban,Compliance,Mean (min),Median (min)
A,Yes,72.00%,10.5,10.31
H,Yes,81.69%,21.11,17.35
V1,Yes,nan%,61.28,51.08
A,No,75.00%,18.96,18.62
H,No,100.00%,26.3,27.85
V1,No,nan%,90.55,84.61


In [20]:
# Per-group breakdown: closest strategy; prioritizeTriage on, responseRestricted on, scheduleBreaks off.
print_results(load_csv("events_strategy=closest_prioritizeTriage=true_responseRestricted=true_scheduleBreaks=false"))


Triage,Urban,Compliance,Mean (min),Median (min)
A,Yes,66.00%,10.74,10.19
H,Yes,85.92%,20.88,18.23
V1,Yes,nan%,59.67,54.13
A,No,75.00%,18.78,18.49
H,No,100.00%,25.54,23.65
V1,No,nan%,90.6,84.47


In [21]:
# Per-group breakdown: closest strategy; prioritizeTriage on, responseRestricted on, scheduleBreaks on.
print_results(load_csv("events_strategy=closest_prioritizeTriage=true_responseRestricted=true_scheduleBreaks=true"))


Triage,Urban,Compliance,Mean (min),Median (min)
A,Yes,66.00%,10.53,10.32
H,Yes,81.69%,21.43,18.6
V1,Yes,nan%,61.53,54.07
A,No,75.00%,18.1,17.5
H,No,100.00%,25.32,23.33
V1,No,nan%,90.47,84.35


In [22]:
# Per-group breakdown: random strategy; prioritizeTriage off, responseRestricted off, scheduleBreaks off.
print_results(load_csv("events_strategy=random_prioritizeTriage=false_responseRestricted=false_scheduleBreaks=false"))


Triage,Urban,Compliance,Mean (min),Median (min)
A,Yes,6.00%,23.95,21.78
H,Yes,36.62%,40.48,37.05
V1,Yes,nan%,79.23,74.23
A,No,0.00%,36.62,37.88
H,No,9.09%,61.95,59.68
V1,No,nan%,134.76,129.42


In [23]:
# Per-group breakdown: random strategy; prioritizeTriage off, responseRestricted off, scheduleBreaks on.
print_results(load_csv("events_strategy=random_prioritizeTriage=false_responseRestricted=false_scheduleBreaks=true"))


Triage,Urban,Compliance,Mean (min),Median (min)
A,Yes,12.00%,25.65,24.03
H,Yes,38.03%,40.3,35.93
V1,Yes,nan%,85.39,77.85
A,No,0.00%,37.75,37.26
H,No,9.09%,55.01,51.22
V1,No,nan%,124.37,120.64


In [24]:
# Per-group breakdown: random strategy; prioritizeTriage off, responseRestricted on, scheduleBreaks off.
print_results(load_csv("events_strategy=random_prioritizeTriage=false_responseRestricted=true_scheduleBreaks=false"))


Triage,Urban,Compliance,Mean (min),Median (min)
A,Yes,10.00%,25.54,24.81
H,Yes,33.80%,40.98,37.45
V1,Yes,nan%,80.7,75.68
A,No,50.00%,41.68,38.13
H,No,9.09%,60.03,64.32
V1,No,nan%,119.72,109.57


In [25]:
# Per-group breakdown: random strategy; prioritizeTriage off, responseRestricted on, scheduleBreaks on.
print_results(load_csv("events_strategy=random_prioritizeTriage=false_responseRestricted=true_scheduleBreaks=true"))


Triage,Urban,Compliance,Mean (min),Median (min)
A,Yes,8.00%,26.08,24.41
H,Yes,30.99%,42.55,38.3
V1,Yes,nan%,77.59,69.73
A,No,25.00%,32.7,32.64
H,No,27.27%,55.37,59.15
V1,No,nan%,138.18,135.93


In [26]:
# Per-group breakdown: random strategy; prioritizeTriage on, responseRestricted off, scheduleBreaks off.
print_results(load_csv("events_strategy=random_prioritizeTriage=true_responseRestricted=false_scheduleBreaks=false"))


Triage,Urban,Compliance,Mean (min),Median (min)
A,Yes,4.00%,27.44,24.22
H,Yes,38.03%,39.31,35.37
V1,Yes,nan%,82.34,81.15
A,No,0.00%,38.19,37.63
H,No,18.18%,57.74,48.3
V1,No,nan%,134.36,125.6


In [27]:
# Per-group breakdown: random strategy; prioritizeTriage on, responseRestricted off, scheduleBreaks on.
print_results(load_csv("events_strategy=random_prioritizeTriage=true_responseRestricted=false_scheduleBreaks=true"))


Triage,Urban,Compliance,Mean (min),Median (min)
A,Yes,14.00%,23.58,22.78
H,Yes,36.62%,41.21,36.25
V1,Yes,nan%,92.13,83.42
A,No,25.00%,35.45,35.1
H,No,18.18%,48.47,45.4
V1,No,nan%,148.79,142.29


In [28]:
# Per-group breakdown: random strategy; prioritizeTriage on, responseRestricted on, scheduleBreaks off.
print_results(load_csv("events_strategy=random_prioritizeTriage=true_responseRestricted=true_scheduleBreaks=false"))


Triage,Urban,Compliance,Mean (min),Median (min)
A,Yes,8.00%,25.28,23.2
H,Yes,32.39%,43.46,35.9
V1,Yes,nan%,89.07,82.52
A,No,25.00%,34.46,33.61
H,No,18.18%,58.51,51.7
V1,No,nan%,129.86,134.82


In [29]:
# Per-group breakdown: random strategy; prioritizeTriage on, responseRestricted on, scheduleBreaks on.
print_results(load_csv("events_strategy=random_prioritizeTriage=true_responseRestricted=true_scheduleBreaks=true"))


Triage,Urban,Compliance,Mean (min),Median (min)
A,Yes,6.00%,26.22,24.48
H,Yes,33.80%,41.75,34.92
V1,Yes,nan%,88.28,78.02
A,No,25.00%,31.28,30.19
H,No,9.09%,61.03,59.6
V1,No,nan%,116.98,93.26
