In [1]:
import pandas as pd
import _processing_functions

In [2]:
path="../data/run_results.csv"
params_path="../data/run_params_used.csv"

df = pd.read_csv(path)
params_df = pd.read_csv(params_path)
n_runs = len(df["run_number"].unique())

# Add callsign column if not already present in the dataframe passed to the function
if 'callsign' not in df.columns:
    df = _processing_functions.make_callsign_column(df)

df

Unnamed: 0,P_ID,run_number,time_type,event_type,timestamp,timestamp_dt,day,hour,weekday,month,qtr,callsign_group,vehicle_type,ampds_card,age,sex,hems_result,outcome,callsign
0,1,1,71,resource_preferred_resource_group,0.0,2025-02-17 08:00:00,Mon,8,weekday,2,1,,,31,55.0,Male,,,
1,1,1,helicopter,resource_preferred_vehicle_type,0.0,2025-02-17 08:00:00,Mon,8,weekday,2,1,,,31,55.0,Male,,,
2,1,1,Preferred resource not and available but other...,resource_preferred_outcome,0.0,2025-02-17 08:00:00,Mon,8,weekday,2,1,71.0,car,31,55.0,Male,,,C71
3,1,1,CC71,resource_use,0.0,2025-02-17 08:00:00,Mon,8,weekday,2,1,71.0,car,31,55.0,Male,,,C71
4,1,1,arrival,arrival_departure,0.0,2025-02-17 08:00:00,Mon,8,weekday,2,1,71.0,car,31,55.0,Male,,,C71
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
415656,1725,9,HEMS call start,queue,525540.0,2026-02-17 07:00:00,Tue,7,weekday,2,1,70.0,helicopter,17,65.0,Male,Patient Conveyed,Airlifted,H70
415657,1725,9,HEMS allocated to call,queue,525565.0,2026-02-17 07:25:00,Tue,7,weekday,2,1,70.0,helicopter,17,65.0,Male,Patient Conveyed,Airlifted,H70
415658,1725,9,HEMS mobile,queue,525568.0,2026-02-17 07:28:00,Tue,7,weekday,2,1,70.0,helicopter,17,65.0,Male,Patient Conveyed,Airlifted,H70
415659,1725,9,HEMS on scene,queue,525580.0,2026-02-17 07:40:00,Tue,7,weekday,2,1,70.0,helicopter,17,65.0,Male,Patient Conveyed,Airlifted,H70


In [3]:
# let's make the decision to limit all calculations to jobs that started after the warm-up period had elapsed

In [4]:
df["hems_result"] = df.groupby(['P_ID', 'run_number']).hems_result.bfill()
df["outcome"] = df.groupby(['P_ID', 'run_number']).outcome.bfill()


call_df = df[df["time_type"] == "arrival"].drop(columns=['time_type', "event_type"])
call_df

Unnamed: 0,P_ID,run_number,timestamp,timestamp_dt,day,hour,weekday,month,qtr,callsign_group,vehicle_type,ampds_card,age,sex,hems_result,outcome,callsign
4,1,1,0.0,2025-02-17 08:00:00,Mon,8,weekday,2,1,71.0,car,31,55.0,Male,Patient Treated (not conveyed),Deceased,C71
10,2,1,180.0,2025-02-17 11:00:00,Mon,11,weekday,2,1,71.0,car,29,71.0,Female,Stand Down En Route,Unknown,C71
22,3,1,1440.0,2025-02-18 08:00:00,Tue,8,weekday,2,1,70.0,car,29,13.0,Male,Stand Down En Route,Unknown,C70
34,4,1,2100.0,2025-02-18 19:00:00,Tue,19,weekday,2,1,70.0,car,9,73.0,Female,Stand Down En Route,Unknown,C70
46,5,1,2880.0,2025-02-19 08:00:00,Wed,8,weekday,2,1,70.0,car,9,51.0,Male,Patient Treated (not conveyed),Conveyed by land without DAA,C70
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
415600,1721,9,524340.1,2026-02-16 11:00:06,Mon,11,weekday,2,1,,,30,63.0,Male,,,
415616,1722,9,524880.0,2026-02-16 20:00:00,Mon,20,weekday,2,1,70.0,helicopter,30,17.0,Male,Stand Down En Route,Unknown,H70
415628,1723,9,525000.0,2026-02-16 22:00:00,Mon,22,weekday,2,1,70.0,helicopter,27,39.0,Male,Patient Treated (not conveyed),Deceased,H70
415641,1724,9,525180.0,2026-02-17 01:00:00,Tue,1,weekday,2,1,70.0,helicopter,30,89.0,Male,Patient Conveyed,Airlifted,H70


## Explore total arrivals per run

In [5]:
call_df.groupby('run_number')[['P_ID']].count().reset_index()

Unnamed: 0,run_number,P_ID
0,1,1696
1,2,1636
2,3,1659
3,4,1690
4,5,1663
5,6,1650
6,7,1748
7,8,1699
8,9,1725
9,10,1741


## Explore average arrivals per run

In [6]:
call_df.groupby('run_number')[['P_ID']].count().reset_index().mean()['P_ID'].round(2)

1685.6

# Explore calls per hour

In [7]:
hourly_calls_per_run = call_df.groupby(['hour', 'run_number'])[['P_ID']].count().reset_index().rename(columns={"P_ID": "count"})
hourly_calls_per_run

Unnamed: 0,hour,run_number,count
0,0,1,33
1,0,2,22
2,0,3,36
3,0,4,32
4,0,5,24
...,...,...,...
438,23,16,44
439,23,17,43
440,23,18,48
441,23,19,27


In [8]:
import plotly.express as px

In [9]:
px.box(hourly_calls_per_run, x="hour", y="count").update_xaxes(dtick=1)

In [10]:
aggregated_data = hourly_calls_per_run.groupby("hour").agg(
    mean_count=("count", "mean"),
    std_count=("count", "std")
).reset_index()

px.bar(
    aggregated_data,
    x="hour",
    y="mean_count",
    error_y="std_count",
    labels={"mean_count": "Average Count"},
    title="Total Calls per Run - By Hour"
).update_xaxes(dtick=1)

In [11]:
aggregated_data = hourly_calls_per_run.groupby("hour").agg(
    mean_count=("count", "mean"),
    std_count=("count", "std")
).reset_index()

aggregated_data['mean_count'] = aggregated_data['mean_count'] / (float(_processing_functions.get_param("sim_duration", params_df))/60/24)
aggregated_data['std_count'] = aggregated_data['std_count'] / (float(_processing_functions.get_param("sim_duration", params_df))/60/24)

px.bar(
    aggregated_data,
    x="hour",
    y="mean_count",
    error_y="std_count",
    labels={"mean_count": "Average Count"},
    title="Total Calls per Run - By Hour"
).update_xaxes(dtick=1)

In [12]:
float(_processing_functions.get_param("sim_duration", params_df))/60/24

365.0

In [13]:
hourly_calls_per_run['average_per_day'] = hourly_calls_per_run['count'] / (float(_processing_functions.get_param("sim_duration", params_df))/60/24)
px.box(hourly_calls_per_run, x="hour", y="average_per_day").update_xaxes(dtick=1)

## Explore resource preferred outcome col

In [14]:
df[df['event_type']=="resource_preferred_outcome"].groupby(["run_number",'time_type'])[['time_type']].count()

Unnamed: 0_level_0,Unnamed: 1_level_0,time_type
run_number,time_type,Unnamed: 2_level_1
1,No resource in group available,20
1,Preferred resource available and allocated,987
1,Preferred resource not and available but other resource in same group allocated,689
2,No resource in group available,20
2,Preferred resource available and allocated,857
2,Preferred resource not and available but other resource in same group allocated,759
3,No resource in group available,15
3,Preferred resource available and allocated,857
3,Preferred resource not and available but other resource in same group allocated,787
4,No resource in group available,32


In [15]:
(df[df['event_type']=="resource_preferred_outcome"].groupby(['time_type'])[['time_type']].count()/n_runs).round(0).astype('int').rename(columns={'time_type': 'Count'}).reset_index().rename(columns={'time_type': 'Resource Allocation Attempt Outcome'})

Unnamed: 0,Resource Allocation Attempt Outcome,Count
0,No resource in group available,24
1,Preferred resource available and allocated,933
2,Preferred resource not and available but other...,729


In [16]:
(df[df['event_type']=="resource_preferred_outcome"].groupby(['time_type', 'run_number'])[['time_type']].count()/n_runs).round(0).astype('int').rename(columns={'time_type': 'Count'}).reset_index().rename(columns={'time_type': 'Resource Allocation Attempt Outcome', 'run_number': "Run"})

Unnamed: 0,Resource Allocation Attempt Outcome,Run,Count
0,No resource in group available,1,1
1,No resource in group available,2,1
2,No resource in group available,3,1
3,No resource in group available,4,2
4,No resource in group available,5,1
5,No resource in group available,6,2
6,No resource in group available,7,1
7,No resource in group available,8,1
8,No resource in group available,9,2
9,No resource in group available,10,2


In [17]:
import _job_count_calculation

_job_count_calculation.plot_hourly_call_counts(call_df, params_df)



In [18]:
from IPython.core.display import HTML
google_font_url = "https://fonts.googleapis.com/css2?family=Poppins:wght@400;700&display=swap"
# Inject the CSS into the Jupyter Notebook
HTML(f"""
<style>
@import url('{google_font_url}');
body, .plotly-title, .plotly-axis-label, .plotly-tick-label {{
    font-family: 'Poppins', sans-serif !important;
}}
</style>
""")

In [19]:
_job_count_calculation.plot_hourly_call_counts(call_df, params_df, average_per_hour=True)

In [20]:
_job_count_calculation.plot_hourly_call_counts(call_df, params_df, average_per_hour=True,
                                               error_bar_colour="red")

In [21]:
_job_count_calculation.plot_hourly_call_counts(call_df, params_df, average_per_hour=True, use_poppins=True)

In [22]:
_job_count_calculation.plot_hourly_call_counts(call_df, params_df, average_per_hour=False, box_plot=True)

In [23]:
_job_count_calculation.plot_hourly_call_counts(call_df, params_df, average_per_hour=False, box_plot=True, use_poppins=True)

In [24]:
_job_count_calculation.plot_hourly_call_counts(call_df, params_df, box_plot=True, average_per_hour=True)

In [25]:
import _vehicle_calculation

In [26]:
run_results = pd.read_csv("../data/run_results.csv")
df = _vehicle_calculation.resource_allocation_outcomes(run_results)
df

Unnamed: 0,Resource Allocation Attempt Outcome,Count
0,No resource in group available,24
1,Preferred resource available and allocated,933
2,Preferred resource not and available but other...,729


In [27]:
df[df["Resource Allocation Attempt Outcome"] =="No resource in group available"]['Count'].values[0]

24

In [28]:
df['Count'].sum()

1686

In [29]:
(7/488)*100

1.4344262295081966

In [30]:
call_df

Unnamed: 0,P_ID,run_number,timestamp,timestamp_dt,day,hour,weekday,month,qtr,callsign_group,vehicle_type,ampds_card,age,sex,hems_result,outcome,callsign
4,1,1,0.0,2025-02-17 08:00:00,Mon,8,weekday,2,1,71.0,car,31,55.0,Male,Patient Treated (not conveyed),Deceased,C71
10,2,1,180.0,2025-02-17 11:00:00,Mon,11,weekday,2,1,71.0,car,29,71.0,Female,Stand Down En Route,Unknown,C71
22,3,1,1440.0,2025-02-18 08:00:00,Tue,8,weekday,2,1,70.0,car,29,13.0,Male,Stand Down En Route,Unknown,C70
34,4,1,2100.0,2025-02-18 19:00:00,Tue,19,weekday,2,1,70.0,car,9,73.0,Female,Stand Down En Route,Unknown,C70
46,5,1,2880.0,2025-02-19 08:00:00,Wed,8,weekday,2,1,70.0,car,9,51.0,Male,Patient Treated (not conveyed),Conveyed by land without DAA,C70
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
415600,1721,9,524340.1,2026-02-16 11:00:06,Mon,11,weekday,2,1,,,30,63.0,Male,,,
415616,1722,9,524880.0,2026-02-16 20:00:00,Mon,20,weekday,2,1,70.0,helicopter,30,17.0,Male,Stand Down En Route,Unknown,H70
415628,1723,9,525000.0,2026-02-16 22:00:00,Mon,22,weekday,2,1,70.0,helicopter,27,39.0,Male,Patient Treated (not conveyed),Deceased,H70
415641,1724,9,525180.0,2026-02-17 01:00:00,Tue,1,weekday,2,1,70.0,helicopter,30,89.0,Male,Patient Conveyed,Airlifted,H70


In [31]:
call_df['timestamp_dt'] = pd.to_datetime(call_df['timestamp_dt'])
call_df['month_start'] = call_df['timestamp_dt'].dt.to_period('M').dt.to_timestamp()

call_df.groupby(['run_number', 'month_start'])[['P_ID']].count()

Unnamed: 0_level_0,Unnamed: 1_level_0,P_ID
run_number,month_start,Unnamed: 2_level_1
1,2025-02-01,68
1,2025-03-01,128
1,2025-04-01,166
1,2025-05-01,148
1,2025-06-01,162
...,...,...
20,2025-10-01,121
20,2025-11-01,108
20,2025-12-01,129
20,2026-01-01,144


In [32]:
px.line(
    call_df.groupby(['run_number', 'month_start'])[['P_ID']].count().reset_index(),
    x="month_start", y="P_ID", color="run_number")

In [33]:
from _app_utils import DAA_COLORSCHEME

In [34]:
import plotly.graph_objects as go
import numpy as np

call_counts_monthly = call_df.groupby(['run_number', 'month_start'])[['P_ID']].count().reset_index()

# Identify first and last month in the dataset
first_month = call_counts_monthly["month_start"].min()
last_month = call_counts_monthly["month_start"].max()

# Filter out the first and last month
call_counts_monthly = call_counts_monthly[(call_counts_monthly["month_start"] != first_month) & (call_counts_monthly["month_start"] != last_month)]

# Compute statistics
summary = call_counts_monthly.groupby("month_start")["P_ID"].agg(["mean", "std", "count"]).reset_index()
summary["ci95_hi"] = summary["mean"] + 1.96 * (summary["std"] / np.sqrt(summary["count"]))
summary["ci95_lo"] = summary["mean"] - 1.96 * (summary["std"] / np.sqrt(summary["count"]))

# Create the plot
fig = px.line(summary, x="month_start", y="mean",
              markers=True,
              labels={"mean": "Average Calls Per Month",
                      "month_start": "Month"},
              title="Number of Monthly Calls Received in Simulation")

# Add confidence interval as a shaded region
fig.add_traces([
    go.Scatter(
        x=summary["month_start"], y=summary["ci95_hi"], mode="lines", line=dict(width=0),
        showlegend=False
    ),
    go.Scatter(
        x=summary["month_start"], y=summary["ci95_lo"], mode="lines", fill="tonexty",
        line=dict(width=0), fillcolor="rgba(0, 176, 185, 0.4)",
        # fillcolor=DAA_COLORSCHEME['verylightblue'],
        # opacity=0.1,
        showlegend=True, name="95% Range"
    )
])

for run in call_counts_monthly["run_number"].unique():
    run_data = call_counts_monthly[call_counts_monthly["run_number"] == run]
    fig.add_trace(
        go.Scatter(
            x=run_data["month_start"], y=run_data["P_ID"],
            mode="lines", line=dict(color="gray", width=2, dash='dot'),
            opacity=0.6, name=f"Run {run}", showlegend=False,
        )
    )

fig = fig.update_yaxes({'range': (0, call_counts_monthly["P_ID"].max()*1.1)})


# Show the plot
fig.show()

In [35]:
DAA_COLORSCHEME['teal']

'#00B0B9'

In [36]:
use_poppins = True

if use_poppins:
    fig.update_layout(font=dict(family="Poppins", size=18, color="black"))
else:
    fig

fig.show()

In [37]:
list(DAA_COLORSCHEME.values())

['#D50032',
 '#00205B',
 '#1D428A',
 '#00B0B9',
 '#C0F0F2',
 '#D5F5F6',
 '#CCCCCC',
 '#4D4D4D',
 '#1F1F1F',
 '#56E39F',
 '#264027',
 '#F8C630',
 '#FFA400']

In [38]:
list(DAA_COLORSCHEME.values())[5]

'#D5F5F6'

In [None]:
historical_jobs_per_month = pd.read_csv("../historical_data/historical_jobs_per_month.csv", parse_dates=False)
historical_jobs_per_month["Month"] = pd.to_datetime(historical_jobs_per_month['Month'])
historical_jobs_per_month

Unnamed: 0,Month,Jobs
0,2019-04-01,138
1,2019-05-01,111
2,2019-06-01,158
3,2019-07-01,180
4,2019-08-01,123
...,...,...
65,2024-09-01,162
66,2024-10-01,215
67,2024-11-01,182
68,2024-12-01,174


In [40]:
px.line(historical_jobs_per_month, x="Month", y="Jobs")

In [41]:
historical_jobs_per_month["Month_Numeric"] = historical_jobs_per_month["Month"].apply(lambda x: x.month)
historical_jobs_per_month["Year_Numeric"] = historical_jobs_per_month["Month"].apply(lambda x: x.year)
historical_jobs_per_month

Unnamed: 0,Month,Jobs,Month_Numeric,Year_Numeric
0,2019-04-01,138,4,2019
1,2019-05-01,111,5,2019
2,2019-06-01,158,6,2019
3,2019-07-01,180,7,2019
4,2019-08-01,123,8,2019
...,...,...,...,...
65,2024-09-01,162,9,2024
66,2024-10-01,215,10,2024
67,2024-11-01,182,11,2024
68,2024-12-01,174,12,2024


In [42]:
first_month
import datetime

In [43]:
historical_jobs_per_month["New_Date"] = historical_jobs_per_month["Month"].apply(lambda x: datetime.date(year=first_month.year,day=1,month=x.month))
historical_jobs_per_month

Unnamed: 0,Month,Jobs,Month_Numeric,Year_Numeric,New_Date
0,2019-04-01,138,4,2019,2025-04-01
1,2019-05-01,111,5,2019,2025-05-01
2,2019-06-01,158,6,2019,2025-06-01
3,2019-07-01,180,7,2019,2025-07-01
4,2019-08-01,123,8,2019,2025-08-01
...,...,...,...,...,...
65,2024-09-01,162,9,2024,2025-09-01
66,2024-10-01,215,10,2024,2025-10-01
67,2024-11-01,182,11,2024,2025-11-01
68,2024-12-01,174,12,2024,2025-12-01


In [44]:
px.line(historical_jobs_per_month, x="New_Date", y="Jobs", color="Year_Numeric",
        color_discrete_sequence=[DAA_COLORSCHEME["red"], DAA_COLORSCHEME["blue"], DAA_COLORSCHEME["darkgrey"],
                                 DAA_COLORSCHEME["teal"], DAA_COLORSCHEME["darkgreen"], DAA_COLORSCHEME["orange"]])

In [45]:
call_df[call_df['callsign'].isna()]

Unnamed: 0,P_ID,run_number,timestamp,timestamp_dt,day,hour,weekday,month,qtr,callsign_group,vehicle_type,ampds_card,age,sex,hems_result,outcome,callsign,month_start
258,22,1,7740.1,2025-02-22 17:00:06,Sat,17,weekend,2,1,,,31,65.0,Female,,,,2025-02-01
263,23,1,7740.1,2025-02-22 17:00:06,Sat,17,weekend,2,1,,,9,48.0,Male,,,,2025-02-01
358,31,1,9360.1,2025-02-23 20:00:06,Sun,20,weekend,2,1,,,12,59.0,Female,,,,2025-02-01
382,33,1,10140.1,2025-02-24 09:00:06,Mon,9,weekday,2,1,,,6,48.0,Male,,,,2025-02-01
428,37,1,10980.1,2025-02-24 23:00:06,Mon,23,weekday,2,1,,,25,21.0,Female,,,,2025-02-01
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
415182,1687,9,511740.1,2026-02-07 17:00:06,Sat,17,weekend,2,1,,,9,56.0,Male,,,,2026-02-01
415234,1691,9,515400.1,2026-02-10 06:00:06,Tue,6,weekday,2,1,,,17,44.0,Female,,,,2026-02-01
415415,1706,9,519060.1,2026-02-12 19:00:06,Thu,19,weekday,2,1,,,6,69.0,Male,,,,2026-02-01
415556,1717,9,523260.1,2026-02-15 17:00:06,Sun,17,weekend,2,1,,,31,46.0,Female,,,,2026-02-01


In [46]:
call_df[call_df['callsign'].isna()].groupby('run_number')[['P_ID']].count().reset_index()

Unnamed: 0,run_number,P_ID
0,1,175
1,2,230
2,3,201
3,4,210
4,5,163
5,6,154
6,7,184
7,8,208
8,9,197
9,10,234


In [47]:
unattended_calls_per_run = call_df[call_df['callsign'].isna()].groupby('run_number')[['P_ID']].count().reset_index()

unattended_calls_per_run['P_ID'].mean()

190.6

In [48]:
_job_count_calculation.display_UNTATTENDED_calls_per_run(call_df)

'190.6 of 1685.6 (11.3%)'

In [49]:
_vehicle_calculation.get_perc_unattended_string(run_results)

'24 of 1686 (1.4%)'

In [52]:
no_callsign = call_df[call_df['callsign'].isna()][['P_ID', 'run_number']]

In [57]:
no_callsign_full = no_callsign.merge(run_results, how="inner").sort_values(["run_number", "P_ID", "timestamp"] )


Unnamed: 0,P_ID,run_number,time_type,event_type,timestamp,timestamp_dt,day,hour,weekday,month,qtr,callsign_group,vehicle_type,ampds_card,age,sex,hems_result,outcome
0,22,1,71,resource_preferred_resource_group,7740.0,2025-02-22 17:00:00,Sat,17,weekend,2,1,,,31,65.0,Female,,
1,22,1,helicopter,resource_preferred_vehicle_type,7740.0,2025-02-22 17:00:00,Sat,17,weekend,2,1,,,31,65.0,Female,,
2,22,1,Preferred resource not and available but other...,resource_preferred_outcome,7740.1,2025-02-22 17:00:06,Sat,17,weekend,2,1,,,31,65.0,Female,,
3,22,1,arrival,arrival_departure,7740.1,2025-02-22 17:00:06,Sat,17,weekend,2,1,,,31,65.0,Female,,
4,22,1,No HEMS available,queue,7740.1,2025-02-22 17:00:06,Sat,17,weekend,2,1,,,31,65.0,Female,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17460,1705,20,Preferred resource not and available but other...,resource_preferred_outcome,524700.1,2026-02-16 17:00:06,Mon,17,weekday,2,1,,,9,69.0,Male,,
17461,1705,20,arrival,arrival_departure,524700.1,2026-02-16 17:00:06,Mon,17,weekday,2,1,,,9,69.0,Male,,
17462,1705,20,No HEMS available,queue,524700.1,2026-02-16 17:00:06,Mon,17,weekday,2,1,,,9,69.0,Male,,
17463,1705,20,depart,arrival_departure,524700.1,2026-02-16 17:00:06,Mon,17,weekday,2,1,,,9,69.0,Male,,


In [72]:
no_callsign_full[['P_ID', 'run_number']].drop_duplicates()

Unnamed: 0,P_ID,run_number
0,22,1
7,23,1
14,31,1
21,33,1
28,37,1
...,...,...
17430,1623,20
17437,1646,20
17444,1680,20
17451,1688,20


In [64]:

filtered_df = no_callsign_full[(no_callsign_full["P_ID"] == 22) & (no_callsign_full["run_number"] == 1)]
filtered_df['time_type'].astype('str')
filtered_df

Unnamed: 0,P_ID,run_number,time_type,event_type,timestamp,timestamp_dt,day,hour,weekday,month,qtr,callsign_group,vehicle_type,ampds_card,age,sex,hems_result,outcome
0,22,1,71,resource_preferred_resource_group,7740.0,2025-02-22 17:00:00,Sat,17,weekend,2,1,,,31,65.0,Female,,
1,22,1,helicopter,resource_preferred_vehicle_type,7740.0,2025-02-22 17:00:00,Sat,17,weekend,2,1,,,31,65.0,Female,,
2,22,1,Preferred resource not and available but other...,resource_preferred_outcome,7740.1,2025-02-22 17:00:06,Sat,17,weekend,2,1,,,31,65.0,Female,,
3,22,1,arrival,arrival_departure,7740.1,2025-02-22 17:00:06,Sat,17,weekend,2,1,,,31,65.0,Female,,
4,22,1,No HEMS available,queue,7740.1,2025-02-22 17:00:06,Sat,17,weekend,2,1,,,31,65.0,Female,,
5,22,1,depart,arrival_departure,7740.1,2025-02-22 17:00:06,Sat,17,weekend,2,1,,,31,65.0,Female,,
6,22,1,depart,arrival_departure,7740.1,2025-02-22 17:00:06,Sat,17,weekend,2,1,,,31,65.0,Female,,


In [69]:
no_callsign_full[no_callsign_full["event_type"] == "resource_preferred_outcome"]['time_type'].value_counts()

time_type
Preferred resource not and available but other resource in same group allocated    2004
Preferred resource available and allocated                                         1745
No resource in group available                                                       63
Name: count, dtype: int64

In [73]:

filtered_df = no_callsign_full[(no_callsign_full["P_ID"] == 23) & (no_callsign_full["run_number"] == 1)]
filtered_df['time_type'].astype('str')
filtered_df

Unnamed: 0,P_ID,run_number,time_type,event_type,timestamp,timestamp_dt,day,hour,weekday,month,qtr,callsign_group,vehicle_type,ampds_card,age,sex,hems_result,outcome
7,23,1,70,resource_preferred_resource_group,7740.0,2025-02-22 17:00:00,Sat,17,weekend,2,1,,,9,48.0,Male,,
8,23,1,car,resource_preferred_vehicle_type,7740.0,2025-02-22 17:00:00,Sat,17,weekend,2,1,,,9,48.0,Male,,
9,23,1,Preferred resource available and allocated,resource_preferred_outcome,7740.1,2025-02-22 17:00:06,Sat,17,weekend,2,1,,,9,48.0,Male,,
10,23,1,arrival,arrival_departure,7740.1,2025-02-22 17:00:06,Sat,17,weekend,2,1,,,9,48.0,Male,,
11,23,1,No HEMS available,queue,7740.1,2025-02-22 17:00:06,Sat,17,weekend,2,1,,,9,48.0,Male,,
12,23,1,depart,arrival_departure,7740.1,2025-02-22 17:00:06,Sat,17,weekend,2,1,,,9,48.0,Male,,
13,23,1,depart,arrival_departure,7740.1,2025-02-22 17:00:06,Sat,17,weekend,2,1,,,9,48.0,Male,,


In [65]:
px.scatter(filtered_df,
           x="timestamp_dt", y="time_type", color="event_type")

In [75]:
call_counts_monthly = call_df.groupby(
        ['run_number', 'month_start']
        )[['P_ID']].count().reset_index().rename(columns={"P_ID": "monthly_calls"})

# Identify first and last month in the dataset
first_month = call_counts_monthly["month_start"].min()
last_month = call_counts_monthly["month_start"].max()

# Filter out the first and last month
call_counts_monthly = (
    call_counts_monthly[
        (call_counts_monthly["month_start"] != first_month) &
        (call_counts_monthly["month_start"] != last_month)
        ]
)

call_counts_monthly

Unnamed: 0,run_number,month_start,monthly_calls
1,1,2025-03-01,128
2,1,2025-04-01,166
3,1,2025-05-01,148
4,1,2025-06-01,162
5,1,2025-07-01,165
...,...,...,...
254,20,2025-09-01,167
255,20,2025-10-01,121
256,20,2025-11-01,108
257,20,2025-12-01,129


In [79]:
# 90th Percentile
def q90(x):
    return x.quantile(0.9)

# 90th Percentile
def q10(x):
    return x.quantile(0.1)

summary = (
        call_counts_monthly.groupby("month_start")["monthly_calls"]
        .agg(["mean", "std", "count", "max", "min", q10, q90])
        .reset_index()
        )

summary["ci95_hi"] = summary["mean"] + 1.96 * (summary["std"] / np.sqrt(summary["count"]))
summary["ci95_lo"] = summary["mean"] - 1.96 * (summary["std"] / np.sqrt(summary["count"]))

summary


Unnamed: 0,month_start,mean,std,count,max,min,q10,q90,ci95_hi,ci95_lo
0,2025-03-01,136.85,11.864476,20,159,114,119.6,147.6,142.049836,131.650164
1,2025-04-01,155.1,13.905319,20,185,128,139.5,169.5,161.194275,149.005725
2,2025-05-01,154.35,11.141553,20,179,138,145.6,167.1,159.233001,149.466999
3,2025-06-01,154.2,12.081304,20,171,129,138.5,165.6,159.494865,148.905135
4,2025-07-01,149.5,12.886141,20,181,129,134.6,165.8,155.1476,143.8524
5,2025-08-01,150.25,16.936025,20,175,115,122.8,171.2,157.67254,142.82746
6,2025-09-01,150.65,14.44873,20,178,127,132.9,170.3,156.982435,144.317565
7,2025-10-01,121.6,11.67273,20,141,101,105.6,135.0,126.7158,116.4842
8,2025-11-01,118.2,7.445239,20,135,106,109.8,125.8,121.46302,114.93698
9,2025-12-01,124.85,10.619124,20,150,105,111.8,136.1,129.504036,120.195964


In [None]:
historical_jobs_per_month = pd.read_csv("../historical_data/historical_jobs_per_month.csv", parse_dates=False)
# Convert to datetime
# (using 'parse_dates=True' in read_csv isn't reliably doing that, so make it explicit here)
historical_jobs_per_month["Month"] = pd.to_datetime(historical_jobs_per_month['Month'])

historical_jobs_per_month["Month_Numeric"] = (
    historical_jobs_per_month["Month"].apply(lambda x: x.month)
    )

historical_jobs_per_month["Year_Numeric"] = (
    historical_jobs_per_month["Month"]
    .apply(lambda x: x.year)
    )

historical_jobs_per_month

Unnamed: 0,Month,Jobs,Month_Numeric,Year_Numeric
0,2019-04-01,138,4,2019
1,2019-05-01,111,5,2019
2,2019-06-01,158,6,2019
3,2019-07-01,180,7,2019
4,2019-08-01,123,8,2019
...,...,...,...,...
65,2024-09-01,162,9,2024
66,2024-10-01,215,10,2024
67,2024-11-01,182,11,2024
68,2024-12-01,174,12,2024


In [87]:
historical_jobs_per_month["Year_Numeric"] = (
            historical_jobs_per_month["Month"]
            .apply(lambda x: x.year)
            )

historical_summary = (
    historical_jobs_per_month
    .groupby('Month_Numeric')['Jobs']
    .agg(["max","min"])
    .reset_index()
    .rename(columns={"max": "historic_max", "min": "historic_min"})
    )


historical_summary

Unnamed: 0,Month_Numeric,historic_max,historic_min
0,1,193,93
1,2,183,75
2,3,224,83
3,4,196,76
4,5,238,111
5,6,243,158
6,7,227,180
7,8,224,123
8,9,226,130
9,10,215,110


In [82]:
# historical_jobs_per_month.groupby('Month_Numeric')[['Jobs']].agg(["max","min"]).reset_index()

Unnamed: 0_level_0,Month_Numeric,Jobs,Jobs
Unnamed: 0_level_1,Unnamed: 1_level_1,max,min
0,1,193,93
1,2,183,75
2,3,224,83
3,4,196,76
4,5,238,111
5,6,243,158
6,7,227,180
7,8,224,123
8,9,226,130
9,10,215,110


In [84]:
call_df['timestamp_dt'] = pd.to_datetime(call_df['timestamp_dt'])
call_df['month_start'] = call_df['timestamp_dt'].dt.to_period('M').dt.to_timestamp()

call_counts_monthly = call_df.groupby(
    ['run_number', 'month_start']
    )[['P_ID']].count().reset_index().rename(columns={"P_ID": "monthly_calls"})

# Identify first and last month in the dataset
first_month = call_counts_monthly["month_start"].min()
last_month = call_counts_monthly["month_start"].max()

# Filter out the first and last month
call_counts_monthly = (
    call_counts_monthly[
        (call_counts_monthly["month_start"] != first_month) &
        (call_counts_monthly["month_start"] != last_month)
        ]
)

call_counts_monthly

Unnamed: 0,run_number,month_start,monthly_calls
1,1,2025-03-01,128
2,1,2025-04-01,166
3,1,2025-05-01,148
4,1,2025-06-01,162
5,1,2025-07-01,165
...,...,...,...
254,20,2025-09-01,167
255,20,2025-10-01,121
256,20,2025-11-01,108
257,20,2025-12-01,129


In [110]:
jobs_per_hour_historic = pd.read_csv("../actual_data/jobs_by_hour.csv")
jobs_per_hour_historic['month'] = pd.to_datetime(jobs_per_hour_historic['month'],dayfirst=True)

jobs_per_hour_historic['year_numeric'] = jobs_per_hour_historic['month'].apply(lambda x: x.year)
jobs_per_hour_historic['month_numeric'] = jobs_per_hour_historic['month'].apply(lambda x: x.month)
jobs_per_hour_historic_long = jobs_per_hour_historic.melt(id_vars=['month','month_numeric', 'year_numeric'])
jobs_per_hour_historic_long["hour"] = jobs_per_hour_historic_long['variable'].str.extract(r"(\d+)\s")
jobs_per_hour_historic_long["hour"] = jobs_per_hour_historic_long["hour"].astype('int')
jobs_per_hour_historic_long = jobs_per_hour_historic_long[~jobs_per_hour_historic_long['value'].isna()]
jobs_per_hour_historic_long

Unnamed: 0,month,month_numeric,year_numeric,variable,value,hour
0,2023-01-01,1,2023,0 to 1,3.0,0
1,2023-02-01,2,2023,0 to 1,4.0,0
2,2023-03-01,3,2023,0 to 1,2.0,0
3,2023-04-01,4,2023,0 to 1,5.0,0
4,2023-05-01,5,2023,0 to 1,2.0,0
...,...,...,...,...,...,...
561,2023-10-01,10,2023,23 to 24,2.0,23
562,2023-11-01,11,2023,23 to 24,5.0,23
563,2023-12-01,12,2023,23 to 24,6.0,23
564,2024-01-01,1,2024,23 to 24,4.0,23


In [120]:
historical_data = jobs_per_hour_historic_long.groupby(['hour'])['value'].agg(['min','max', q10, q90]).reset_index()
historical_data

Unnamed: 0,hour,min,max,q10,q90
0,0,1.0,8.0,2.0,7.0
1,1,0.0,5.0,0.0,4.0
2,2,0.0,1.0,0.0,1.0
3,3,0.0,1.0,0.0,0.0
4,4,0.0,6.0,0.0,1.7
5,5,0.0,7.0,0.0,1.7
6,6,0.0,16.0,0.0,3.7
7,7,2.0,14.0,2.3,6.4
8,8,4.0,15.0,5.0,14.0
9,9,7.0,17.0,9.3,15.7


In [103]:
hourly_calls_per_run = call_df.groupby(['hour', 'run_number'])[['P_ID']].count().reset_index().rename(columns={"P_ID": "count"})
hourly_calls_per_run

Unnamed: 0,hour,run_number,count
0,0,1,33
1,0,2,22
2,0,3,36
3,0,4,32
4,0,5,24
...,...,...,...
438,23,16,44
439,23,17,43
440,23,18,48
441,23,19,27


In [117]:
aggregated_data = hourly_calls_per_run.groupby("hour").agg(
            mean_count=("count", "mean"),
            std_count=("count", "std")
        ).reset_index()

fig = px.bar(
                aggregated_data,
                x="hour",
                y="mean_count",

                labels={"mean_count": "Total Calls Per Hour Across Simulation<br>Averaged Across Simulation Runs",
                        "hour": "Hour"},

            ).update_xaxes(dtick=1)

fig

In [121]:
historical_data

Unnamed: 0,hour,min,max,q10,q90
0,0,1.0,8.0,2.0,7.0
1,1,0.0,5.0,0.0,4.0
2,2,0.0,1.0,0.0,1.0
3,3,0.0,1.0,0.0,0.0
4,4,0.0,6.0,0.0,1.7
5,5,0.0,7.0,0.0,1.7
6,6,0.0,16.0,0.0,3.7
7,7,2.0,14.0,2.3,6.4
8,8,4.0,15.0,5.0,14.0
9,9,7.0,17.0,9.3,15.7


In [122]:
fig.add_trace(go.Bar(
    x=historical_data["hour"],
    y=historical_data["max"] - historical_data["min"],  # The range
    base=historical_data["min"],  # Starts from the minimum
    name="Historical Range",
    marker_color="rgba(100, 100, 255, 0.3)",  # Light blue with transparency
    hoverinfo="skip",  # Hide hover info for clarity
    showlegend=True
))