In [8]:
import pandas as pd
import geopandas as gpd
import numpy as np

import cemo_module as cemo

In [17]:
# Load the per-district call records, reading only the columns this notebook uses
# and parsing `date_time` into datetimes at load time.
calls_df = pd.read_csv(
    'data/Analysis/90degreeheatday.csv',
    usecols=['District', 'date_time', 'heat_index_high', 'calls', 'calls_per_10000'],
    parse_dates=['date_time'],
)

In [50]:
# Keep only the rows dated strictly before 1 January 2023.
before_2023 = calls_df['date_time'] < '2023-01-01'
calls_df = calls_df.loc[before_2023]

In [51]:
def grouping(day):
    """Label a value as a heat event or not.

    Parameters
    ----------
    day : number
        Value compared against 2 (any object supporting ``>= 2``).

    Returns
    -------
    str
        ``'Heat Event'`` when ``day >= 2``, otherwise ``'Not a Heat Event'``.
    """
    return 'Heat Event' if day >= 2 else 'Not a Heat Event'

In [55]:
# Heat-index cut-offs for which an event flag column is produced.
thresholds = [90, 95, 100]
# dropna: a missing District label can never satisfy the equality filter below, and the
# resulting empty frame would make np.vectorize raise on a size-0 input.
districts = calls_df['District'].dropna().unique()
df_list = []

# Build the vectorized threshold test once instead of once per district/threshold pass.
heat_threshold_vec = np.vectorize(cemo.heat_threshold)

#Calculate heat events across districts across thresholds
for district in districts:
    # The streak is counted over consecutive rows, so each district's rows must be in
    # date order. The stable sort leaves already-ordered data exactly as it was, and the
    # fresh 0..n-1 index lines up with the index of pd.Series(thresh) below.
    df = (
        calls_df[calls_df['District'] == district]
        .sort_values('date_time', kind='stable')
        .reset_index(drop=True)
    )
    for threshold in thresholds:
        # Per-row result of cemo.heat_threshold(heat_index_high, threshold).
        thresh = heat_threshold_vec(df['heat_index_high'], threshold)
        # NOTE(review): cemo.streak is assumed to return a running count of consecutive
        # flagged rows (possibly NaN where undefined) -- confirm against cemo_module.
        streak = cemo.streak(pd.Series(thresh)).fillna(0)

        # A row is an event day when its streak value is at least 2.
        df[f'{threshold}_degree_event'] = streak >= 2

    df_list.append(df)

# One frame holding every district, with a boolean flag column per threshold.
heat_events_df = pd.concat(df_list, ignore_index=True)

In [70]:
# Per-district column totals. numeric_only=True restricts the sum to numeric/boolean
# columns explicitly, so `date_time` is excluded by request rather than through the
# deprecated silent-drop default (which newer pandas turns into an error). Summing a
# boolean event column gives the number of rows flagged True for that district.
heat_events_df.groupby('District').sum(numeric_only=True)

  heat_events_df.groupby('District').sum()


Unnamed: 0_level_0,heat_index_high,calls,calls_per_10000,90_degree_event,95_degree_event,100_degree_event
District,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,135661.023503,13277,3529.239766,43,15,3
2,135445.930131,19345,4112.107814,43,14,3
3,135339.129670,21475,5692.058948,39,13,3
4,135302.041357,29375,14117.844956,38,13,3
5,130202.738511,10113,3112.171103,14,3,0
...,...,...,...,...,...,...
106,138713.051552,11118,3787.171714,112,31,12
107,140424.582033,7528,4105.808563,178,41,17
108,136268.372323,850,1845.419019,59,19,6
109,137246.274388,2384,2059.433310,69,22,9


In [71]:
# Show every row flagged as an event at the 100-degree threshold.
heat_events_df.loc[heat_events_df['100_degree_event'].eq(True)]

Unnamed: 0,District,date_time,heat_index_high,calls,calls_per_10000,90_degree_event,95_degree_event,100_degree_event
187,1,2018-07-07,102.418791,7,1.860712,True,True,True
1707,1,2022-09-04,106.630300,10,2.658161,True,True,True
1708,1,2022-09-05,102.531571,11,2.923977,True,True,True
2013,2,2018-07-07,102.274011,11,2.338237,True,True,True
3533,2,2022-09-04,106.471933,15,3.188504,True,True,True
...,...,...,...,...,...,...,...,...
184310,109,2022-09-07,100.156562,1,0.863856,True,True,True
184311,109,2022-09-08,100.144096,3,2.591569,True,True,True
184312,109,2022-09-09,102.196592,3,2.591569,True,True,True
186134,112,2022-09-05,101.046841,3,2.891566,True,True,True


In [74]:
# Excess calls per district on event days, one column per threshold.
series_list = []

for threshold in thresholds:
    col_name = f'{threshold}_degree_event'
    # Mean and standard deviation of calls per district, split by the event flag.
    call_stats = (
        heat_events_df
        .groupby(['District', col_name])['calls']
        .agg(['mean', 'std'])
        .reset_index()
    )
    # Baseline comes from non-event rows only: mean buffered by one standard deviation.
    # .assign builds a new frame, so no column is written into a filtered slice
    # (which would trigger pandas' chained-assignment warning).
    expected = (
        call_stats
        .loc[call_stats[col_name].eq(False)]
        .assign(expected=lambda stats: stats['mean'] + stats['std'])
    )
    # Attach each district's baseline to every row. validate guards against a
    # duplicated district key on the right side silently multiplying rows.
    call_calc = heat_events_df[['District', 'date_time', 'calls', col_name]].merge(
        expected[['District', 'expected']],
        on='District',
        how='left',
        validate='many_to_one',
    )
    # Observed minus expected, floored at zero; multiplying by the boolean flag zeroes
    # the value on rows that are not event days.
    call_calc['excess'] = (
        (call_calc['calls'] - call_calc['expected']).clip(lower=0) * call_calc[col_name]
    )
    # Total excess calls per district for this threshold.
    s = call_calc.groupby('District')['excess'].sum().rename(f'{threshold}_excess')

    series_list.append(s)

# Districts as rows, one `<threshold>_excess` column per threshold.
results_df = pd.concat(series_list, axis=1)
results_df

Unnamed: 0_level_0,90_excess,95_excess,100_excess
District,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,30.175231,12.967430,0.973294
2,45.741067,28.230483,0.974438
3,13.982710,10.531727,7.507632
4,32.192010,15.259558,2.229571
5,1.995439,1.002295,0.000000
...,...,...,...
106,35.265582,11.190588,4.719442
107,48.573001,12.540633,4.254722
108,7.875042,0.831176,0.833032
109,13.317307,6.358358,1.655890


In [73]:
# Inspect the merged frame left behind by the excess-calls loop. `call_calc` is rebound
# on every pass, so only the final threshold's rows are shown here.
# NOTE(review): this cell's execution count precedes the loop cell's and the stored
# output has no event-flag column, so the output looks stale -- re-run top to bottom.
call_calc

Unnamed: 0,District,date_time,calls,expected,excess
0,1,2018-01-01,9,10.026706,0.000000
1,1,2018-01-02,7,10.026706,0.000000
2,1,2018-01-03,5,10.026706,0.000000
3,1,2018-01-04,1,10.026706,0.000000
4,1,2018-01-05,6,10.026706,0.000000
...,...,...,...,...,...
186247,112,2022-12-27,5,5.742732,0.000000
186248,112,2022-12-28,4,5.742732,0.000000
186249,112,2022-12-29,10,5.742732,4.257268
186250,112,2022-12-30,3,5.742732,0.000000
