### Subdivision of CSO observed data into events

In [1]:
# Import libraries
import os
import pandas as pd
import numpy as np
import plotly.graph_objects as go

In [2]:
# Import data
os.chdir('C:\\Users\\rpal\\Source\\modelskill\\tmp\\RPAL\\data\\obs_and_model_data_Rocco')

CSO = pd.read_csv('CSO.csv', sep=',', header=0, index_col=0, parse_dates=True)

# Remove all the rows where the observed or modelled value is missing
CSO = CSO[CSO['filtered'].notna() & CSO['model'].notna()]

Create column for detection of events

In [3]:
# Define variable used to detect events
CSO['event_signal'] = np.max(CSO[['model', 'filtered']], axis=1)

Identify events start and end based on defined threshold value

In [4]:
# Set threshold for event detection
det_thr = 0.001

# Create empty DataFrame for storing events
events = pd.DataFrame(columns=['start','end'], index=pd.Index([])) 

# Find event starts = where obs goes from <= det_thr to > det_thr
start_idx = CSO['event_signal'].shift(1).le(det_thr) & CSO['event_signal'].gt(det_thr)
start_event = CSO.index[start_idx]
events['start'] = start_event

# Find event ends = where obs goes from > det_thr to <= det_thr
end_idx = CSO['event_signal'].gt(det_thr) & CSO['event_signal'].shift(-1).le(det_thr)
end_event = CSO.index[end_idx]
events['end'] = end_event

Aggregate events that are separated by gaps shorter than given value

In [5]:
# Set min gap between events
min_gap = '1 hour'

# Calculate gap between events 
events['diff'] = events['start'] - events['end'].shift(1)

# Identify events based on min_gap
#events['check'] = (events['diff'] > min_gap)
events['ID'] = (events['diff'] > min_gap).cumsum( ) + 1
# events['fix'] = events.ID +1

# Aggregate events
events = events.groupby('ID').agg({'start':'first', 'end':'last'})


Assign event index to original series

In [6]:
CSO['event'] = 0
for e in events.index:
    CSO.loc[events['start'][e]:events['end'][e],'event'] = e

# remove columns event_signal from CSO
CSO = CSO.drop(columns=['event_signal'])

CSO.to_csv('CSO_events.csv')

Compute event signatures

In [7]:
# Event duration
events['duration'] = events['end'] - events['start']
events.duration

# Peak observed value
events['obs_peak'] = CSO.groupby('event')['obs'].max()

# Peak modelled value
events['mod_peak'] = CSO.groupby('event')['model'].max()

# Index of peak observed value
events['obs_peak_idx'] = CSO.groupby('event')['obs'].idxmax()

# Index of peak modelled value
events['mod_peak_idx'] = CSO.groupby('event')['model'].idxmax()

# Find duration of observed values for each event
events['obs_dur'] = CSO.groupby('event')['obs'].apply(
    lambda x:  (x[x > 0].index[-1]) - (x[x > 0].index[0]) if len(x[x > 0]) > 0 else 0)

# Find duration of modelled values for each event
events['mod_dur'] = CSO.groupby('event')['model'].apply(
    lambda x:  (x[x > 0].index[-1]) - (x[x > 0].index[0]) if len(x[x > 0]) > 0 else 0)

In [8]:
# Convert CSO index to regular column and call it timestep
CSO['timestamp'] = CSO.index
CSO['timestep'] = (CSO.timestamp - CSO.timestamp.shift(1)).dt.total_seconds()

# Find area under the curve of observed values for each event
CSO['obs_AUC'] = CSO['filtered'] * CSO['timestep']
events['obs_AUC'] = CSO.groupby('event')['obs_AUC'].sum()

# Find area under the curve of modelled values for each event
CSO['mod_AUC'] = CSO['model'] * CSO['timestep']
events['mod_AUC'] = CSO.groupby('event')['mod_AUC'].sum()

In [9]:
events.head()

Unnamed: 0_level_0,start,end,duration,obs_peak,mod_peak,obs_peak_idx,mod_peak_idx,obs_dur,mod_dur,obs_AUC,mod_AUC
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1,2022-08-27 09:45:00,2022-08-27 12:15:00,0 days 02:30:00,0.403,0.4441,2022-08-27 11:15:00,2022-08-27 11:00:00,0 days 02:15:00,0 days 02:30:00,2269.98,2854.98
2,2022-09-18 13:45:00,2022-09-18 15:45:00,0 days 02:00:00,0.0,0.76,2022-09-18 13:45:00,2022-09-18 14:00:00,0,0 days 02:00:00,0.0,4742.19
3,2022-09-28 16:00:00,2022-09-28 17:30:00,0 days 01:30:00,0.0919,0.4449,2022-09-28 17:00:00,2022-09-28 16:30:00,0 days 00:30:00,0 days 01:30:00,170.37,1855.08
4,2022-09-28 21:00:00,2022-09-28 21:30:00,0 days 00:30:00,0.0,0.0417,2022-09-28 21:00:00,2022-09-28 21:15:00,0,0 days 00:30:00,0.0,65.43
5,2022-10-01 15:45:00,2022-10-01 16:45:00,0 days 01:00:00,0.0,0.2342,2022-10-01 15:45:00,2022-10-01 16:00:00,0,0 days 01:00:00,0.0,706.86


In [10]:
# Export events to csv
events.to_csv('CSO_events_signatures.csv')