### Run this notebook first, then run the `get_common_annotations.ipynb` notebook

In [1]:
import sys
sys.path.append('..')

import numpy as np
from tqdm.notebook import tqdm
import h5py
from lib.Utilities import *
import os
from collections import Counter
import pandas as pd

import csv

In [2]:
# Tally how often each annotation label occurs across all HDF5 downloads.
# The label is read from the second-to-last column of every annotation dataset.
annotation_group_names = []

# Directory containing the HDF5 files
directory_path = '/storage/ms5267@drexel.edu/precicecap_downloads/'

for filename in os.listdir(directory_path):
	# Only HDF5 recordings are of interest; ignore everything else in the folder.
	if not filename.endswith('.h5'):
		continue

	log_info(f"Processing {filename}")
	datafile = os.path.join(directory_path, filename)

	with h5py.File(datafile, 'r') as file:
		annotations_group = file['Annotations']
		for name, subgroup in annotations_group.items():
			# Load the whole annotation dataset into memory.
			annotation_data = subgroup[:]
			if len(annotation_data) == 0:
				log_info("No annotation group")
				continue
			annotation_group_names += list(annotation_data[:, -2])

print(Counter(annotation_group_names))

15:44:23 :	  Processing 85_Patient_2023-05-12_17:53.h5 

15:44:23 :	  Processing 59_Patient_2022-01-31_23:19.h5 

15:44:23 :	  Processing 74_Patient_2023-08-05_06:00.h5 

15:44:23 :	  Processing 110_Patient_2023_Sep_28__23_52_07_705708.h5 

15:44:23 :	  Processing 4_Patient_2022-02-05_08:59.h5 

15:44:23 :	  Processing 73_Patient_2017_Dec_18__11_19_55_297272.h5 

15:44:23 :	  Processing 34_Patient_2023-04-04_22:31.h5 

15:44:23 :	  Processing 53_Patient_2023-06-25_21:39.h5 

15:44:23 :	  Processing 101_Patient_2023_Nov_9__22_24_41_155873.h5 

15:44:23 :	  Processing 90_Patient_2023-03-21_12:19.h5 

15:44:23 :	  Processing 50_Patient_2023-06-12_21:10.h5 

15:44:23 :	  Processing 35_Patient_2023-04-03_19:51.h5 

15:44:23 :	  Processing 55_Patient_2023-06-13_00:47.h5 

15:44:23 :	  Processing 139_Patient_2024_Mar_4__7_32_51_662674.h5 

Counter({b'ECG': 5448, b'ABP': 3870, b'ART': 3644, b'ART2': 84, b'EEG': 66, b'Temp': 32, b'ART1': 26, b'na': 24, b'RR': 21, b'PLETH': 21, b'SpO2': 20, b'RE

In [3]:
# Run this to update the annotation files.
#
# For each HDF5 recording, select the ART / ABP / ECG annotations, translate
# every annotation's start/end time into indices of the matching waveform
# timestamp array, and write one CSV per recording.

# Directory containing the HDF5 files
directory_path = '/storage/ms5267@drexel.edu/precicecap_downloads/'
# Directory the per-recording annotation CSVs are written to
annotation_output_dir = '../data/annotations'

# keys_to_select[i] is the timestamp dataset paired with the annotation label ann_keys[i].
keys_to_select = ['Waveforms/ART_na_Timestamps', 'Waveforms/ABP_na_Timestamps', 'Waveforms/ECG_II_Timestamps']
ann_keys = [b'ART', b'ABP', b'ECG']
ecg_abp_annotations = []

# Header row of every CSV. ID1/ID2 receive the start/end indices computed below;
# the remaining names label the columns copied from the annotation dataset.
csv_header = np.array([['ID1', 'ID2', 'Session', 'Data_Type', 'Start_Time', 'End_Time', 'Signal_Type', 'Lead_Type']], dtype=str)

# np.savetxt does not create missing directories.
os.makedirs(annotation_output_dir, exist_ok=True)

for filename in tqdm(os.listdir(directory_path)):
	if not filename.endswith('.h5'):
		continue

	log_info(f"Processing {filename}")
	datafile = os.path.join(directory_path, filename)
	# Row blocks that are stacked into the final CSV; the header always comes first.
	csv_blocks = [csv_header]

	with h5py.File(datafile, 'r') as file:
		# Read every annotation dataset once per file; they do not depend on
		# which waveform is being processed.
		annotation_tables = []
		for name, dataset in file['Annotations'].items():
			annotation_data = dataset[:]
			# If there is no annotation data, report it and move on.
			if len(annotation_data) == 0:
				log_info(f"No annotation group in {filename}")
				continue
			annotation_tables.append(annotation_data)

		# Process one data group at a time
		for datagroup, ann_key in zip(keys_to_select, ann_keys):
			# If the group does not exist in the file
			if datagroup not in file:
				continue

			timestamp = file[datagroup][:]

			# Every annotation dataset contributes its matching rows. Previously
			# only the last dataset's selection survived the loop, and an empty
			# last dataset left a stale (or undefined) selection behind.
			for annotation_data in annotation_tables:
				# The second-to-last column carries the signal label.
				annotation_t = annotation_data[annotation_data[:, -2] == ann_key]
				if len(annotation_t) == 0:
					# Nothing to export; stacking an empty index list would raise.
					continue

				# Create the annotation indices from the timestamps.
				idx_list = []
				for ann in annotation_t:
					# Columns 2 and 3 are the start/end times. They are scaled by 1e3,
					# presumably to match the unit of the waveform timestamps -- TODO confirm.
					ann_start_ts = int(float(ann[2])*1e3)
					ann_end_ts = int(float(ann[3])*1e3)

					# find_idx_from_ts comes from lib.Utilities; it is assumed to return
					# the start/end positions inside `timestamp` -- verify against the helper.
					ann_start_idx, ann_end_idx = find_idx_from_ts(timestamp, ann_start_ts, ann_end_ts)

					idx_list.append([ann_start_idx, ann_end_idx])

				# Prepend the two index columns to the annotation columns.
				csv_blocks.append(np.hstack((np.array(idx_list), annotation_t)).astype(str))

	final_arr = np.vstack(csv_blocks)
	log_info(final_arr.shape)
	np.savetxt(os.path.join(annotation_output_dir, f'{filename}-annotations.csv'), final_arr, delimiter=',', fmt='%s')

  0%|          | 0/17 [00:00<?, ?it/s]

18:56:22 :	  Processing 85_Patient_2023-05-12_17:53.h5 

18:56:27 :	  (1003, 8) 

18:56:27 :	  Processing 59_Patient_2022-01-31_23:19.h5 

18:56:27 :	  (621, 8) 

18:56:27 :	  Processing 74_Patient_2023-08-05_06:00.h5 

18:56:33 :	  (1220, 8) 

18:56:33 :	  Processing 110_Patient_2023_Sep_28__23_52_07_705708.h5 

18:56:38 :	  (3757, 8) 

18:56:39 :	  Processing 4_Patient_2022-02-05_08:59.h5 

18:56:39 :	  (474, 8) 

18:56:39 :	  Processing 73_Patient_2017_Dec_18__11_19_55_297272.h5 

18:56:39 :	  (375, 8) 

18:56:39 :	  Processing 34_Patient_2023-04-04_22:31.h5 

18:56:40 :	  (1442, 8) 

18:56:40 :	  Processing 53_Patient_2023-06-25_21:39.h5 

18:56:40 :	  (21, 8) 

18:56:40 :	  Processing 101_Patient_2023_Nov_9__22_24_41_155873.h5 

18:56:44 :	  (607, 8) 

18:56:44 :	  Processing 90_Patient_2023-03-21_12:19.h5 

18:56:44 :	  (228, 8) 

18:56:44 :	  Processing 50_Patient_2023-06-12_21:10.h5 

18:56:49 :	  (760, 8) 

18:56:49 :	  Processing 35_Patient_2023-04-03_19:51.h5 

18:56:50 :	  

In [3]:
# Input folder with the per-recording annotation CSVs, and the folder the
# filtered (cross-session) annotations are written to.
annotation_folder = '/home/ms5267@drexel.edu/moberg-precicecap/ArtifactDetectionEval/data/annotations'
target_folder = '/home/ms5267@drexel.edu/moberg-precicecap/ArtifactDetectionEval/data/filtered_annotations'

# Folder holding the HDF5 recordings the CSVs were derived from.
hdf5_dir = '/storage/ms5267@drexel.edu/precicecap_downloads/'

# Signal_Type value -> HDF5 dataset with that signal's timestamps.
group_dict = dict(
	ECG='Waveforms/ECG_II_Timestamps',
	ABP='Waveforms/ABP_na_Timestamps',
	ART='Waveforms/ART_na_Timestamps',
)

def is_overlapping(start1, end1, start2, end2):
	"""Return True when the two intervals share a region of non-zero length.

	The intervals are (start1, end1) and (start2, end2). Intervals that only
	touch at an endpoint are not considered overlapping.
	"""
	latest_start = max(start1, start2)
	earliest_end = min(end1, end2)
	return latest_start < earliest_end

def intersection(start1, end1, start2, end2):
	"""Return (start, end) of the region common to the two intervals.

	The start is the later of the two starts and the end is the earlier of the
	two ends. No check is made that the intervals actually overlap.
	"""
	return max(start1, start2), min(end1, end2)

# For every annotation CSV, keep only the regions where annotations from
# different sessions overlap on the same signal, and write them (with their
# waveform indices) to the target folder under the same file name.

# Suffix appended to the HDF5 file name when the annotation CSVs were written;
# stripping it recovers the recording's file name.
ANNOTATION_SUFFIX = '-annotations.csv'

os.makedirs(target_folder, exist_ok=True)

for filename in tqdm(os.listdir(annotation_folder)):
	# Skip anything that is not an annotation CSV. The whole iteration is skipped,
	# so no output file is written for unrelated directory entries.
	if not filename.endswith(ANNOTATION_SUFFIX):
		continue
	print(f"Processing {filename}")

	datafile = os.path.join(annotation_folder, filename)
	df = pd.read_csv(datafile, encoding='utf-8')

	signal_type = df['Signal_Type'].unique().tolist()
	common_ann = []

	hdf5_file = hdf5_dir + filename[:-len(ANNOTATION_SUFFIX)]

	print(hdf5_file)

	# Open the recording once per CSV rather than once per signal type.
	with h5py.File(hdf5_file, 'r') as filehdf5:
		for s in signal_type:
			if s not in group_dict:
				# No timestamp dataset is configured for this label.
				print(f"Skipping signal type {s!r}: no timestamp dataset configured")
				continue
			timestamp_raw = filehdf5[group_dict[s]][:]

			df_sorted = df[df['Signal_Type'] == s].sort_values(by='Start_Time')
			# Pull the columns out once; row-wise .iloc access inside the
			# quadratic comparison below is needlessly slow.
			starts = df_sorted['Start_Time'].tolist()
			ends = df_sorted['End_Time'].tolist()
			sessions = df_sorted['Session'].tolist()

			for start_i, end_i, session_i in zip(starts, ends, sessions):
				start_ts, end_ts = start_i, end_i
				annotated_by = session_i
				has_overlap = False

				for start_j, end_j, session_j in zip(starts, ends, sessions):
					# Only annotations from a different session count. The window
					# is narrowed to the running intersection, so later rows are
					# compared against the already-shrunk interval.
					if session_i != session_j and is_overlapping(start_ts, end_ts, start_j, end_j):
						start_ts, end_ts = intersection(start_ts, end_ts, start_j, end_j)
						annotated_by = annotated_by + '|' + session_j
						has_overlap = True

				if has_overlap:
					# Scale by 1e3 before the index lookup, presumably to match the
					# unit of the waveform timestamps -- TODO confirm.
					start_ts = int(start_ts*1e3)
					end_ts = int(end_ts*1e3)
					# find_idx_from_ts comes from lib.Utilities; assumed to return the
					# start/end positions inside timestamp_raw -- verify against the helper.
					ann_start_idx, ann_end_idx = find_idx_from_ts(timestamp_raw, start_ts, end_ts)
					common_ann.append([ann_start_idx, ann_end_idx, int(start_ts), int(end_ts), annotated_by, s])

	print(len(common_ann))

	destination_file = target_folder + '/' + filename
	# Write the rows without a header line, one agreed region per row.
	with open(destination_file, mode='w', newline='') as file:
		csv.writer(file).writerows(common_ann)

	print(f"Data written to {destination_file} successfully.\n ================")

	

  0%|          | 0/14 [00:00<?, ?it/s]

Processing 50_Patient_2023-06-12_21:10.h5-annotations.csv
/storage/ms5267@drexel.edu/precicecap_downloads/50_Patient_2023-06-12_21:10.h5
455
Data written to /home/ms5267@drexel.edu/moberg-precicecap/ArtifactDetectionEval/data/filtered_annotations/50_Patient_2023-06-12_21:10.h5-annotations.csv successfully.
Processing 35_Patient_2023-04-03_19:51.h5-annotations.csv
/storage/ms5267@drexel.edu/precicecap_downloads/35_Patient_2023-04-03_19:51.h5
266
Data written to /home/ms5267@drexel.edu/moberg-precicecap/ArtifactDetectionEval/data/filtered_annotations/35_Patient_2023-04-03_19:51.h5-annotations.csv successfully.
Processing 59_Patient_2022-01-31_23:19.h5-annotations.csv
/storage/ms5267@drexel.edu/precicecap_downloads/59_Patient_2022-01-31_23:19.h5
323
Data written to /home/ms5267@drexel.edu/moberg-precicecap/ArtifactDetectionEval/data/filtered_annotations/59_Patient_2022-01-31_23:19.h5-annotations.csv successfully.
Processing 34_Patient_2023-04-04_22:31.h5-annotations.csv
/storage/ms5267@dr

/storage/ms5267@drexel.edu/precicecap_downloads/
