In [56]:
import os
import wfdb
import wfdb.processing
import matplotlib.pyplot as plt
import numpy as np
import keras
import tensorflow as tf
import copy

# LTDB
db_dir_in_docker = 'long-term-af-database-1.0.0/files'
AFIB_symbol = '(AFIB'
non_AFIB_symbols = [ '(AB','(IVR', '(SBR', '(SVTA', '(T', '(VT']
mistakes = ['M', 'MB', 'MISSB', 'PSE']
output_dir = 'lt_2_db'
output_dir_prefix = 'lt_2_'
lt_not = ['112', '122', '208', '62', '201', '118', '114', '201', '206', '119', '101', '207'
          , '21', '24', '53', '45', '110', '74', '105', '100', '32', '22', '51', '55', '111', '49', '121', '56']
# wszystkie ['', '\x01 Aux', '(AB', '(AFIB', '(B', '(IVR', '(N', '(SBR', '(SVTA', '(T', '(VT', 'M', 'MB', 'MISSB', 'PSE']

# # SHDB
# db_dir_in_docker = 'shdb-af-a-japanese-holter-ecg-database-of-atrial-fibrillation-1.0.0'
# AFIB_symbol = '(AFIB'
# non_AFIB_symbols = ['(AFL', '(AT', '(PAT', '(NOD']
# output_dir = 'sh_2_db'
# output_dir_prefix = 'sh_2_'

# # AFDB
# db_dir_in_docker = 'mit-bih-atrial-fibrillation-database-1.0.0/files'
# AFIB_symbol = '(AFIB'
# non_AFIB_symbols = ['(AFL', '(J']
# output_dir = 'mit_2_db'
# output_dir_prefix = 'mit_2_'

In [57]:
def parse_rhythm_intervals(ann):

    rhythm_indices = [i for i, aux in enumerate(ann.aux_note) if len(aux.strip()) > 0]

    intervals = []
    for i in range(len(rhythm_indices)):
        start_idx = ann.sample[rhythm_indices[i]]
        label = ann.aux_note[rhythm_indices[i]]

        if i < len(rhythm_indices) - 1:
            end_idx = ann.sample[rhythm_indices[i+1]] - 1
        else:
            end_idx = ann.sample[-1] if len(ann.sample) > 0 else start_idx

        intervals.append((start_idx, end_idx, label.strip()))
    return intervals

def find_indices_in_range(array, lower_bound, upper_bound):
    from bisect import bisect_left, bisect_right

    start_index = bisect_left(array, lower_bound)
    end_index = bisect_right(array, upper_bound)

    return list(range(start_index, end_index))

In [58]:
files = os.listdir(db_dir_in_docker)
files = [f for f in files if f.endswith('.dat') or f.endswith('.hea') or f.endswith('.atr') or f.endswith('.qrs')]

files_grouped = {}
for file in files:
    number = file.split('.')[0]
    if number in files_grouped:
        files_grouped[number].append(file)
    else:
        files_grouped[number] = [file]

keys_tab = list(files_grouped.keys())

In [59]:
print(keys_tab)
# keys_tab = keys_tab[0:3]
print(keys_tab)
print(len(keys_tab))

['30', '11', '62', '06', '118', '38', '26', '203', '71', '65', '01', '25', '100', '33', '120', '75', '122', '32', '22', '114', '201', '51', '05', '43', '07', '206', '205', '03', '69', '35', '55', '116', '112', '08', '60', '208', '111', '19', '200', '204', '49', '64', '20', '48', '119', '115', '37', '68', '39', '28', '101', '23', '12', '70', '113', '58', '102', '44', '202', '207', '15', '21', '42', '10', '24', '117', '104', '54', '53', '121', '56', '103', '45', '34', '47', '13', '110', '74', '16', '17', '72', '00', '105', '18']
['30', '11', '62', '06', '118', '38', '26', '203', '71', '65', '01', '25', '100', '33', '120', '75', '122', '32', '22', '114', '201', '51', '05', '43', '07', '206', '205', '03', '69', '35', '55', '116', '112', '08', '60', '208', '111', '19', '200', '204', '49', '64', '20', '48', '119', '115', '37', '68', '39', '28', '101', '23', '12', '70', '113', '58', '102', '44', '202', '207', '15', '21', '42', '10', '24', '117', '104', '54', '53', '121', '56', '103', '45', '3

In [60]:
# liissst = []

for record_name in keys_tab:

    if record_name in lt_not:
        continue

    record_path = os.path.join(db_dir_in_docker, record_name)
    
    # sygnał
    try:
        record = wfdb.rdrecord(record_path)
    except:
        continue
    signal = record.p_signal

    # QRSy
    qrs_data = wfdb.rdann(record_path, 'qrs')
    qrs_data_indexs = qrs_data.sample

    # częstotliwość
    fs = record.fs

    # interwały RR
    rr = wfdb.processing.calc_rr(qrs_data_indexs, fs=fs, min_rr=None, max_rr=None, qrs_units='samples', rr_units='seconds')

    # adnotacje
    ann = wfdb.rdann(record_path, 'atr')

    # print(ann.aux_note)

    # print("Annotation types:", np.unique(ann.aux_note))

    # for type in np.unique(ann.aux_note):
    #     liissst.append(type)

    # liissst = np.unique(liissst)
    # liissst = liissst.tolist()
    # print(liissst)

    ann_parsed = parse_rhythm_intervals(ann)

    # print(ann_parsed)

    output =  np.zeros((len(rr), 3))
    output[:, 0] = rr

    # wpisanie afiba na 1
    for elem in ann_parsed:
        if elem[2] == AFIB_symbol:
            indices = find_indices_in_range(qrs_data_indexs, elem[0], elem[1])

            try:
                indices.remove(len(rr))
            except:
                pass

            output[indices, 1] = 1

        if elem[2] in non_AFIB_symbols:
            indices = find_indices_in_range(qrs_data_indexs, elem[0], elem[1])

            try:
                indices.remove(len(rr))
            except:
                pass

            output[indices, 2] = 1

    # # zapis do pliku .npy
    # np.save("{}/{}{}.npy".format(output_dir, output_dir_prefix, record_name), output)

    # # kontrolny print i plt.show
    # print("Max RR: {}, Min RR: {}".format(max(rr), min(rr)))

    # rr_samples = np.arange(len(output[:,1]))
    # plt.figure(figsize=(15, 5))
    # plt.title(record_name)
    # plt.plot(rr_samples, output[:,0], label='RR')
    # plt.plot(rr_samples, output[:,1], label='Label')
    # plt.plot(rr_samples, output[:,2], label='Label not afib')
    # plt.legend()
    # plt.grid()
    # plt.ylim([0 ,5])
    # plt.show()

    # # do plota
    # qrs_data_indexs_y = np.zeros_like(qrs_data_indexs)
    # rr_x = qrs_data_indexs[:-1] 

    # plt.figure(figsize=(12, 6))
    # plt.plot(signal)
    # plt.scatter(
    #     qrs_data_indexs, 
    #     qrs_data_indexs_y,
    #     color='red', 
    #     label='QRS', 
    #     zorder=5)
    # plt.scatter(
    #     rr_x, 
    #     rr,
    #     color='blue', 
    #     label='RR', 
    #     zorder=5)
    # plt.title(f'Przykładowy rekord: {record_name}')
    # plt.xlabel('Czas (próbki)')
    # plt.ylabel('Amplituda')
    # plt.grid(True)
    # ofset = 10000
    # plt.xlim([ofset, ofset+5000])
    # plt.show()


# print(liissst)