# Purpose
Create electrocardiomatrix (EKM) images from ECG recordings after shifting the start of each signal by a fixed amount of time.

# Imports and installations

In [None]:
! pip install biosignalsnotebooks

In [None]:
! pip install wfdb

In [None]:
! pip3 install ishneholterlib

In [None]:
# Fetch the 1-300mECG.zip sample archive from thew-project.org and unzip it.
! wget http://thew-project.org/document/1-300mECG.zip
! unzip /content/1-300mECG.zip

In [None]:
# Imports
import numpy as np
import matplotlib.pyplot as plt
import biosignalsnotebooks as bsnb
from scipy.signal import detrend
import seaborn as sns

In [6]:
from ishneholterlib import Holter
import numpy as np

In [7]:
import os
from datetime import datetime
from PIL import Image
import random

In [8]:
import cv2

In [None]:
! rm -r EKM_dataset_big_EKM/
! rm -r EKM_dataset/

In [10]:
# DON'T RUN!
# NOTE(review): the reason for the warning above is not stated; the same folder
# tree is also rebuilt by user_EKMs_dir_creator() before each user is processed.
# Creates the EKM_dataset output folder with one sub-folder per lead.
! mkdir EKM_dataset
! mkdir EKM_dataset/x_lead
! mkdir EKM_dataset/y_lead
! mkdir EKM_dataset/z_lead

In [11]:
! mkdir EKM_dataset_big_EKM
! mkdir EKM_dataset_big_EKM/x_lead
! mkdir EKM_dataset_big_EKM/y_lead
! mkdir EKM_dataset_big_EKM/z_lead

In [12]:
import shutil

In [13]:
from google.colab import drive
# Mount Google Drive at /content/drive; the dataset folder is read from there below.
drive.mount('/content/drive')

Mounted at /content/drive


In [14]:
# Sanity check: number of entries in the ECG_200 dataset folder on Drive.
len(os.listdir("/content/drive/MyDrive/ECG project/ECG_200"))

402

# Pan-Tompkins algorithm and preprocessing

In [15]:
# Open the sample Holter recording and load its data.
# `all_data` is reused below for the individual leads and the sampling rate.
all_data = Holter('/content/1-300m.ecg')
all_data.load_data()



In [16]:
# Keep handles on the first three leads of the sample recording (named x/y/z here).
x_lead = all_data.lead[0]
y_lead = all_data.lead[1]
z_lead = all_data.lead[2]

In [17]:
# Peek at the sample values stored for the first lead.
x_lead.data

array([0.058536, 0.058536, 0.014634, ..., 0.19512 , 0.209754, 0.229266])

In [18]:
def process_ecg(unfiltered_ecg, fs):
    """Locate R peaks in a raw ECG following the Pan-Tompkins steps.

    Parameters
    ----------
    unfiltered_ecg : array-like
        Raw ECG samples.
    fs : number
        Sampling rate in Hz.

    Returns
    -------
    tuple
        ``(r_peaks, band_passed)``: the detected R-peak sample indices after
        being moved back by 40 ms worth of samples, and the band-pass
        filtered ECG.
    """
    # Pan-Tompkins step 1: band-pass filtering (5-15 Hz according to the original notes).
    band_passed = bsnb.detect._ecg_band_pass_filter(unfiltered_ecg, fs)

    # Steps 2 and 3: first difference, then point-wise squaring.
    slope = np.diff(band_passed)
    energy = slope * slope

    # Step 4: moving-window integration over an 80 ms window, built from a running sum.
    win = int(0.080 * fs)
    running_total = energy.cumsum()
    integrated = np.zeros_like(energy)
    # The first `win` samples do not have a full window yet: average what is available.
    integrated[:win] = running_total[:win] / np.arange(1, win + 1)
    integrated[win:] = (running_total[win:] - running_total[:-win]) / win

    # Initialise the detector state, collect candidate peaks, then confirm them.
    rr_buffer, signal_peak_1, noise_peak_1, threshold = bsnb.detect._buffer_ini(integrated, fs)
    probable_peaks, possible_peaks = bsnb.detect._detects_peaks(integrated, fs)
    confirmed = bsnb.detect._checkup(probable_peaks, integrated, fs, rr_buffer,
                                     signal_peak_1, noise_peak_1, threshold)

    # Force integer sample indices.
    confirmed = np.array([int(p) for p in confirmed])

    # Correcting step: move every peak back by 40 ms worth of samples.
    shifted = confirmed - 40 * (fs / 1000)
    r_peaks = np.array([int(p) for p in shifted])
    return r_peaks, band_passed

In [19]:
# Normalizing method
def normalize(signal):
    """Min-max rescale ``signal`` to the range [-1, 1].

    Parameters
    ----------
    signal : array-like
        Numeric samples; an array or a nested sequence of equal-length rows.

    Returns
    -------
    numpy.ndarray
        Array of the same shape with the minimum mapped to -1 and the maximum
        mapped to 1. A constant input has no range to stretch, so it is
        returned as all zeros (the midpoint of the target range) instead of
        the NaNs a 0/0 division would produce.
    """
    a, b = -1, 1
    values = np.asarray(signal)
    low = np.min(values)
    span = np.max(values) - low
    if span == 0:
        # Flat signal: avoid dividing by zero.
        return np.zeros(values.shape)
    return (b - a) * ((values - low) / span) + a

In [20]:
# Calculates the mean distance between all peaks for each user
def peak_distance(r_peaks):
    """Return the mean spacing between consecutive R peaks, skipping long gaps.

    The first spacing is always accepted. Every later spacing is accepted
    unless it is larger than the mean plus two standard deviations of the
    spacings accepted so far.

    Parameters
    ----------
    r_peaks : sequence of int
        R-peak sample indices.

    Returns
    -------
    float
        Mean of the accepted spacings, in samples.
    """
    accepted = []
    for idx, current in enumerate(r_peaks):
        # Stop once the value of the final peak is reached: it has no successor.
        if current == r_peaks[-1]:
            break
        gap = r_peaks[idx + 1] - current
        # `accepted` is only empty for the very first spacing, which is always kept.
        too_long = bool(accepted) and gap > np.mean(accepted) + np.std(accepted) * 2
        if not too_long:
            accepted.append(gap)
    return np.mean(accepted)

In [21]:
def electrocardiomatrix(distance, r_peaks, filtered_ecg, init_window, peaks_window):
    """Stack ECG segments cut around consecutive R peaks into a 2-D matrix.

    Each selected peak contributes one row that spans ``int(0.2 * distance)``
    samples before the peak to ``int(1.5 * distance)`` samples after it.

    Parameters
    ----------
    distance : number
        Mean peak-to-peak spacing in samples.
    r_peaks : numpy.ndarray
        R-peak sample indices.
    filtered_ecg : numpy.ndarray
        1-D signal the segments are cut from.
    init_window : int
        Index (into ``r_peaks``) of the first peak to use.
    peaks_window : int
        Number of peaks (rows) to take.

    Returns
    -------
    numpy.ndarray or None
        Matrix of shape ``(n_peaks, segment_length)``, or ``None`` when no peak
        is selected or the segments have different lengths and cannot be stacked.
    """
    init_seg = int(0.2 * distance)
    fin_seg = int(1.5 * distance)

    all_segments = []
    for peak in r_peaks[init_window:init_window + peaks_window]:
        # Clamp the start so a peak close to the beginning does not wrap around.
        start = max(peak - init_seg, 0)
        all_segments.append(filtered_ecg[start:peak + fin_seg][:, np.newaxis])

    # Nothing to stack (window starts past the last peak).
    if not all_segments:
        return None

    # A first segment clipped at the signal start is front-padded with zeros so
    # it lines up with the second one. Only possible with at least two segments;
    # previously a single selected peak raised IndexError here.
    if len(all_segments) > 1 and all_segments[0].shape[0] < all_segments[1].shape[0]:
        missing = int(all_segments[1].shape[0]) - int(all_segments[0].shape[0])
        zeros = np.zeros(missing)[:, np.newaxis]
        all_segments[0] = np.concatenate((zeros, all_segments[0]))

    try:
        ecm = np.concatenate(all_segments, 1)
    except ValueError:
        # Segments of unequal length (e.g. clipped at the end of the signal).
        return None
    return ecm.T

In [22]:
def electrocardiomatrix_no_1(filtered_ecg, init_window, sampling_rate, window_size):
  """Cut ``window_size`` consecutive one-second rows out of ``filtered_ecg``.

  The first row starts at sample ``init_window``; every following row starts
  where the previous one ended. Rows are returned as a plain list of slices,
  so rows that run past the end of the signal come back shorter.
  """
  # Each EKM row covers one second of signal.
  samples_per_row = 1 * sampling_rate
  row_starts = (init_window + row * samples_per_row for row in range(window_size))
  return [filtered_ecg[start:start + samples_per_row] for start in row_starts]

In [23]:
# Naming scheme: the second-to-last field of an EKM's file name is the user's id,
# and the last field is that user's running EKM number.
def save_ecm(dataset_name, path, key, i):
    """Save the current matplotlib figure as one EKM image.

    The file is written to ``{path}/10bpf-ekm-{dataset_name}-{key}-{i}``, where
    ``key`` is the user id and ``i`` the serial number of the EKM. The figure
    is saved with a tight bounding box and no padding.
    """
    file_stem = f"10bpf-ekm-{dataset_name}-{key}-{str(i)}"
    plt.savefig(f"{path}/{file_stem}", bbox_inches='tight', pad_inches=0)

In [None]:
# Naming scheme: the second-to-last field of an EKM's file name is the user's id,
# and the last field is that user's running EKM number.
def save_ecm_no_1(dataset_name, path, key, i, sbf=None):
    """Save the current matplotlib figure as one time-windowed EKM image.

    The file is written to ``{path}/{sbf}sbf-ekm-{dataset_name}-{key}-{i}``,
    where ``key`` is the user id and ``i`` the serial number of the EKM.

    Parameters
    ----------
    dataset_name, path, key, i
        Pieces of the output file name, see above.
    sbf : optional
        Value used for the ``{sbf}sbf`` prefix of the file name (presumably
        "seconds per frame", by analogy with the ``10bpf`` prefix used by
        ``save_ecm`` - confirm). When omitted, a notebook-level variable named
        ``sbf`` is used, which is what the function relied on before.

    Raises
    ------
    NameError
        If ``sbf`` is neither passed nor defined at notebook level.
    """
    if sbf is None:
        # Backward compatible: fall back on the notebook-level variable.
        sbf = globals().get("sbf")
        if sbf is None:
            raise NameError(
                "save_ecm_no_1 needs `sbf`: pass it explicitly or define a "
                "notebook-level `sbf` variable before saving"
            )
    plt.savefig(f"{path}/{sbf}sbf-ekm-{dataset_name}-{key}-{str(i)}", bbox_inches='tight', pad_inches=0)

In [24]:
def little_ekm_dataset(lead_data, sampling_rate, dataset_name, ekms_path, key):
  """Generate small EKM heat-map images for one ECG lead and save them.

  The lead is preprocessed with ``process_ecg``, detrended and normalized.
  EKMs are then built from successive groups of R peaks, drawn as a heat map
  on a tiny figure and written with ``save_ecm``. At most 3000 images are
  produced; the loop stops earlier when the peaks run out or a matrix cannot
  be assembled.

  Parameters
  ----------
  lead_data : array-like
      Raw samples of one lead.
  sampling_rate : number
      Sampling rate in Hz.
  dataset_name : str
      Tag embedded in the image file names.
  ekms_path : str
      Folder the images are written to.
  key : str
      User id embedded in the image file names.
  """
  print("  .Preprocessing the signal")
  peaks, filtered_ecg = process_ecg(lead_data , sampling_rate)

  print("  .Getting detrend_signal, norm_ecg, distance")
  detrend_signal = detrend(filtered_ecg)
  norm_ecg = normalize(detrend_signal)
  distance = peak_distance(peaks)

  # presumably "beats per frame" - confirm; each EKM uses bpf - 1 peaks as rows
  bpf = 10
  peaks_window = bpf-1
  ekms_counter, init_window = 0, 0
  total_ecms = 3000

  # Target image size. NOTE(review): dividing by 80 assumes an effective
  # resolution of 80 pixels per inch - confirm against the saved files.
  fig_width_px = 33
  fig_height_px = 21

  print("  .Getting EKMs")
  while ekms_counter < total_ecms:
    # At least two peaks must remain to build a matrix.
    if init_window >= len(peaks)-1: break
    ecm = electrocardiomatrix(distance, peaks, norm_ecg, init_window, peaks_window)
    if ecm is None: break
    # Kept as in the original: the first matrix is cut with the float mean
    # distance, later ones with its truncated integer value.
    distance = int(distance)
    norm_ecm = normalize(ecm)

    # Reuse figure 1 and wipe it so each image starts from a blank canvas.
    fig = plt.figure(num=1, clear=True, figsize=(fig_width_px / 80, fig_height_px / 80))
    ax = fig.add_subplot()
    ax.axis('off')

    sns.heatmap(norm_ecm, xticklabels=False, yticklabels=False, cbar=False, ax=ax)

    save_ecm(dataset_name, ekms_path, key, ekms_counter)
    # Move on to the next group of beats (same step as big_ekm_dataset).
    # Without this every saved image showed the same first window.
    init_window += bpf
    ekms_counter += 1

In [None]:
def little_ekm_dataset_shifted(lead_data, sampling_rate, dataset_name, ekms_path, key, shift_amount):
  """Generate small time-windowed EKM images from a lead whose start is shifted.

  The first ``shift_amount`` seconds of the lead are dropped, the rest is
  preprocessed with ``process_ecg``, detrended and normalized. The signal is
  then cut into consecutive windows with ``electrocardiomatrix_no_1`` (one
  row per second), each window is drawn as a heat map on a tiny figure and
  written with ``save_ecm_no_1``. At most 3000 images are produced; the loop
  stops earlier when less than one full window of signal is left.

  The window length in seconds is read from the notebook-level variable
  ``sbf``, the same variable ``save_ecm_no_1`` puts into the file name.
  NOTE(review): ``sbf`` is assumed to mean "seconds per frame" - confirm.

  Parameters
  ----------
  lead_data : array-like
      Raw samples of one lead.
  sampling_rate : number
      Sampling rate in Hz.
  dataset_name : str
      Tag embedded in the image file names.
  ekms_path : str
      Folder the images are written to.
  key : str
      User id embedded in the image file names.
  shift_amount : number
      Seconds of signal to discard from the start.

  Raises
  ------
  NameError
      If no notebook-level ``sbf`` is defined.
  ValueError
      If ``sbf`` is not a positive number of seconds.
  """
  # Resolve the window length up front so a missing setting fails before the
  # expensive preprocessing rather than after it.
  window_size = globals().get("sbf")
  if window_size is None:
    raise NameError(
        "little_ekm_dataset_shifted needs a notebook-level `sbf` "
        "(window length in seconds per EKM) to be defined"
    )
  window_size = int(window_size)
  if window_size <= 0:
    raise ValueError("`sbf` must be a positive number of seconds")

  print("Shifting the raw signal")
  shifted_signal = lead_data[int(shift_amount * sampling_rate):]

  print("  .Preprocessing the signal")
  # Only the filtered signal is needed: windows are time-based, not peak-based.
  _, filtered_ecg = process_ecg(shifted_signal , sampling_rate)

  print("  .Getting detrend_signal, norm_ecg, distance")
  detrend_signal = detrend(filtered_ecg)
  norm_ecg = normalize(detrend_signal)

  # Slicing needs integer sample counts.
  samples_per_second = int(sampling_rate)
  samples_per_ekm = window_size * samples_per_second

  ekms_counter, init_window = 0, 0
  total_ecms = 3000

  # Target image size. NOTE(review): dividing by 80 assumes an effective
  # resolution of 80 pixels per inch - confirm against the saved files.
  fig_width_px = 33
  fig_height_px = 21

  print("  .Getting EKMs")
  while ekms_counter < total_ecms:
    # Stop when a full window no longer fits; a partial window would give rows
    # of unequal length.
    if init_window + samples_per_ekm > len(norm_ecg): break
    # The previous call passed (distance, peaks, norm_ecg, init_window,
    # peaks_window), which does not match the helper's four-parameter signature.
    ecm = electrocardiomatrix_no_1(norm_ecg, init_window, samples_per_second, window_size)
    norm_ecm = normalize(ecm)

    # Reuse figure 1 and wipe it so each image starts from a blank canvas.
    fig = plt.figure(num=1, clear=True, figsize=(fig_width_px / 80, fig_height_px / 80))
    ax = fig.add_subplot()
    ax.axis('off')

    sns.heatmap(norm_ecm, xticklabels=False, yticklabels=False, cbar=False, ax=ax)

    save_ecm_no_1(dataset_name, ekms_path, key, ekms_counter)
    # Advance to the next, non-overlapping window of signal.
    init_window += samples_per_ekm
    ekms_counter += 1

In [25]:
def big_ekm_dataset(lead_data, sampling_rate, dataset_name, ekms_path, key):
  """Generate EKM heat-map images for one ECG lead at the current figure size.

  The lead is preprocessed with ``process_ecg``, detrended and normalized.
  EKMs are then built from successive groups of R peaks, drawn as a heat map
  and written with ``save_ecm``. At most 3000 images are produced; the loop
  stops earlier when the peaks run out or a matrix cannot be assembled.

  Parameters
  ----------
  lead_data : array-like
      Raw samples of one lead.
  sampling_rate : number
      Sampling rate in Hz.
  dataset_name : str
      Tag embedded in the image file names.
  ekms_path : str
      Folder the images are written to.
  key : str
      User id embedded in the image file names.
  """
  print("  .Preprocessing the signal")
  peaks, filtered_ecg = process_ecg(lead_data , sampling_rate)

  print("  .Getting detrend_signal, norm_ecg, distance")
  detrend_signal = detrend(filtered_ecg)
  norm_ecg = normalize(detrend_signal)
  distance = peak_distance(peaks)

  # presumably "beats per frame" - confirm; each EKM uses bpf - 1 peaks as rows
  bpf = 10
  peaks_window = bpf-1
  ekms_counter, init_window = 0, 0
  total_ecms = 3000

  print("  .Getting EKMs")
  while ekms_counter < total_ecms:
    # At least two peaks must remain to build a matrix.
    if init_window >= len(peaks)-1: break
    ecm = electrocardiomatrix(distance, peaks, norm_ecg, init_window, peaks_window)
    if ecm is None: break
    # Kept as in the original: the first matrix is cut with the float mean
    # distance, later ones with its truncated integer value.
    distance = int(distance)
    norm_ecm = normalize(ecm)

    # Wipe the current figure first. Otherwise every heat map is drawn on top of
    # the previous ones and the figure keeps accumulating artists over the loop.
    plt.clf()
    sns.heatmap(norm_ecm, xticklabels=False, yticklabels=False, cbar=False)

    save_ecm(dataset_name, ekms_path, key, ekms_counter)
    init_window += bpf
    ekms_counter += 1

In [26]:
def pretier_print(pos, userNumber, usr_ecg_file_name):
  """Print a progress banner around the processing of one user's lead.

  With ``pos == "begin"`` a 30-dash rule, the current timestamp, the user
  number, a blank line and the file label are printed. With ``pos == "end"``
  the current timestamp is printed between two blank lines. Any other
  ``pos`` prints nothing.
  """
  def _now():
    return datetime.now().strftime('%Y-%m-%d %H:%M:%S')

  if pos == "begin":
    for line in ("-" * 30, _now(), f"-> User No.{userNumber}", "", usr_ecg_file_name):
      print(line)
  elif pos == "end":
    for line in ("", _now(), ""):
      print(line)

## Little shifted EKMs

In [27]:
dataset_path = "/content/drive/MyDrive/ECG project/ECG_200"
# List the dataset folder without the "clinicalData-selected" entry (presumably
# not a per-user recording). Filtering instead of list.remove() keeps the cell
# from raising ValueError when that entry is absent.
users_files = [entry for entry in os.listdir(dataset_path) if entry != "clinicalData-selected"]
len(users_files)

401

In [29]:
# Keep only the users' .ecg recordings. The extension is taken with
# os.path.splitext, so entries without a dot are skipped instead of raising
# IndexError, and names containing several dots are judged by their last suffix.
users_ecg_files = [
  file_name for file_name in users_files
  if os.path.splitext(file_name)[1] == ".ecg"
]

In [32]:
# Preview the first ten recording file names.
users_ecg_files[:10]

['10022.ecg',
 '10023.ecg',
 '10047.ecg',
 '10048.ecg',
 '10049.ecg',
 '10050.ecg',
 '10051.ecg',
 '10062.ecg',
 '10064.ecg',
 '10066.ecg']

In [30]:
# Settings shared by the dataset-extraction cells below.
# NOTE(review): save_ecm_no_1 also reads a variable named `sbf`, which is not
# defined in the visible cells - confirm where it is meant to be set.
sampling_rate = all_data.sr  # from the sample recording; the per-user loop re-reads it from each file
dataset_name = "main_shifted_ekm_dataset"
base_ekms_path = 'EKM_dataset'

shift_amount = 0.5 # seconds

# 1-based lead number -> name of the lead's output sub-folder
lead_names_dict = dict(enumerate(("x_lead", "y_lead", "z_lead"), start=1))

In [31]:
def user_EKMs_dir_creator():
  """Reset the ``EKM_dataset`` output tree to three empty lead folders.

  Any existing ``EKM_dataset`` folder (relative to the current working
  directory) is deleted together with its contents, then ``x_lead``,
  ``y_lead`` and ``z_lead`` are created inside a fresh one. Implemented with
  ``os``/``shutil`` instead of shell commands so it also works outside
  IPython and does not error when the folder does not exist yet.
  """
  if os.path.isdir("EKM_dataset"):
    shutil.rmtree("EKM_dataset")
  for lead_dir in ("x_lead", "y_lead", "z_lead"):
    os.makedirs(os.path.join("EKM_dataset", lead_dir), exist_ok=True)

In [None]:
# For every user recording: rebuild the output folders, generate the shifted
# EKM images lead by lead, then zip the images and move the archive to Drive.
for ecg_file in users_ecg_files:
  # Start each user from empty output folders so archives do not mix users.
  user_EKMs_dir_creator()

  ecg_file_path = f"{dataset_path}/{ecg_file}"
  user_leads_all_data = Holter(ecg_file_path)
  user_leads_all_data.load_data()

  x_lead, y_lead, z_lead = (user_leads_all_data.lead[axis] for axis in range(3))
  user_leads_signals = [x_lead, y_lead, z_lead]

  # The file name without its extension serves as the user id.
  user_id = ecg_file.split(".")[0]
  sampling_rate = user_leads_all_data.sr

  for lead_number, lead_data in enumerate(user_leads_signals, start=1):
    lead_name = lead_names_dict[lead_number]
    name_of_file = f"{ecg_file}: {lead_name}"
    pretier_print("begin", int(user_id), name_of_file)

    lead_path = f"{base_ekms_path}/{lead_name}"
    little_ekm_dataset_shifted(lead_data.data, sampling_rate, dataset_name, lead_path, user_id, shift_amount)

    pretier_print("end", int(user_id), ecg_file)

  # NOTE(review): the archive root is the absolute /content/EKM_dataset while
  # the folders are created relative to the working directory - this assumes
  # the notebook runs from /content; confirm.
  shutil.make_archive(user_id, format='zip', root_dir='/content/EKM_dataset')
  source_file_path = f"/content/{user_id}.zip"
  destination_directory = f"/content/drive/MyDrive/ECG project/IMD project/{user_id}.zip"
  shutil.move(source_file_path, destination_directory)