## **Mount Drive**

In [None]:
# Mount Google Drive at /content/drive; the dataset files read later in this
# notebook live under /content/drive/MyDrive.
from google.colab import drive
drive.mount("/content/drive")

Mounted at /content/drive


## **Installations**

In [None]:
# %pip installs into the environment of the running kernel. The versions are
# pinned to the ones this notebook was last executed with so reruns are reproducible.
%pip install -q PyWavelets==1.8.0 wfdb==4.2.0

Collecting PyWavelets
  Downloading pywavelets-1.8.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.0 kB)
Downloading pywavelets-1.8.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (4.5 MB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/4.5 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m4.5/4.5 MB[0m [31m165.7 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.5/4.5 MB[0m [31m85.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: PyWavelets
Successfully installed PyWavelets-1.8.0
Collecting wfdb
  Downloading wfdb-4.2.0-py3-none-any.whl.metadata (3.7 kB)
Collecting pandas>=2.2.3 (from wfdb)
  Downloading pandas-2.2.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (89 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m89.9/89.9 kB[0m [31m7.2 MB/s[0m eta [36m0:00:0

## **Imports**

In [None]:
import os

import numpy as np
import pandas as pd

import wfdb
import ast

from tqdm import tqdm

In [None]:
import pywt
from scipy.signal import spectrogram

In [None]:
from PIL import Image
from matplotlib import cm

## **Functions**

In [None]:
def remove_noise_drift(x, wavelet="db4"):
    """Denoise a multi-channel signal and remove its baseline with a wavelet transform.

    Every channel is decomposed with ``pywt.wavedec`` down to the maximum level
    allowed by the signal length. The detail coefficients are soft-thresholded
    with the universal threshold ``sigma * sqrt(2 * ln(samples_num))``, where
    ``sigma`` is estimated from the finest detail band as
    ``median(|d|) / 0.6745``. The approximation coefficients are replaced by
    zeros before the signal is reconstructed.

    Parameters
    ----------
    x : array-like of shape (samples_num, channels_num)
        Input signal, one column per channel.
    wavelet : str, default "db4"
        Name of the discrete wavelet passed to PyWavelets.

    Returns
    -------
    numpy.ndarray of shape (samples_num, channels_num)
        The filtered signal.
    """
    x = np.asarray(x)
    samples_num, channels_num = x.shape
    x_filtered = np.zeros((samples_num, channels_num))

    max_level = pywt.dwt_max_level(samples_num, pywt.Wavelet(wavelet).dec_len)

    for channel in range(channels_num):
        coefficients = pywt.wavedec(x[:, channel], wavelet, level=max_level)

        # Noise level estimated from the finest detail band.
        sigma = np.median(np.abs(coefficients[-1])) / 0.6745
        threshold = sigma * np.sqrt(2 * np.log(samples_num))

        # Zero the approximation band (coefficients[0]) and shrink the details.
        denoised_coefficients = [np.zeros_like(coefficients[0])]
        for detail in coefficients[1:]:
            # With a zero threshold, soft thresholding evaluates 0 / 0 for zero
            # coefficients and returns NaN; a zero threshold means "no shrinkage",
            # so the band is kept unchanged in that case.
            if threshold > 0:
                detail = pywt.threshold(detail, threshold, mode="soft")
            denoised_coefficients.append(detail)

        # The reconstruction can be one sample longer than the input for odd
        # lengths, so trim it back to the original number of samples.
        x_filtered[:, channel] = pywt.waverec(denoised_coefficients, wavelet)[:samples_num]

    return x_filtered

In [None]:
def get_spectrograms(signal, fs, window="hann", nperseg=128, noverlap=120):
    """Compute spectrograms along the first axis of ``signal``.

    The arguments are forwarded to ``scipy.signal.spectrogram`` with ``axis=0``.
    The first two axes of the resulting spectrogram array are then swapped, so
    for a ``(samples, channels)`` input the channel axis comes first.

    Returns
    -------
    tuple
        ``(frequencies, times, spectrograms)`` as returned by SciPy, with the
        first two axes of ``spectrograms`` exchanged.
    """
    frequencies, times, power = spectrogram(
        signal,
        fs=fs,
        window=window,
        nperseg=nperseg,
        noverlap=noverlap,
        axis=0,
    )
    return frequencies, times, power.swapaxes(0, 1)

In [None]:
def spectrograms_to_images(Sxx, size=(256, 256)):
    """Render each 2-D spectrogram in ``Sxx`` as a jet-coloured PIL image.

    Parameters
    ----------
    Sxx : iterable of 2-D arrays
        Spectrograms whose values are divided by 255 before colour mapping, so
        they are expected to lie in the 0..255 range.
    size : tuple of int, default (256, 256)
        Size passed to ``Image.resize`` for every image.

    Returns
    -------
    list of PIL.Image.Image
        One RGB image per entry of ``Sxx``.
    """
    # cm.get_cmap is deprecated (and removed in newer Matplotlib releases);
    # the colormaps registry is the supported way to look up a colormap.
    from matplotlib import colormaps

    jet_cmap = colormaps["jet"]

    images = []
    for sxx in Sxx:
        # Flip the first axis so row 0 of the array ends up at the bottom of the image.
        rgba = jet_cmap(np.asarray(sxx)[::-1] / 255)
        # Drop the alpha channel and convert the 0..1 floats to 8-bit RGB.
        rgb = (rgba[:, :, :3] * 255).astype("uint8")
        images.append(Image.fromarray(rgb).resize(size))

    return images

In [None]:
def get_scalograms(signal, fs, wavelet="cmor1.5-1.0"):
    """Compute the continuous wavelet transform of every channel of ``signal``.

    Parameters
    ----------
    signal : numpy.ndarray
        Array whose transpose is iterated channel by channel, i.e. channels
        are the columns.
    fs : float
        Sampling frequency; ``1 / fs`` is passed to ``pywt.cwt`` as the sampling period.
    wavelet : str, default "cmor1.5-1.0"
        Continuous wavelet name passed to ``pywt.cwt``.

    Returns
    -------
    tuple of lists
        ``(Sxx_cwt, frequencies)`` with one entry per channel. Each coefficient
        array has its first (scale) axis reversed.
    """
    # 100 geometrically spaced scales between 1 and 128.
    scales = np.geomspace(1, 128, num=100)

    Sxx_cwt, frequencies = [], []

    for channel in signal.T:
        # Use the caller-supplied wavelet rather than a hard-coded name.
        sxx_cwt, f = pywt.cwt(channel, scales=scales, wavelet=wavelet, sampling_period=1 / fs)
        # NOTE(review): the coefficient rows are reversed here but ``f`` is kept in
        # the order returned by pywt, so row k does not pair with f[k] — confirm
        # before using the returned frequencies as an axis.
        Sxx_cwt.append(sxx_cwt[::-1])
        frequencies.append(f)

    return Sxx_cwt, frequencies

In [None]:
def scalograms_to_images(coefficients, size=(256, 256)):
    """Render wavelet coefficient arrays as jet-coloured PIL images.

    The magnitudes of all coefficients are min-max scaled together to 0..255
    (one common scale for every entry), quantised to ``uint8``, and each entry
    is then colour-mapped separately.

    Parameters
    ----------
    coefficients : array-like
        Sequence of 2-D coefficient arrays of equal shape (possibly complex).
    size : tuple of int, default (256, 256)
        Size passed to ``Image.resize`` for every image.

    Returns
    -------
    list of PIL.Image.Image
        One RGB image per entry of ``coefficients``. Constant or non-finite
        input produces images filled with the lowest colour of the map.
    """
    # cm.get_cmap is deprecated (and removed in newer Matplotlib releases);
    # the colormaps registry is the supported way to look up a colormap.
    from matplotlib import colormaps

    jet_cmap = colormaps["jet"]
    images = []

    magnitude = np.abs(coefficients)
    lowest = magnitude.min()
    value_range = magnitude.max() - lowest

    if value_range > 0:
        scalogram = (magnitude - lowest) / value_range * 255
    else:
        # A zero (or NaN) range would turn the normalisation into a division by
        # zero and an undefined uint8 cast; fall back to an all-zero image.
        scalogram = np.zeros(magnitude.shape)
    scalogram = scalogram.astype("uint8")

    for channel_scalogram in scalogram:
        # Flip the first axis before colour mapping, then drop the alpha channel.
        rgba = jet_cmap(channel_scalogram[::-1] / 255)
        rgb = (rgba[:, :, :3] * 255).astype("uint8")
        images.append(Image.fromarray(rgb).resize(size))

    return images

## **Load Data**

In [None]:
def load_raw_data(df, path, fs=100):
    """Read the signals listed in ``df`` and return them as one array.

    The file names come from the ``filename_lr`` column when ``fs`` is 100 and
    from ``filename_hr`` otherwise. Each name is joined to ``path`` with ``/``
    and read with ``wfdb.rdsamp``; only the signal part of each
    ``(signal, metadata)`` pair is kept.
    """
    column = "filename_hr" if fs != 100 else "filename_lr"
    records = [wfdb.rdsamp(f"{path}/{name}") for name in df[column]]
    return np.array([samples for samples, _ in records])

In [None]:
# Load the record metadata table (indexed by ecg_id) and parse every
# scp_codes cell from its string form into a Python literal.
Y = pd.read_csv(
    "/content/drive/MyDrive/Thesis/ptbxl/ptbxl_database.csv",
    index_col="ecg_id",
    engine="python",
)
Y["scp_codes"] = Y["scp_codes"].map(ast.literal_eval)

In [None]:
# Read the ECG signal (fs=100Hz) of every record listed in the metadata table.
# wfdb.rdsamp returns a (signal, metadata) pair; only the signal is kept.
X = []
for file in tqdm(Y["filename_lr"]):
    x = wfdb.rdsamp(f"/content/drive/MyDrive/Thesis/ptbxl/{file}")
    x = np.array(x[0])
    X.append(x)

# Stack into one array: the fold selection later indexes X with boolean masks,
# which a plain Python list does not support.
X = np.array(X)

100%|██████████| 21799/21799 [3:41:23<00:00,  1.64it/s]


In [None]:
# Load scp_statements.csv (first column as index) and keep only the rows
# flagged as diagnostic; they drive the diagnostic aggregation.
scp_statements = pd.read_csv(
    "/content/drive/MyDrive/Thesis/ptbxl/scp_statements.csv",
    index_col=0,
).loc[lambda table: table["diagnostic"] == 1]

In [None]:
def statements_mapping(statements):
    """Return the distinct diagnostic classes of the given statement codes.

    Codes that are not present in the index of ``scp_statements`` are ignored.
    """
    classes = set()
    for statement in statements:
        if statement in scp_statements.index:
            classes.add(scp_statements.loc[statement, "diagnostic_class"])
    return list(classes)

In [None]:
# Map every record's SCP codes to its diagnostic superclasses. statements_mapping
# iterates its argument directly, which visits the same items as list(argument),
# so the deprecated element-wise Series.agg(list) step is not needed.
Y["diagnostic_superclass"] = Y["scp_codes"].map(statements_mapping)

  Y["diagnostic_superclass"] = Y["scp_codes"].agg(list).map(statements_mapping)


## **Prepare signals**

In [None]:
# Values of Y["strat_fold"] that select the validation and test records;
# records from every other fold are used for training.
validation_fold = 9
test_fold = 10

In [None]:
# Train: every record whose fold is neither the validation nor the test fold.
train_mask = (Y["strat_fold"] != validation_fold) & (Y["strat_fold"] != test_fold)

# Index positionally with a plain boolean array; np.asarray is a no-op when X
# is already an array and makes the selection work when X is still a list.
X_train = np.asarray(X)[train_mask.to_numpy()]
y_train = Y.loc[train_mask, "diagnostic_superclass"]

In [None]:
# Validation: records of the validation fold.
validation_mask = Y["strat_fold"] == validation_fold

# Index positionally with a plain boolean array; np.asarray is a no-op when X
# is already an array and makes the selection work when X is still a list.
X_validation = np.asarray(X)[validation_mask.to_numpy()]
y_validation = Y.loc[validation_mask, "diagnostic_superclass"]

In [None]:
# Test: records of the test fold.
test_mask = Y["strat_fold"] == test_fold

# Index positionally with a plain boolean array; np.asarray is a no-op when X
# is already an array and makes the selection work when X is still a list.
X_test = np.asarray(X)[test_mask.to_numpy()]
y_test = Y.loc[test_mask, "diagnostic_superclass"]

In [None]:
# Create the output folders for the raw signals. makedirs with exist_ok=True
# also creates the parent folder and keeps the cell safe to re-run.
for split in ("train", "validation", "test"):
    os.makedirs(f"signals100/{split}", exist_ok=True)

In [None]:
# Save every training signal as its own .npy file, named by its position in X_train.
for i, signal in enumerate(tqdm(X_train)):
    np.save(f"signals100/train/{i}.npy",  signal)

  0%|          | 0/17418 [00:00<?, ?it/s]

(1000, 12)





In [None]:
# Save every validation signal as its own .npy file, named by its position in X_validation.
for i, signal in enumerate(tqdm(X_validation)):
    np.save(f"signals100/validation/{i}.npy",  signal)

100%|██████████| 2183/2183 [00:02<00:00, 993.46it/s] 


In [None]:
# Save every test signal as its own .npy file, named by its position in X_test.
for i, signal in enumerate(tqdm(X_test)):
    np.save(f"signals100/test/{i}.npy",  signal)

100%|██████████| 2198/2198 [00:02<00:00, 790.65it/s] 


In [None]:
# Archive the exported signals quietly (-q suppresses one "adding:" line per file)
# and copy the archive to Drive, creating the destination folder if it is missing
# so that cp does not write a plain file named "data".
!zip -q -r /content/signals100.zip /content/signals100
!mkdir -p /content/drive/MyDrive/Thesis/data
!cp /content/signals100.zip /content/drive/MyDrive/Thesis/data

## **Filter signals**

In [None]:
def _npy_index_key(name):
    """Sort key placing ``<integer>.npy`` names in numeric order, other names last."""
    stem = os.path.splitext(name)[0]
    if stem.isdigit():
        return (0, int(stem), name)
    return (1, 0, name)


def filter_signals(purpose):
    """Filter every saved signal of one split and store the results.

    Reads each ``.npy`` file from ``signals100/<purpose>``, passes it through
    ``remove_noise_drift`` and writes the result under the same file name to
    ``signals100filtered/<purpose>``.

    Parameters
    ----------
    purpose : str
        Name of the split sub-folder, e.g. ``"train"``.

    Returns
    -------
    list of numpy.ndarray
        The filtered signals ordered by the integer in their file name, so
        position ``k`` of the list holds the content of ``k.npy``.
    """
    source_dir = f"signals100/{purpose}"
    target_dir = f"signals100filtered/{purpose}"

    # Creates the parent folder too and does not fail when the cell is re-run.
    os.makedirs(target_dir, exist_ok=True)

    # List the folder the files are actually loaded from. os.listdir returns
    # names in arbitrary order, so sort numerically to keep the returned list
    # aligned with the indices used when the files were written.
    files = sorted(
        (name for name in os.listdir(source_dir) if name.endswith(".npy")),
        key=_npy_index_key,
    )

    X_filtered = []
    for file in tqdm(files):
        x = np.load(f"{source_dir}/{file}")
        x_filtered = remove_noise_drift(x)
        np.save(f"{target_dir}/{file}", x_filtered)

        X_filtered.append(x_filtered)

    return X_filtered

In [None]:
# Filter the saved training signals and keep the filtered arrays in memory.
X_train_filtered = filter_signals(purpose="train")


invalid value encountered in divide

100%|██████████| 17418/17418 [02:12<00:00, 131.24it/s]


In [None]:
# Filter the saved validation signals and keep the filtered arrays in memory.
X_validation_filtered = filter_signals(purpose="validation")

100%|██████████| 2183/2183 [00:18<00:00, 119.76it/s]


In [None]:
# Filter the saved test signals and keep the filtered arrays in memory.
X_test_filtered = filter_signals(purpose="test")

100%|██████████| 2198/2198 [00:20<00:00, 107.62it/s]


In [None]:
# Archive the filtered signals quietly (-q suppresses one "adding:" line per file)
# and copy the archive to Drive, creating the destination folder if it is missing
# so that cp does not write a plain file named "data".
!zip -q -r /content/signals100filtered.zip /content/signals100filtered
!mkdir -p /content/drive/MyDrive/Thesis/data
!cp /content/signals100filtered.zip /content/drive/MyDrive/Thesis/data

[1;30;43mПоказано результат, скорочений до останніх рядків (5000).[0m
  adding: content/signals100filtered/train/5669.npy (deflated 4%)
  adding: content/signals100filtered/train/5812.npy (deflated 4%)
  adding: content/signals100filtered/train/16556.npy (deflated 4%)
  adding: content/signals100filtered/train/15664.npy (deflated 4%)
  adding: content/signals100filtered/train/6339.npy (deflated 4%)
  adding: content/signals100filtered/train/11970.npy (deflated 4%)
  adding: content/signals100filtered/train/7174.npy (deflated 4%)
  adding: content/signals100filtered/train/14897.npy (deflated 4%)
  adding: content/signals100filtered/train/10928.npy (deflated 4%)
  adding: content/signals100filtered/train/10135.npy (deflated 4%)
  adding: content/signals100filtered/train/14739.npy (deflated 4%)
  adding: content/signals100filtered/train/1525.npy (deflated 4%)
  adding: content/signals100filtered/train/8189.npy (deflated 4%)
  adding: content/signals100filtered/train/9192.npy (deflated 4

## **Create spectrograms and scalograms**

In [None]:
# Create the output folders for the spectrogram images. makedirs with
# exist_ok=True also creates the parent folder and keeps the cell safe to re-run.
for split in ("train", "validation", "test"):
    os.makedirs(f"/content/spectrograms/{split}", exist_ok=True)

In [None]:
def create_spectrograms(X, purpose):
    """Write spectrogram JPEGs for every signal in ``X``.

    For the signal at position ``i`` the spectrograms are computed with
    ``get_spectrograms`` (fs=100, Hann window, 128-sample segments, 127-sample
    overlap), converted to decibels, min-max scaled together to 0..255 and
    rendered with ``spectrograms_to_images``. Image ``j`` is saved as
    ``/content/spectrograms/<purpose>/<i>/<j>.jpg``.

    Parameters
    ----------
    X : iterable of numpy.ndarray
        Signals to convert.
    purpose : str
        Name of the split sub-folder, e.g. ``"train"``.
    """
    path = f"/content/spectrograms/{purpose}"

    for i, signal in enumerate(tqdm(X)):
        _, _, Sxx = get_spectrograms(signal, fs=100, window="hann", nperseg=128, noverlap=127)

        # Power in decibels; the small offset avoids log10(0).
        Sxx = 10 * np.log10(Sxx + 1e-10)

        lowest = Sxx.min()
        value_range = Sxx.max() - lowest
        if value_range > 0:
            Sxx = (Sxx - lowest) / value_range * 255
        else:
            # A zero (or NaN) range would turn the normalisation into a division
            # by zero and an undefined uint8 cast; fall back to an all-zero image.
            Sxx = np.zeros(Sxx.shape)
        Sxx = Sxx.astype("uint8")

        images = spectrograms_to_images(Sxx, size=(256, 256))

        # One folder per signal; exist_ok lets an interrupted run be resumed.
        path_dir = f"{path}/{i}"
        os.makedirs(path_dir, exist_ok=True)

        for j, image in enumerate(images):
            image.save(f"{path_dir}/{j}.jpg")

In [None]:
# Write spectrogram images for the filtered training signals under /content/spectrograms/train.
create_spectrograms(X_train_filtered, purpose="train")

  jet_cmap = cm.get_cmap("jet")
  Sxx = Sxx.astype("uint8")
100%|██████████| 17418/17418 [39:55<00:00,  7.27it/s]


In [None]:
# Write spectrogram images for the filtered validation signals under /content/spectrograms/validation.
create_spectrograms(X_validation_filtered, purpose="validation")

  jet_cmap = cm.get_cmap("jet")
100%|██████████| 2183/2183 [04:56<00:00,  7.37it/s]


In [None]:
# Write spectrogram images for the filtered test signals under /content/spectrograms/test.
create_spectrograms(X_test_filtered, purpose="test")

  jet_cmap = cm.get_cmap("jet")
100%|██████████| 2198/2198 [04:56<00:00,  7.41it/s]


In [None]:
# Archive the spectrogram images quietly (-q suppresses one "adding:" line per file)
# and copy the archive to Drive, creating the destination folder if it is missing
# so that cp does not write a plain file named "data".
!zip -q -r /content/spectrograms.zip /content/spectrograms
!mkdir -p /content/drive/MyDrive/Thesis/data
!cp /content/spectrograms.zip /content/drive/MyDrive/Thesis/data

[1;30;43mПоказано результат, скорочений до останніх рядків (5000).[0m
  adding: content/spectrograms/validation/1461/6.jpg (deflated 2%)
  adding: content/spectrograms/validation/1461/9.jpg (deflated 2%)
  adding: content/spectrograms/validation/1461/7.jpg (deflated 2%)
  adding: content/spectrograms/validation/1461/2.jpg (deflated 2%)
  adding: content/spectrograms/validation/1461/11.jpg (deflated 2%)
  adding: content/spectrograms/validation/1461/1.jpg (deflated 1%)
  adding: content/spectrograms/validation/1461/10.jpg (deflated 2%)
  adding: content/spectrograms/validation/1461/3.jpg (deflated 1%)
  adding: content/spectrograms/validation/337/ (stored 0%)
  adding: content/spectrograms/validation/337/4.jpg (deflated 1%)
  adding: content/spectrograms/validation/337/8.jpg (deflated 1%)
  adding: content/spectrograms/validation/337/5.jpg (deflated 2%)
  adding: content/spectrograms/validation/337/0.jpg (deflated 1%)
  adding: content/spectrograms/validation/337/6.jpg (deflated 2%)
 

In [None]:
# Create the output folders for the scalogram images. makedirs with
# exist_ok=True also creates the parent folder and keeps the cell safe to re-run.
for split in ("train", "validation", "test"):
    os.makedirs(f"/content/scaleograms/{split}", exist_ok=True)

In [None]:
def create_scaleograms(X, purpose):
    """Write scalogram JPEGs for every signal in ``X``.

    For the signal at position ``i`` the wavelet coefficients are computed with
    ``get_scalograms`` (fs=100, wavelet "cmor1.5-1.0") and rendered with
    ``scalograms_to_images``. Image ``j`` is saved as
    ``/content/scaleograms/<purpose>/<i>/<j>.jpg``.

    Parameters
    ----------
    X : iterable of numpy.ndarray
        Signals to convert.
    purpose : str
        Name of the split sub-folder, e.g. ``"train"``.
    """
    path = f"/content/scaleograms/{purpose}"

    for i, signal in enumerate(tqdm(X)):
        # The frequencies returned alongside the coefficients are not needed here.
        coefficients, _ = get_scalograms(signal, fs=100, wavelet="cmor1.5-1.0")
        images = scalograms_to_images(coefficients, size=(256, 256))

        # One folder per signal; exist_ok lets an interrupted run be resumed
        # instead of failing on the folders that already exist.
        path_dir = f"{path}/{i}"
        os.makedirs(path_dir, exist_ok=True)

        for j, image in enumerate(images):
            image.save(f"{path_dir}/{j}.jpg")

In [None]:
# Write scalogram images for the filtered training signals under /content/scaleograms/train.
create_scaleograms(X_train_filtered, purpose="train")

  jet_cmap = cm.get_cmap("jet")
  scalogram = scalogram.astype("uint8")
100%|██████████| 17418/17418 [2:16:04<00:00,  2.13it/s]


In [None]:
# Write scalogram images for the filtered validation signals under /content/scaleograms/validation.
create_scaleograms(X_validation_filtered, purpose="validation")

  jet_cmap = cm.get_cmap("jet")
100%|██████████| 2183/2183 [17:07<00:00,  2.13it/s]


In [None]:
# Write scalogram images for the filtered test signals under /content/scaleograms/test.
create_scaleograms(X_test_filtered, purpose="test")

  jet_cmap = cm.get_cmap("jet")
100%|██████████| 2198/2198 [17:17<00:00,  2.12it/s]


In [None]:
# Archive the scalogram images into /content (the same path the copy step reads
# from), quietly so the cell does not print one "adding:" line per file, then copy
# the archive to Drive, creating the destination folder if it is missing.
!zip -q -r /content/scaleograms.zip /content/scaleograms
!mkdir -p /content/drive/MyDrive/Thesis/data
!cp /content/scaleograms.zip /content/drive/MyDrive/Thesis/data

[1;30;43mПоказано результат, скорочений до останніх рядків (5000).[0m
  adding: content/scaleograms/validation/1461/6.jpg (deflated 2%)
  adding: content/scaleograms/validation/1461/9.jpg (deflated 2%)
  adding: content/scaleograms/validation/1461/7.jpg (deflated 2%)
  adding: content/scaleograms/validation/1461/2.jpg (deflated 2%)
  adding: content/scaleograms/validation/1461/11.jpg (deflated 3%)
  adding: content/scaleograms/validation/1461/1.jpg (deflated 4%)
  adding: content/scaleograms/validation/1461/10.jpg (deflated 2%)
  adding: content/scaleograms/validation/1461/3.jpg (deflated 2%)
  adding: content/scaleograms/validation/337/ (stored 0%)
  adding: content/scaleograms/validation/337/4.jpg (deflated 3%)
  adding: content/scaleograms/validation/337/8.jpg (deflated 2%)
  adding: content/scaleograms/validation/337/5.jpg (deflated 3%)
  adding: content/scaleograms/validation/337/0.jpg (deflated 3%)
  adding: content/scaleograms/validation/337/6.jpg (deflated 3%)
  adding: conte