<a href="https://colab.research.google.com/github/Harshithaprudhivi/Team-15-project/blob/main/ECG_Synthesizer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install wfdb neurokit2 biosppy

Collecting wfdb
  Downloading wfdb-4.3.0-py3-none-any.whl.metadata (3.8 kB)
Collecting neurokit2
  Downloading neurokit2-0.2.11-py2.py3-none-any.whl.metadata (37 kB)
Collecting biosppy
  Downloading biosppy-2.2.3-py2.py3-none-any.whl.metadata (6.0 kB)
Collecting pandas>=2.2.3 (from wfdb)
  Downloading pandas-2.3.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (91 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m91.2/91.2 kB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
Collecting bidict (from biosppy)
  Downloading bidict-0.23.1-py3-none-any.whl.metadata (8.7 kB)
Collecting shortuuid (from biosppy)
  Downloading shortuuid-1.0.13-py3-none-any.whl.metadata (5.8 kB)
Collecting mock (from biosppy)
  Downloading mock-5.2.0-py3-none-any.whl.metadata (3.1 kB)
Downloading wfdb-4.3.0-py3-none-any.whl (163 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m163.8/163.8 kB[0m [31m3.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading neurokit2-0.

In [None]:
import numpy as np, pandas as pd, matplotlib.pyplot as plt
import wfdb, neurokit2 as nk
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv1D, LSTM, TimeDistributed, Dense
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam
import os

# Data preparation: mount Google Drive and convert WFDB records to `.npy`

In [None]:
from google.colab import drive
from pathlib import Path
# Mount Google Drive at /content/drive/ — the dataset folders referenced
# further down live under /content/drive/MyDrive/.
drive.mount('/content/drive/')


Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


In [None]:
import os
import wfdb
import numpy as np

def convert_all_wfdb_dirs(base_input_dirs, base_output_root):
    """Convert every WFDB record under the given folders into a ``.npy`` file.

    Each input folder ``<name>`` is mirrored into
    ``<base_output_root>/npy_<name>``, keeping the relative sub-folder layout.
    A record is located through its ``.hea`` header file, read with
    ``wfdb.rdrecord`` and its ``p_signal`` array is written with ``np.save``.

    A record is skipped (with a printed message) when its ``p_signal`` is
    ``None`` or occupies more than 1e9 bytes. Any exception raised while
    reading or saving a record is printed and the next record is processed.

    Args:
        base_input_dirs: Iterable of folder paths to scan recursively.
        base_output_root: Folder under which the ``npy_<name>`` trees are
            created.

    Returns:
        None. Progress is reported through ``print``.
    """
    max_signal_bytes = 1e9  # >1GB, you can adjust this

    for source_dir in base_input_dirs:
        source_name = os.path.basename(source_dir.rstrip('/'))
        target_root = os.path.join(base_output_root, f'npy_{source_name}')
        os.makedirs(target_root, exist_ok=True)

        for folder, _, names in os.walk(source_dir, topdown=True, followlinks=False):
            for header_name in (n for n in names if n.endswith('.hea')):
                # The record path handed to wfdb is the header path minus ".hea".
                record_path = os.path.join(folder, header_name[:-4])
                relative_record = os.path.relpath(record_path, source_dir)
                save_path = os.path.join(target_root, f'{relative_record}.npy')

                # Recreate the record's sub-folder before attempting the read.
                os.makedirs(os.path.dirname(save_path), exist_ok=True)

                try:
                    print(f"Reading: {record_path}")
                    signal = wfdb.rdrecord(record_path).p_signal

                    if signal is None:
                        print(f"Skipping: {record_path} — p_signal is None")
                    elif signal.nbytes > max_signal_bytes:
                        print(f"Warning: {record_path} — p_signal too large to save safely")
                    else:
                        print(f"Saving to: {save_path}")
                        np.save(save_path, signal)
                        print(f"Saved: {save_path}")
                except Exception as err:
                    # Best effort: report the failure and keep converting.
                    print(f"Failed: {record_path} — {err}")


In [None]:
import os
import numpy as np
import pandas as pd
from tqdm import tqdm

In [None]:
# Folders holding the converted .npy recordings, keyed by the sampling-rate
# label ("100" / "500") that the processing loop prints as "<label> Hz".
base_dirs = {
    "100": "/content/drive/MyDrive/PTB-data/npy_records100",
    "500": "/content/drive/MyDrive/PTB-data/npy_records500"
}

In [None]:

# Number of .npy files loaded and stacked into one in-memory batch.
BATCH_SIZE = 1000

In [None]:
# Final merged dataset
# NOTE(review): nothing in the cells visible here appends to these lists — the
# processing loop only builds per-batch arrays. Confirm they are filled later.
X_all, Y_all = [], []

In [None]:
def bandpass_filter(ecg, fs=None, lowcut=0.5, highcut=40.0, order=4):
    """Optionally band-pass filter an ECG along the time axis (axis 0).

    With the default ``fs=None`` the signal is returned unchanged (the same
    object), so existing calls of the form ``bandpass_filter(ecg)`` keep their
    pass-through behaviour. Supplying ``fs`` applies a zero-phase Butterworth
    band-pass to every column.

    Args:
        ecg: Array-like whose first axis is time.
        fs: Sampling rate in Hz, or ``None`` to skip filtering.
        lowcut: Lower cut-off frequency in Hz.
        highcut: Upper cut-off frequency in Hz; must be below ``fs / 2``.
        order: Order passed to ``scipy.signal.butter``.

    Returns:
        ``ecg`` itself when ``fs`` is ``None``; otherwise a new float array of
        the same shape holding the filtered signal.

    Raises:
        ValueError: If the cut-offs do not satisfy
            ``0 < lowcut < highcut < fs / 2``.
    """
    if fs is None:
        return ecg

    # Imported locally so the pass-through path carries no SciPy dependency.
    from scipy.signal import butter, sosfiltfilt

    nyquist = 0.5 * fs
    if not 0 < lowcut < highcut < nyquist:
        raise ValueError(
            f"Need 0 < lowcut < highcut < fs/2, got lowcut={lowcut}, "
            f"highcut={highcut}, fs={fs}"
        )

    # Second-order sections are numerically safer than (b, a) coefficients;
    # forward-backward filtering cancels the phase shift.
    sos = butter(order, [lowcut, highcut], btype="bandpass", fs=fs, output="sos")
    return sosfiltfilt(sos, np.asarray(ecg, dtype=float), axis=0)

In [None]:
def normalize(ecg):
    """Scale every column by its largest absolute value.

    Columns whose peak is exactly zero are divided by 1 instead, so they pass
    through unchanged rather than triggering a division by zero. The input
    array is not modified.

    Args:
        ecg: Array whose first axis is reduced to find each column's peak.

    Returns:
        The element-wise quotient ``ecg / peak`` with ``peak`` broadcast over
        the first axis.
    """
    magnitudes = np.abs(ecg)
    peak = np.max(magnitudes, axis=0, keepdims=True)
    # Flat (all-zero) columns get a divisor of 1.
    peak[peak == 0] = 1
    return ecg / peak

In [None]:
def extract_input_output(ecg, input_leads=(0, 1, 7)):
    """Split a 12-lead recording into model input and reconstruction target.

    The input is meant to be leads I, II and V2. The previous ``ecg[:, :3]``
    slice took the first three columns instead, which does not match that
    intent when the columns follow the usual PTB-XL channel order
    (I, II, III, aVR, aVL, aVF, V1-V6): it would pick I, II, III.
    NOTE(review): the default indices assume that channel order — confirm
    against ``record.sig_name`` of the converted records.

    Args:
        ecg: 2-D array of shape (samples, leads).
        input_leads: Column indices used as the model input.

    Returns:
        Tuple ``(x, y)`` where ``x`` is ``ecg`` restricted to ``input_leads``
        (a copy, because integer-list indexing is used) and ``y`` is ``ecg``
        itself.
    """
    return ecg[:, list(input_leads)], ecg  # I, II, V2


In [None]:

# For each sampling-rate folder: collect the converted recordings, load them in
# chunks of BATCH_SIZE files, and turn every chunk into (input, target) arrays
# that stay in RAM until the next chunk replaces them.
for label, data_folder in base_dirs.items():
    print(f"\n🔍 Starting processing for {label} Hz folder")

    # Gather all .npy files. os.walk yields entries in arbitrary,
    # filesystem-dependent order, so the paths are sorted to make batch
    # membership reproducible from run to run.
    file_list = sorted(
        os.path.join(root, f)
        for root, _, files in os.walk(data_folder)
        for f in files
        if f.endswith('.npy')
    )
    print(f"✅ Found {len(file_list)} .npy files in {label} Hz folder")

    # Batch processing (ceiling division so a partial last batch is included)
    num_batches = (len(file_list) + BATCH_SIZE - 1) // BATCH_SIZE

    for batch_idx in range(num_batches):
        start = batch_idx * BATCH_SIZE
        end = min(start + BATCH_SIZE, len(file_list))
        batch_files = file_list[start:end]

        X_batch, Y_batch = [], []

        print(f"\n🚀 [{label}Hz] Processing batch {batch_idx + 1}/{num_batches}")

        for file_path in tqdm(batch_files):
            try:
                ecg = np.load(file_path)

                # Only (samples, 12) recordings are usable downstream.
                if ecg.ndim != 2 or ecg.shape[1] != 12:
                    print(f"⚠️ Skipped: {file_path} (Invalid shape: {ecg.shape})")
                    continue

                ecg = normalize(bandpass_filter(ecg))
                x, y = extract_input_output(ecg)

                X_batch.append(x)
                Y_batch.append(y)

            except Exception as e:
                # Best effort: report the bad file and keep going.
                print(f"❌ Failed to process {file_path}: {e}")

        # Convert to arrays (stay in RAM)
        X_batch = np.array(X_batch)
        Y_batch = np.array(Y_batch)

        print(f"✅ Processed batch {batch_idx + 1}/{num_batches} — Shape: {X_batch.shape}")

        # 🔸 At this point, you can train or analyze on this batch in-memory
        # Example: model.train_on_batch(X_batch, Y_batch)


🔍 Starting processing for 100 Hz folder
✅ Found 21847 .npy files in 100 Hz folder

🚀 [100Hz] Processing batch 1/22


100%|██████████| 1000/1000 [00:50<00:00, 19.78it/s]


✅ Processed batch 1/22 — Shape: (1000, 1000, 3)

🚀 [100Hz] Processing batch 2/22


100%|██████████| 1000/1000 [04:20<00:00,  3.85it/s]


✅ Processed batch 2/22 — Shape: (1000, 1000, 3)

🚀 [100Hz] Processing batch 3/22


100%|██████████| 1000/1000 [04:21<00:00,  3.82it/s]


✅ Processed batch 3/22 — Shape: (1000, 1000, 3)

🚀 [100Hz] Processing batch 4/22


100%|██████████| 1000/1000 [04:10<00:00,  3.99it/s]


✅ Processed batch 4/22 — Shape: (1000, 1000, 3)

🚀 [100Hz] Processing batch 5/22


100%|██████████| 1000/1000 [04:14<00:00,  3.92it/s]


✅ Processed batch 5/22 — Shape: (1000, 1000, 3)

🚀 [100Hz] Processing batch 6/22


100%|██████████| 1000/1000 [04:12<00:00,  3.95it/s]


✅ Processed batch 6/22 — Shape: (1000, 1000, 3)

🚀 [100Hz] Processing batch 7/22


100%|██████████| 1000/1000 [04:16<00:00,  3.91it/s]


✅ Processed batch 7/22 — Shape: (1000, 1000, 3)

🚀 [100Hz] Processing batch 8/22


100%|██████████| 1000/1000 [04:18<00:00,  3.87it/s]


✅ Processed batch 8/22 — Shape: (1000, 1000, 3)

🚀 [100Hz] Processing batch 9/22


100%|██████████| 1000/1000 [04:13<00:00,  3.94it/s]


✅ Processed batch 9/22 — Shape: (1000, 1000, 3)

🚀 [100Hz] Processing batch 10/22


100%|██████████| 1000/1000 [04:17<00:00,  3.89it/s]


✅ Processed batch 10/22 — Shape: (1000, 1000, 3)

🚀 [100Hz] Processing batch 11/22


100%|██████████| 1000/1000 [04:06<00:00,  4.06it/s]


✅ Processed batch 11/22 — Shape: (1000, 1000, 3)

🚀 [100Hz] Processing batch 12/22


100%|██████████| 1000/1000 [04:11<00:00,  3.97it/s]


✅ Processed batch 12/22 — Shape: (1000, 1000, 3)

🚀 [100Hz] Processing batch 13/22


100%|██████████| 1000/1000 [04:13<00:00,  3.94it/s]


✅ Processed batch 13/22 — Shape: (1000, 1000, 3)

🚀 [100Hz] Processing batch 14/22


100%|██████████| 1000/1000 [04:14<00:00,  3.93it/s]


✅ Processed batch 14/22 — Shape: (1000, 1000, 3)

🚀 [100Hz] Processing batch 15/22


100%|██████████| 1000/1000 [04:10<00:00,  3.99it/s]


✅ Processed batch 15/22 — Shape: (1000, 1000, 3)

🚀 [100Hz] Processing batch 16/22


100%|██████████| 1000/1000 [04:15<00:00,  3.91it/s]


✅ Processed batch 16/22 — Shape: (1000, 1000, 3)

🚀 [100Hz] Processing batch 17/22


100%|██████████| 1000/1000 [04:08<00:00,  4.02it/s]


✅ Processed batch 17/22 — Shape: (1000, 1000, 3)

🚀 [100Hz] Processing batch 18/22


100%|██████████| 1000/1000 [04:01<00:00,  4.15it/s]


✅ Processed batch 18/22 — Shape: (1000, 1000, 3)

🚀 [100Hz] Processing batch 19/22


100%|██████████| 1000/1000 [04:04<00:00,  4.09it/s]


✅ Processed batch 19/22 — Shape: (1000, 1000, 3)

🚀 [100Hz] Processing batch 20/22


100%|██████████| 1000/1000 [04:05<00:00,  4.08it/s]


✅ Processed batch 20/22 — Shape: (1000, 1000, 3)

🚀 [100Hz] Processing batch 21/22


  3%|▎         | 32/1000 [00:07<04:06,  3.93it/s]