<a href="https://colab.research.google.com/github/Harshithaprudhivi/Team-15-project/blob/main/ECG_Synthesizer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install wfdb neurokit2 biosppy



In [None]:
import numpy as np, pandas as pd, matplotlib.pyplot as plt
import wfdb, neurokit2 as nk
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv1D, LSTM, TimeDistributed, Dense
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam
import os

# New section

In [None]:
# Mount Google Drive so that the /content/drive/... dataset paths used by the
# following cells resolve inside the Colab runtime.
# NOTE(review): `Path` is not used in the visible part of the notebook — confirm it is needed.
from google.colab import drive
from pathlib import Path
drive.mount('/content/drive/')


Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


In [None]:

def convert_all_wfdb_dirs(base_input_dirs, base_output_root):
    """Convert every WFDB record found under the given directories to ``.npy``.

    Each input directory is walked recursively. For every ``.hea`` header file
    the matching record is read with ``wfdb.rdrecord`` and its ``p_signal``
    array is written with ``np.save`` below
    ``<base_output_root>/npy_<input directory name>/``, preserving the record's
    path relative to the input directory.

    Args:
        base_input_dirs: Iterable of directory paths that contain WFDB records.
        base_output_root: Directory in which the ``npy_*`` output folders are
            created (created on demand).

    Returns:
        None. A record that cannot be read or saved is reported with ``print``
        and skipped, so one bad record does not abort the whole conversion.
    """
    header_suffix = '.hea'

    for input_dir in base_input_dirs:
        # Name the output folder after the last component of the input path;
        # a trailing slash would otherwise make basename() return ''.
        rel_dir_name = os.path.basename(input_dir.rstrip('/'))
        output_dir = os.path.join(base_output_root, f'npy_{rel_dir_name}')
        os.makedirs(output_dir, exist_ok=True)

        for root, _, files in os.walk(input_dir):
            for file in files:
                if not file.endswith(header_suffix):
                    continue

                # WFDB records are addressed by their path without extension.
                record_path = os.path.join(root, file[:-len(header_suffix)])
                rel_path = os.path.relpath(record_path, input_dir)
                save_path = os.path.join(output_dir, f'{rel_path}.npy')

                # Mirror the sub-folder layout of the input directory.
                os.makedirs(os.path.dirname(save_path), exist_ok=True)

                try:
                    record = wfdb.rdrecord(record_path)
                    np.save(save_path, record.p_signal)
                except Exception as e:
                    # Best-effort conversion: report and continue with the next record.
                    print(f"Failed: {record_path} — {e}")

# Example usage: convert both record folders under PTB-data into .npy files.
wfdb_record_dirs = [
    '/content/drive/MyDrive/PTB-data/records500',
    '/content/drive/MyDrive/PTB-data/records100',
]
npy_output_root = '/content/drive/MyDrive/PTB-data'

convert_all_wfdb_dirs(
    base_input_dirs=wfdb_record_dirs,
    base_output_root=npy_output_root,
)


In [None]:

# Read the record metadata and keep only the rows that carry SCP codes,
# renumbering the index from zero afterwards.
meta = (
    pd.read_csv('/content/drive/MyDrive/PTB-data/ptbxl_database.csv')
    .dropna(subset=['scp_codes'])
    .reset_index(drop=True)
)


In [None]:
def generate_paths(row):
    """Return the converted ``.npy`` file paths for one metadata row.

    Args:
        row: Mapping-like metadata row (for example a pandas Series) with a
            ``filename_hr`` entry and, when available, a ``filename_lr`` entry.

    Returns:
        dict: ``{'500': path, '100': path}`` pointing into the
        ``npy_records500`` and ``npy_records100`` folders respectively.
    """
    base_500 = '/content/drive/MyDrive/PTB-data/npy_records500'
    base_100 = '/content/drive/MyDrive/PTB-data/npy_records100'

    def _strip_prefix(record_name, prefix):
        # The .npy files are stored relative to the records folder, so the
        # leading "records500/" / "records100/" component must not be repeated.
        if record_name.startswith(prefix):
            return record_name[len(prefix):]
        return record_name

    hr_rel = _strip_prefix(row['filename_hr'], 'records500/')

    # The 100 Hz recordings have their own file names (``*_lr``); reusing the
    # ``*_hr`` name for the 100 Hz folder points at files that do not exist.
    lr_name = row.get('filename_lr')
    if isinstance(lr_name, str):
        lr_rel = _strip_prefix(lr_name, 'records100/')
    elif hr_rel.endswith('_hr'):
        # Fallback when the row has no usable filename_lr: derive it from the hr name.
        lr_rel = hr_rel[:-len('_hr')] + '_lr'
    else:
        lr_rel = hr_rel

    return {
        '500': os.path.join(base_500, hr_rel + '.npy'),
        '100': os.path.join(base_100, lr_rel + '.npy')
    }

# Store, for every metadata row, the dict of .npy paths returned by generate_paths
# (keys '500' and '100').
meta['paths'] = meta.apply(generate_paths, axis=1)

In [None]:
import numpy as np
import pandas as pd
import os
from sklearn.preprocessing import StandardScaler
import neurokit2 as nk

# --- Define core preprocessing functions ---
def bandpass_filter(signal, sampling_rate=500):
    """Band-pass filter an ECG between 0.5 Hz and 40 Hz.

    ``nk.signal_filter`` operates on a single 1-D signal. Handing it a
    ``(samples, leads)`` array makes the underlying filter run along the last
    axis (the handful of leads) instead of along time, which fails with
    "The length of the input vector x must be greater than padlen". Each lead
    is therefore filtered on its own and the results are stacked back together.

    Args:
        signal: 1-D array of samples, or 2-D array indexed as (samples, leads).
        sampling_rate: Sampling frequency of ``signal`` in Hz. Defaults to 500.

    Returns:
        numpy.ndarray: Filtered signal with the same shape as the input.

    Raises:
        ValueError: If ``signal`` has more than two dimensions.
    """
    signal = np.asarray(signal)

    if signal.ndim == 1:
        return nk.signal_filter(
            signal, sampling_rate=sampling_rate, lowcut=0.5, highcut=40
        )
    if signal.ndim != 2:
        raise ValueError(
            f"Expected a 1-D or 2-D (samples, leads) array, got shape {signal.shape}"
        )

    # Filter along time, one lead (column) at a time.
    filtered_leads = [
        nk.signal_filter(
            signal[:, lead], sampling_rate=sampling_rate, lowcut=0.5, highcut=40
        )
        for lead in range(signal.shape[1])
    ]
    return np.column_stack(filtered_leads)

def normalize(signal):
    """Standardise ``signal`` with a freshly fitted ``StandardScaler``.

    The scaler is fitted on the given array only and then applied to that same
    array, so the statistics are computed per call and per column.

    Args:
        signal: 2-D array accepted by ``StandardScaler``.

    Returns:
        The transformed array produced by the scaler.
    """
    scaler = StandardScaler()
    scaler.fit(signal)
    return scaler.transform(signal)

def extract_input_output(ecg, input_leads=(0, 1, 7)):
    """Split a multi-lead ECG array into model-input leads and target leads.

    Args:
        ecg: 2-D array indexed as (samples, leads).
        input_leads: Column indices used as model input. The default
            ``(0, 1, 7)`` is meant to select leads I, II and V2.

    Returns:
        tuple: ``(X, Y)`` where ``X`` contains the ``input_leads`` columns in
        the given order and ``Y`` contains every remaining column in its
        original order.
    """
    # NOTE(review): assumes the channel order I, II, III, aVR, aVL, aVF, V1..V6,
    # in which V2 is column 7 (column 5 would be aVF, which adds no chest-lead
    # information to I and II) — confirm against the records' sig_name.
    lead_idx = list(input_leads)
    X = ecg[:, lead_idx]                      # Input leads (default: I, II, V2)
    Y = np.delete(ecg, lead_idx, axis=1)      # All remaining leads
    return X, Y

# --- Load metadata ---
# NOTE(review): this reads the CSV again and rebinds `meta`, so any filtering applied
# to an earlier `meta` (e.g. dropping rows without scp_codes) is not in effect from
# here on — confirm that is intended.
meta = pd.read_csv('/content/drive/MyDrive/PTB-data/ptbxl_database.csv')  # Adjust path if needed

def generate_paths(row):
    """Return the converted ``.npy`` file paths for one metadata row.

    Args:
        row: Mapping-like metadata row (for example a pandas Series) with a
            ``filename_hr`` entry and, when available, a ``filename_lr`` entry.

    Returns:
        dict: ``{'500': path, '100': path}`` pointing into the
        ``npy_records500`` and ``npy_records100`` folders respectively.
    """
    # The .npy files are stored relative to the records folder, so the leading
    # "records500/" component is removed before joining.
    hr_rel = row['filename_hr'].replace("records500/", "")

    # The 100 Hz recordings have their own file names (``*_lr``); building the
    # 100 Hz path from the ``*_hr`` name yields files that are always missing.
    lr_name = row.get('filename_lr')
    if isinstance(lr_name, str):
        lr_rel = lr_name.replace("records100/", "")
    elif hr_rel.endswith('_hr'):
        # Fallback when the row has no usable filename_lr: derive it from the hr name.
        lr_rel = hr_rel[:-len('_hr')] + '_lr'
    else:
        lr_rel = hr_rel

    return {
        '500': os.path.join('/content/drive/MyDrive/PTB-data/npy_records500', hr_rel + '.npy'),
        '100': os.path.join('/content/drive/MyDrive/PTB-data/npy_records100', lr_rel + '.npy')
    }

# --- Apply path correction ---
# Each row gets a dict {'500': path, '100': path} produced by generate_paths.
meta['paths'] = meta.apply(generate_paths, axis=1)

# --- Preprocess both datasets ---
# Model inputs (X) and targets (Y) are collected separately per sampling rate.
X_500, Y_500 = [], []
X_100, Y_100 = [], []

# Only the first 1000 metadata rows are processed.
for row in meta['paths'][:1000]:
    for hz, path in row.items():
        try:
            if not os.path.exists(path):
                print(f"Missing: {hz}Hz - {path}")
                continue

            ecg = np.load(path)

            # NOTE(review): the same (5000, 12) shape is required for the '500' and
            # the '100' entry, and bandpass_filter is called without a per-rate
            # sampling frequency. A 100 Hz recording of the same duration would
            # have fewer samples and be rejected here — confirm how the 100 Hz
            # data is supposed to be handled.
            if ecg.shape != (5000, 12):
                print(f"Unexpected shape [{hz}Hz]: {path}, {ecg.shape}")
                continue

            ecg = normalize(bandpass_filter(ecg))
            x, y = extract_input_output(ecg)

            if hz == '500':
                X_500.append(x)
                Y_500.append(y)
            else:
                X_100.append(x)
                Y_100.append(y)

        except Exception as e:
            # Best-effort loading: report the record and move on to the next one.
            print(f"Skip [{hz}Hz]: {path} — {e}")

# --- Final arrays ---
# Stack the per-record arrays; an empty list yields an empty array.
X_500 = np.array(X_500)
Y_500 = np.array(Y_500)
X_100 = np.array(X_100)
Y_100 = np.array(Y_100)


Skip [500Hz]: /content/drive/MyDrive/PTB-data/npy_records500/00000/00001_hr.npy â€” The length of the input vector x must be greater than padlen, which is 15.
Missing: 100Hz - /content/drive/MyDrive/PTB-data/npy_records100/00000/00001_hr.npy
Skip [500Hz]: /content/drive/MyDrive/PTB-data/npy_records500/00000/00002_hr.npy â€” The length of the input vector x must be greater than padlen, which is 15.
Missing: 100Hz - /content/drive/MyDrive/PTB-data/npy_records100/00000/00002_hr.npy
Skip [500Hz]: /content/drive/MyDrive/PTB-data/npy_records500/00000/00003_hr.npy â€” The length of the input vector x must be greater than padlen, which is 15.
Missing: 100Hz - /content/drive/MyDrive/PTB-data/npy_records100/00000/00003_hr.npy
Skip [500Hz]: /content/drive/MyDrive/PTB-data/npy_records500/00000/00004_hr.npy â€” The length of the input vector x must be greater than padlen, which is 15.
Missing: 100Hz - /content/drive/MyDrive/PTB-data/npy_records100/00000/00004_hr.npy
Skip [500Hz]: /content/drive/MyD