In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers, models
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from pathlib import Path

# Data loading and preprocessing functions (as provided)
def load_data(ecg_file, labels_file):
    ecg_signal = pd.read_csv(ecg_file, header=None).values.flatten()
    labels = pd.read_csv(labels_file)
    return ecg_signal, labels

def create_mask(ecg_signal, labels):
    mask = np.zeros(len(ecg_signal), dtype=int)
    for _, row in labels.iterrows():
        start, end = row['ROILimits_1'], row['ROILimits_2']
        value = row['Value']
        mask[start:end] = ['P', 'QRS', 'T'].index(value) + 1
    return mask

def resize_data(ecg_signal, mask, segment_length=5000):
    num_segments = len(ecg_signal) // segment_length
    ecg_segments = np.array_split(ecg_signal[:num_segments*segment_length], num_segments)
    mask_segments = np.array_split(mask[:num_segments*segment_length], num_segments)
    return np.array(ecg_segments), np.array(mask_segments)

# CNN model definition
def create_cnn_model(input_shape, num_classes):
    model = models.Sequential([
        layers.Conv1D(32, kernel_size=5, activation='relu', input_shape=input_shape),
        layers.MaxPooling1D(pool_size=2),
        layers.Conv1D(64, kernel_size=5, activation='relu'),
        layers.MaxPooling1D(pool_size=2),
        layers.Conv1D(64, kernel_size=5, activation='relu'),
        layers.MaxPooling1D(pool_size=2),
        layers.Flatten(),
        layers.Dense(128, activation='relu'),
        layers.Dense(num_classes, activation='softmax')
    ])
    return model

# Main execution
if __name__ == "__main__":
    data_dir = Path('path/to/your/data/directory')
    
    # Load and process all files
    all_ecg_segments = []
    all_mask_segments = []

    for i in range(1, 211):  # Assuming 210 files
        ecg_file = data_dir / f'ecg{i}_ecgSignal.csv'
        labels_file = data_dir / f'ecg{i}_signalRegionLabels.csv'
        
        ecg_signal, labels = load_data(ecg_file, labels_file)
        mask = create_mask(ecg_signal, labels)
        ecg_segments, mask_segments = resize_data(ecg_signal, mask)
        
        all_ecg_segments.extend(ecg_segments)
        all_mask_segments.extend(mask_segments)

    # Convert to numpy arrays
    X = np.array(all_ecg_segments)
    y = np.array(all_mask_segments)

    # Reshape X to add channel dimension
    X = X.reshape(X.shape[0], X.shape[1], 1)

    # Split the data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Define model parameters
    input_shape = (X_train.shape[1], 1)
    num_classes = 4  # Background, P, QRS, T

    # Create and compile the model
    model = create_cnn_model(input_shape, num_classes)
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

    # Train the model
    history = model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.2)

    # Evaluate the model
    test_loss, test_accuracy = model.evaluate(X_test, y_test)
    print(f"Test accuracy: {test_accuracy:.4f}")

    # Make predictions
    predictions = model.predict(X_test)

    # Plot results for a sample
    sample_idx = 0
    plt.figure(figsize=(12, 6))
    plt.plot(X_test[sample_idx].flatten())
    plt.title("ECG Signal with Predicted Segments")
    plt.xlabel("Sample")
    plt.ylabel("Amplitude")

    # Add colored regions for predicted segments
    colors = ['gray', 'red', 'green', 'blue']
    labels = ['Background', 'P', 'QRS', 'T']
    for i, label in enumerate(labels):
        mask = predictions[sample_idx].argmax(axis=1) == i
        plt.fill_between(range(len(mask)), X_test[sample_idx].flatten(), where=mask, alpha=0.3, color=colors[i], label=label)

    plt.legend()
    plt.show()

    # Plot training history
    plt.figure(figsize=(12, 4))
    plt.subplot(1, 2, 1)
    plt.plot(history.history['accuracy'], label='Training Accuracy')
    plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
    plt.title('Model Accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend()

    plt.subplot(1, 2, 2)
    plt.plot(history.history['loss'], label='Training Loss')
    plt.plot(history.history['val_loss'], label='Validation Loss')
    plt.title('Model Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()

    plt.tight_layout()
    plt.show()


In [46]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers, models
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from pathlib import Path

In [47]:
# Data loading and preprocessing functions (as provided)
def load_data(ecg_file, labels_file):
    ecg_signal = pd.read_csv(ecg_file, header=None).values.flatten()
    region_labels = pd.read_csv(labels_file)
    return ecg_signal, region_labels

def create_mask(ecg_signal, region_labels):
    mask = np.zeros(len(ecg_signal), dtype=int)
    for _, row in region_labels.iterrows():
        start, end = row['ROILimits_1'], row['ROILimits_2']
        value = row['Value']
        mask[start:end] = ['P', 'QRS', 'T'].index(value) + 1
    return mask

def resize_data(ecg_signal, mask, segment_length=5000):
    num_segments = len(ecg_signal) // segment_length
    ecg_segments = np.array_split(ecg_signal[:num_segments*segment_length], num_segments)
    mask_segments = np.array_split(mask[:num_segments*segment_length], num_segments)
    return np.array(ecg_segments), np.array(mask_segments)



In [48]:
import os
from pathlib import Path

# Assuming your CSV files are in a 'data' directory relative to your script
data_dir = Path('csvdata')
print(data_dir)



csvdata


In [49]:
# Load and process all files
all_ecg_segments = []
all_mask_segments = []

for i in range(1, 211):  # Assuming 210 files
    ecg_file = data_dir / f'ecg{i}_ecgSignal.csv'
    labels_file = data_dir / f'ecg{i}_signalRegionLabels.csv'
        
    ecg_signal, region_labels = load_data(ecg_file, labels_file)
    mask = create_mask(ecg_signal, region_labels)
    ecg_segments, mask_segments = resize_data(ecg_signal, mask)
        
    all_ecg_segments.extend(ecg_segments)
    all_mask_segments.extend(mask_segments)


In [50]:
# Convert to numpy arrays
X = np.array(all_ecg_segments)
y = np.array(all_mask_segments)

In [53]:
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

label_encoder = LabelEncoder()
integer_labels = label_encoder.fit_transform(y)

onehot_encoder = OneHotEncoder().set_params(sparse_output=False)
onehot_labels = onehot_encoder.fit_transform(integer_labels.reshape(-1, 1))

ValueError: y should be a 1d array, got an array of shape (9346, 5000) instead.

In [58]:
print(X.shape)
print(X[:5])
print(y.shape)
print(y[:5])
print(onehot_labels.shape)

(9346, 5000)
[[-79 -77 -79 ... -71 -72 -73]
 [-73 -70 -68 ... -59 -54 -54]
 [-54 -51 -48 ... -82 -84 -85]
 [-84 -83 -83 ... -68 -67 -70]
 [-71 -69 -69 ... -64 -62 -65]]
(9346, 5000)
[[0 0 0 ... 0 0 0]
 [0 0 1 ... 3 3 3]
 [3 3 3 ... 0 0 0]
 [0 0 0 ... 3 3 3]
 [3 3 3 ... 0 0 0]]


NameError: name 'onehot_labels' is not defined

In [35]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

    

In [36]:
print(X_train.shape)
print(y_train.shape)

(6542, 5000)
(6542, 5000)


# Reshape X to add channel dimension
X1 = X.reshape(X.shape[0], X.shape[1], 1)
print(X1.shape)

In [39]:
X_train = X_train[..., np.newaxis]
X_test = X_test[..., np.newaxis]
print(X_train.shape)
print(X_test.shape)
print(y_train.shape)

(6542, 5000, 1)
(2804, 5000, 1)
(6542, 5000)


In [42]:
# CNN model definition

model = models.Sequential([
    layers.Conv1D(32, kernel_size=5, activation='relu', input_shape=input_shape),
    layers.MaxPooling1D(pool_size=2),
    layers.Conv1D(64, kernel_size=5, activation='relu'),
    layers.MaxPooling1D(pool_size=2),
    layers.Conv1D(64, kernel_size=5, activation='relu'),
    layers.MaxPooling1D(pool_size=2),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dense(4, activation='softmax')   
    ])

# target data (y_train) should be a 1D array of shape (None,) for sparse categorical crossentropy.
y_train = y_train.flatten()
y_test = y_test.flatten()
print(y_train.shape)
print(y_test.shape)


In [43]:

# Create and compile the model
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train the model
history = model.fit(X_train, y_train, epochs=5, batch_size=32, validation_data=(X_test, y_test))


Epoch 1/5


ValueError: Argument `output` must have rank (ndim) `target.ndim - 1`. Received: target.shape=(None, 5000), output.shape=(None, 4)

In [29]:
# Evaluate the model
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print(f"Test accuracy: {test_accuracy:.4f}")

   
    

ValueError: Data cardinality is ambiguous. Make sure all arrays contain the same number of samples.'x' sizes: 2804
'y' sizes: 14020000


In [30]:
print("X_test shape:", X_test.shape)
print("y_test shape:", y_test.shape)
print('x:', type(X_test))
print('y:', type(y_test))

X_test shape: (2804, 5000, 1)
y_test shape: (14020000,)
x: <class 'numpy.ndarray'>
y: <class 'numpy.ndarray'>
