In [1]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense
from tensorflow.keras.utils import to_categorical
import pandas as pd




In [2]:
# Load the CSV file into a DataFrame
csv_path = 'D:\Omar\Friends\European_HealthCare_Hackathon\ecg_hospitalization\data\processed\meta\data_pairs.csv'  # Replace with the path to your CSV file
df = pd.read_csv(csv_path)

In [3]:
df

Unnamed: 0,np_file_path,label
0,data\processed\npy\MUSE_20231122_211154_57000.npy,0
1,data\processed\npy\MUSE_20231122_221512_86000.npy,0
2,data\processed\npy\MUSE_20231122_222724_38000.npy,0
3,data\processed\npy\MUSE_20231122_101126_09000.npy,1
4,data\processed\npy\MUSE_20231120_222227_88000.npy,0
...,...,...
23287,data\processed\npy\MUSE_20231120_184943_05000.npy,0
23288,data\processed\npy\MUSE_20231122_222459_73000.npy,0
23289,data\processed\npy\MUSE_20231120_101111_35000.npy,0
23290,data\processed\npy\MUSE_20231122_210819_41000.npy,0


In [4]:
# Initialize empty lists to store data
data_list = []
labels_list = []

In [5]:
# Iterate through rows and load .npy files
for index, row in df.iterrows():
    file_path = row['np_file_path']
    label = row['label']

    # Load the .npy file
    loaded_data = np.load(file_path)

    # Append the loaded data and label to the lists
    data_list.append(loaded_data)
    labels_list.append(label)

In [6]:
# Convert lists to NumPy arrays
data_array = np.array(data_list)
labels_array = np.array(labels_list)

In [7]:
# Print shapes for verification
print("Data Array Shape:", data_array.shape)
print("Labels Array Shape:", labels_array.shape)

Data Array Shape: (23292, 8, 5000)
Labels Array Shape: (23292,)


In [8]:
# Assuming ecg_data is your 3D array of raw ECG data
# Assuming labels are integers (0, 1, 2) representing the three classes
num_patients, num_leads, num_time_points = data_array.shape

In [9]:
#data_array_2d = data_array.reshape(-1, data_array.shape[-1])

In [10]:
X = data_array

In [11]:
y = labels_array.flatten()

In [12]:
X.shape

(23292, 8, 5000)

In [13]:
y.shape

(23292,)

In [14]:
# Set seed for reproducibility
np.random.seed(42)

In [15]:
# Generate random indices for shuffling
indices = np.arange(len(y))
np.random.shuffle(indices)

In [16]:
indices

array([23198,  7131,   167, ...,  5390,   860, 15795])

In [17]:
# Shuffle X and y using the generated indices
X_shuffled = X[indices]
y_shuffled = y[indices]

In [18]:
X = X_shuffled
y = y_shuffled

In [19]:
# Reshape the data to fit the CNN input shape
#X = ecg_data.reshape(num_patients, num_leads, num_time_points)  # Remove the extra dimension
#X = data_array

In [20]:
# Assuming labels are integers (0, 1, 2) representing the three classes
# If not, you might need to preprocess your labels accordingly
#y = labels_array.flatten()

In [21]:
# Convert labels to categorical format
#le = LabelEncoder()
#y_cat = to_categorical(le.fit_transform(y))

In [22]:
#y_cat

In [23]:
y

array([2, 0, 0, ..., 1, 0, 0])

In [24]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [25]:
from sklearn.utils.class_weight import compute_sample_weight

In [26]:
unique_classes = np.unique(y_train)

In [27]:
unique_classes

array([0, 1, 2])

In [28]:
class_weights = compute_sample_weight(class_weight='balanced', y=y_train)

In [29]:
class_weights

array([1.14954655, 1.14954655, 1.14954655, ..., 3.51898017, 1.14954655,
       0.54173572])

In [30]:
class_weights.shape

(18633,)

In [31]:
# Build the CNN model
model = Sequential()
model.add(Conv1D(filters=32, kernel_size=3, activation='relu', input_shape=(num_leads, num_time_points)))
model.add(MaxPooling1D(pool_size=2))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dense(3, activation='softmax'))  # Assuming 3 output classes





In [32]:
# Compile the model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])




In [33]:
#from imblearn.over_sampling import RandomOverSampler

In [34]:
# Train the model
#model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_test, y_test))

In [35]:
# Convert labels to categorical format
le = LabelEncoder()
y_train = to_categorical(le.fit_transform(y_train))
y_test = to_categorical(le.fit_transform(y_test))

In [36]:
# Use class_weights in the fit method
model.fit(X_train, y_train, epochs=40, batch_size=32, validation_data=(X_test, y_test), sample_weight=class_weights)

Epoch 1/40


Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


<keras.src.callbacks.History at 0x26fb3d75ad0>

In [39]:
# Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test)
print("Test Loss:", loss)
print("Test Accuracy:", accuracy)

Test Loss: 1.1190824508666992
Test Accuracy: 0.09444086998701096
