In [253]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [295]:
import os
import json
import importlib
import numpy as np
# NOTE(review): hard-coded absolute path to one machine's project checkout.
# The relative 'models/...' paths and the `data` / `waveform_embedder` imports
# used further down presumably depend on this working directory — confirm
# before running the notebook anywhere else.
os.chdir('/home/ec2-user/ecg-autoregressive')

In [296]:
import data
import waveform_embedder
# Explicitly re-import the `waveform_embedder` and `data` modules so the
# kernel uses their current source.
# NOTE(review): `%autoreload 2` is enabled at the top of the notebook, so these
# manual reloads look redundant — confirm before removing them.
importlib.reload(waveform_embedder)
importlib.reload(data)

<module 'data' from '/home/ec2-user/ecg-autoregressive/data.py'>

In [319]:
# Name of the sub-directory under 'models/' from which hyperparameters.json
# and model.pth are read; it is also passed to the ViT embedder constructor.
model_name = 'vit_small'

In [320]:
# Read the hyperparameters stored next to the selected model's weights.
# The context manager closes the file handle deterministically instead of
# leaving it open until the garbage collector gets to it.
hyperparams_path = os.path.join('models', model_name, 'hyperparameters.json')
with open(hyperparams_path) as hyperparams_file:
    hyperparams = json.load(hyperparams_file)

In [321]:
hyperparams

{'window_size': 256,
 'latent_dim': 16,
 'num_transformer_blocks': 1,
 'dim_feedforward': 256,
 'encoder_hidden_dim': 64,
 'num_heads': 4,
 'epochs': 50,
 'batch_size': 64,
 'device': 'cuda'}

In [322]:
# Pull the individual settings out of the loaded hyperparameters dict.
(
    window_size,
    patch_size,
    num_transformer_blocks,
    encoder_hidden_dim,
    dim_feedforward,
    num_heads,
) = (
    hyperparams[key]
    for key in (
        'window_size',
        'latent_dim',  # exposed as `patch_size` in this notebook
        'num_transformer_blocks',
        'encoder_hidden_dim',
        'dim_feedforward',
        'num_heads',
    )
)

# NOTE(review): the device is fixed here rather than read from hyperparams['device'].
device = 'cuda'
model_path = os.path.join('models', model_name, 'model.pth')

# Only model_path, window_size, model_name and device are passed to the ViT
# embedder. The other values above match the argument list of the alternative
#   waveform_embedder.WaveformEmbedder(model_path, window_size, patch_size,
#       num_transformer_blocks, encoder_hidden_dim, dim_feedforward, num_heads, device)
# which is not instantiated in this cell.
embedder = waveform_embedder.ViTWaveformEmbedder(
    model_path,
    window_size,
    model_name,
    device,
)

In [323]:
def count_parameters(model):
    """Return the total element count of the model's trainable parameters.

    Iterates over ``model.parameters()`` and adds up ``numel()`` for every
    parameter whose ``requires_grad`` flag is truthy.
    """
    trainable_total = 0
    for param in model.parameters():
        if param.requires_grad:
            trainable_total += param.numel()
    return trainable_total

count_parameters(embedder.model)

21496192

In [324]:
# One embedder-backed dataset per split; all three share the same embedder,
# subsampling factor and window size.
embedded_dataset_options = dict(embedder=embedder, subsample=10, window_size=window_size)
train_emb, val_emb, test_emb = (
    data.HDF5DatasetKeras(data.H5_PATH, split_type=split_name, **embedded_dataset_options)
    for split_name in ('train', 'val', 'test')
)

In [325]:
# The same three splits, constructed without an embedder argument.
plain_dataset_options = dict(subsample=10, window_size=window_size)
train, val, test = (
    data.HDF5DatasetKeras(data.H5_PATH, split_type=split_name, **plain_dataset_options)
    for split_name in ('train', 'val', 'test')
)

In [326]:
print(train[0][0].shape)
print(train[0][1].shape)

(10, 10, 256)
(10,)


In [327]:
train_emb[0][0].shape

(10, 384)

In [328]:
train_emb[0][1].shape

(10,)

In [329]:
# Walk the embedded training dataset once, keeping each batch's features and
# labels in separate lists.
train_feature_batches = []
train_label_batches = []
for X_batch, y_batch in train_emb:
    train_feature_batches += [X_batch]
    train_label_batches += [y_batch]

# Stack the per-batch arrays along the first axis into single arrays.
X_train_emb = np.concatenate(train_feature_batches, axis=0)
y_train_emb = np.concatenate(train_label_batches, axis=0)

In [330]:
# Walk the embedded validation dataset once, keeping each batch's features
# and labels in separate lists.
val_feature_batches = []
val_label_batches = []
for X_batch, y_batch in val_emb:
    val_feature_batches += [X_batch]
    val_label_batches += [y_batch]

# Stack the per-batch arrays along the first axis into single arrays.
X_val_emb = np.concatenate(val_feature_batches, axis=0)
y_val_emb = np.concatenate(val_label_batches, axis=0)

In [331]:
X_val_emb.shape

(240, 384)

In [332]:
y_val_emb.shape

(240,)

In [333]:
from sklearn.utils.class_weight import compute_class_weight

# Balanced class weights computed from the training labels, keyed by class label.
classes = np.unique(y_train_emb)
class_weights = compute_class_weight(class_weight='balanced', classes=classes, y=y_train_emb)
class_weight_dict = dict(zip(classes, class_weights))

# For the train split and then the val split, report which class is most
# frequent and what percentage of that split's samples it covers.
for split_name, split_labels in (('train', y_train_emb), ('val', y_val_emb)):
    class_counts = np.bincount(split_labels.astype(int))
    majority_class = np.argmax(class_counts)
    majority_class_percentage = (class_counts[majority_class] / len(split_labels)) * 100
    print(f'Class {majority_class} is {majority_class_percentage:.2f}% of the samples [{split_name}]')

print(class_weight_dict)

Class 0 is 77.25% of the samples [train]
Class 0 is 62.50% of the samples [val]
{0: 0.6472602739726028, 1: 2.197674418604651}


In [334]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam, AdamW
from tensorflow.keras.metrics import AUC

# Dense classifier trained on the pre-computed embeddings.
# The input width is taken from the embedding matrix rather than hard-coded
# (it used to be the literal 384), so the cell keeps working if the embedder,
# and with it the embedding size, changes.
embedding_dim = X_train_emb.shape[1]

model = Sequential([
    Dense(256, activation='relu', input_shape=(embedding_dim,)),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')  # single sigmoid unit, paired with binary cross-entropy below
])

model.summary()

# Compile the model
model.compile(optimizer=AdamW(learning_rate=1e-3), loss='binary_crossentropy', metrics=['accuracy', AUC()])

# class_weight_dict is not passed to fit() here; add
# `class_weight=class_weight_dict` to this call to re-weight the classes.
model.fit(X_train_emb, y_train_emb, epochs=40, validation_data=(X_val_emb, y_val_emb))

Model: "sequential_21"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_80 (Dense)            (None, 256)               98560     
                                                                 
 dropout_59 (Dropout)        (None, 256)               0         
                                                                 
 dense_81 (Dense)            (None, 128)               32896     
                                                                 
 dropout_60 (Dropout)        (None, 128)               0         
                                                                 
 dense_82 (Dense)            (None, 128)               16512     
                                                                 
 dropout_61 (Dropout)        (None, 128)               0         
                                                                 
 dense_83 (Dense)            (None, 1)               

Epoch 40/40


<keras.src.callbacks.History at 0x7fd77c483550>

In [72]:
import numpy as np
from tensorflow.keras.utils import to_categorical

def reshape_and_load_data(dataloader):
    """Flatten every batch from ``dataloader`` and stack the results.

    Args:
        dataloader: iterable yielding ``(features, labels)`` pairs of NumPy
            arrays. Each ``features`` array is reshaped to 2-D: the first
            axis is kept and all remaining axes are flattened,
            e.g. (10, 10, 256) -> (10, 2560).

    Returns:
        Tuple ``(X, y)`` holding the flattened features and the labels, each
        concatenated along axis 0. Both shapes are printed before returning.

    Raises:
        ValueError: raised by ``np.concatenate`` when the dataloader yields
            no batches at all.
    """
    X, y = [], []
    # The loop variable is named `batch`, not `data`, so it cannot be confused
    # with the `data` module imported elsewhere in the notebook.
    for batch, labels in dataloader:
        flat_batch = batch.reshape(batch.shape[0], -1)
        X.append(flat_batch)
        y.append(labels)
        if flat_batch.shape[0] != labels.shape[0]:
            # Diagnostic only: report the mismatching batch and keep going.
            print('error with shapes')
            print(flat_batch.shape)
            # Previously `y.shape`; `y` is a Python list at this point, so the
            # diagnostic itself raised AttributeError.
            print(labels.shape)
    X = np.concatenate(X, axis=0)
    y = np.concatenate(y, axis=0)
    print(X.shape)
    print(y.shape)
    return X, y

def print_class_representation(y):
    """Print the most frequent class in ``y`` and its share of the samples.

    ``y`` is cast to int and counted with ``np.bincount``; the share is
    printed as a percentage with two decimals.
    """
    counts = np.bincount(y.astype(int))
    top_class = counts.argmax()
    top_share = counts[top_class] / len(y) * 100
    print(f'Class {top_class} is {top_share:.2f}% of the samples')

# Flatten and stack the `train` and `val` datasets, in that order.
(X_train, y_train), (X_val, y_val) = (
    reshape_and_load_data(split_dataset) for split_dataset in (train, val)
)

# Report the majority-class share of each split, train first.
for split_labels in (y_train, y_val):
    print_class_representation(split_labels)

(1890, 2560)
(1890,)
(240, 2560)
(240,)
Class 0 is 77.25% of the samples
Class 0 is 62.50% of the samples


In [20]:
y_train.shape

(1890,)

In [82]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense
from tensorflow.keras.optimizers import AdamW

def build_model(input_shape, learning_rate=1e-4):
    """Build and compile a small 1D-CNN binary classifier.

    Args:
        input_shape: shape of a single sample, handed to the first Conv1D
            layer (the notebook calls this with ``(n_features, 1)``).
        learning_rate: learning rate for the AdamW optimizer. Defaults to
            1e-4, the value that was previously hard-coded.

    Returns:
        A compiled Keras ``Sequential`` model: three Conv1D(32, 3) +
        MaxPooling1D(2) stages, a Dense(256) layer and a single sigmoid
        output, trained with binary cross-entropy and reporting accuracy
        and AUC.
    """
    # Imported locally because this cell's own import lines do not bring AUC
    # into scope; without this the function only works if an earlier cell
    # happened to import it.
    from tensorflow.keras.metrics import AUC

    model = Sequential([
        Conv1D(filters=32, kernel_size=3, activation='relu', input_shape=input_shape),
        MaxPooling1D(pool_size=2),
        Conv1D(filters=32, kernel_size=3, activation='relu'),
        MaxPooling1D(pool_size=2),
        Conv1D(filters=32, kernel_size=3, activation='relu'),
        MaxPooling1D(pool_size=2),
        Flatten(),
        Dense(256, activation='relu'),
        Dense(1, activation='sigmoid')  # Adjust based on your classification task
    ])
    model.compile(
        optimizer=AdamW(learning_rate=learning_rate),
        loss='binary_crossentropy',
        metrics=['accuracy', AUC()],
    )
    return model

In [83]:
# Compute class weights
from sklearn.utils.class_weight import compute_class_weight

# Build the 1D CNN for samples of shape (n_features, 1) and show its layers.
n_features = X_train.shape[1]
model = build_model((n_features, 1))
model.summary()

# Balanced per-class weights from the training labels, keyed by class label.
train_classes = np.unique(y_train)
class_weights = compute_class_weight(class_weight='balanced', classes=train_classes, y=y_train)
class_weight_dict = dict(zip(train_classes, class_weights))

# NOTE(review): X_train / X_val are passed to fit() without the trailing
# channel axis the model was built with. The np.expand_dims(X_train, -1) and
# np.expand_dims(X_val, -1) step is disabled in this cell — confirm this is
# intended.
print(X_train.shape)

# Fit with the class weights applied to the loss.
model.fit(X_train, y_train, epochs=15, validation_data=(X_val, y_val), class_weight=class_weight_dict)

Model: "sequential_15"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d_12 (Conv1D)          (None, 2558, 32)          128       
                                                                 
 max_pooling1d_12 (MaxPooli  (None, 1279, 32)          0         
 ng1D)                                                           
                                                                 
 conv1d_13 (Conv1D)          (None, 1277, 32)          3104      
                                                                 
 max_pooling1d_13 (MaxPooli  (None, 638, 32)           0         
 ng1D)                                                           
                                                                 
 conv1d_14 (Conv1D)          (None, 636, 32)           3104      
                                                                 
 max_pooling1d_14 (MaxPooli  (None, 318, 32)         

<keras.src.callbacks.History at 0x7f4d1c741a80>