In [1]:
import pickle
import scipy.constants
import datetime
import tensorflow as tf
import numpy as np
import tqdm
import os
import pandas as pd
import random
# Seed every random number generator this notebook relies on, not just Python's:
# `random` drives the key sampling below, while NumPy and TensorFlow drive
# (presumably) the augmentations, weight initialisation and dropout during training.
random.seed(42)
np.random.seed(42)
tf.random.set_seed(42)

In [2]:
import hchs_data_pre_processing
import hchs_transformations
import simclr_models
import simclr_utitlities

In [3]:
# Prefix used for every model file written by this notebook (keep the trailing slash:
# later cells concatenate it directly with the file name).
working_directory = 'test_run_chapman/'

# All pickled Chapman artefacts live under <cwd>/PickledData/chapman.
dataset_save_path = os.path.join(os.getcwd(), "PickledData", "chapman")

# Per-row labelled recordings. Alternative inputs that were used previously:
#   "patient_to_data_label_dict.pickle", "user_dataset_resized.pickle"
user_datasets_path = os.path.join(dataset_save_path, "patient_to_data_label_per_row_dict.pickle")

# Full and reduced train/test user splits.
path_to_test_train_split_dict, path_to_reduced_test_train_split_dict = (
    os.path.join(dataset_save_path, file_name)
    for file_name in ("test_train_split_dict.pickle", "reduced_test_train_split_dict.pickle")
)

# Placeholder key; it is re-drawn at random once the dataset has been loaded.
sample_key = 163225

# Locations of the pre-processed numpy splits.
path_to_np_train, path_to_np_test, path_to_np_val = (
    os.path.join(dataset_save_path, file_name)
    for file_name in ("np_train.pickle", "np_test.pickle", "np_val.pickle")
)


# Load Chapman Data

In [4]:
# Read the pickled dataset dictionary from disk.
# NOTE: unpickling executes arbitrary code, so only load files you produced yourself.
with open(user_datasets_path, 'rb') as dataset_file:
    user_datasets = pickle.load(dataset_file)

In [10]:
# random.sample requires a real sequence: passing a dict view has been deprecated
# since Python 3.9 and raises TypeError from Python 3.11 on. Materialising the keys
# as a list keeps the same iteration order, so the draws are unchanged for a given seed.
user_keys = list(user_datasets.keys())

# One key plus a batch of ten keys, used below to eyeball a few entries.
sample_key = random.sample(user_keys, 1)[0]
sample_keys = random.sample(user_keys, 10)
print(sample_keys)

['MUSE_20180113_073909_10000', 'MUSE_20180118_170931_17000', 'MUSE_20180118_132445_87000', 'MUSE_20180112_140548_57000', 'MUSE_20180114_130112_33000', 'MUSE_20180113_075301_94000', 'MUSE_20180209_171512_54000', 'MUSE_20180118_134301_96000', 'MUSE_20180112_133201_32000', 'MUSE_20180209_175337_71000']


In [11]:
# Print element [1] of every sampled entry. Presumably this is the per-row label array
# (the recorded output shows a single class id repeated per recording) -- confirm
# against the layout of the pickled dictionary.
# NOTE: the loop variable rebinds the notebook-level `sample_key` assigned earlier.
for sample_key in sample_keys:
    print(user_datasets[sample_key][1])

[2 2 2 ... 2 2 2]
[3 3 3 ... 3 3 3]
[1 1 1 ... 1 1 1]
[0 0 0 ... 0 0 0]
[3 3 3 ... 3 3 3]
[2 2 2 ... 2 2 2]
[3 3 3 ... 3 3 3]
[2 2 2 ... 2 2 2]
[1 1 1 ... 1 1 1]
[3 3 3 ... 3 3 3]


# Pre Processing

In [29]:
# Parameters

# Each model input is a window of 500 time steps with 12 values per step
# (the original note describes 500 samples as 1 second of data).
window_size = 500
input_shape = (window_size, 12)

# Dataset metadata
transformation_multiple = 1
dataset_name = 'chapman.pkl'
dataset_name_user_split = 'chapman_user_split.pkl'

# Integer class ids and their names, in matching order.
label_list = [0, 1, 2, 3]
label_list_full_name = ['AFIB', 'GSVT', 'SB', 'SR']
has_null_class = False

# The labels are already integer-encoded, so the map is the identity
# (mapping ids to the full names would be the alternative).
label_map = {label: label for label in label_list}
output_shape = len(label_list)

model_save_name = "chapman_acc"

# NOTE(review): 50.0 contradicts "500 samples = 1 second" above, and together with the
# gravity constant these look like leftovers from an accelerometer pipeline -- confirm
# before relying on either value.
sampling_rate = 50.0
unit_conversion = scipy.constants.g

# Use a fixed, pre-computed user split rather than drawing a new one on each run.
with open(path_to_test_train_split_dict, 'rb') as split_file:
    test_train_user_dict = pickle.load(split_file)

# The dictionary holds one collection of user ids per split name.
test_users, train_users = test_train_user_dict['test'], test_train_user_dict['train']

print(f'Test Numbers: {len(test_users)}, Train Numbers: {len(train_users)}')

Test Numbers: 2, Train Numbers: 8


In [30]:
# Run the project's pre-processing helper on the loaded recordings; it returns three
# splits that are bound to np_train, np_val and np_test (each is indexed later as
# [0] = model inputs, [1] = targets).
# NOTE(review): in the recorded run the printed shapes are 121 / 31 / 202122 windows
# (presumably train / validation / test, in that order) even though the split dictionary
# lists 8 train and 2 test users -- confirm the split file matches the loaded dataset.
np_train, np_val, np_test = hchs_data_pre_processing.pre_process_dataset_composite(
    user_datasets=user_datasets, 
    label_map=label_map, 
    output_shape=output_shape, 
    train_users=train_users, 
    test_users=test_users, 
    window_size=window_size, 
    shift=window_size//2,  # half a window; presumably the stride between windows (50% overlap) -- TODO confirm
    normalise_dataset=True, 
    verbose=1
)

step 1 done
step 2 done
step 3 done
Test
(array([0, 1, 2, 3]), array([42237, 43776, 73872, 42237], dtype=int64))
(array([0, 1, 2, 3]), array([42237, 43776, 73872, 42237], dtype=int64))
-----------------
Train
(array([0, 1, 2, 3]), array([38, 57, 19, 38], dtype=int64))
(array([0, 1, 2, 3]), array([38, 57, 19, 38], dtype=int64))
-----------------
step 4 done
step 5 done
(121, 500, 12)
(31, 500, 12)
(202122, 500, 12)
step 6 done


In [34]:
# SimCLR pre-training hyper-parameters.
batch_size = 512
decay_steps = 1000  # passed to the cosine learning-rate schedule in the training cell
epochs = 200
# epochs = 20
temperature = 0.1
trasnformation_indices = [1, 2] # Indices into transform_funcs_vectorised below: 1 = scaling, 2 = negate (rotation is commented out of the list, so index 2 is no longer rotation)

# Candidate augmentations; the positions in this list are what trasnformation_indices refers to.
transform_funcs_vectorised = [
    hchs_transformations.noise_transform_vectorized, 
    hchs_transformations.scaling_transform_vectorized, 
    # transformations.rotation_transform_vectorized, 
    hchs_transformations.negate_transform_vectorized, 
    hchs_transformations.time_flip_transform_vectorized, 
    hchs_transformations.time_segment_permutation_transform_improved, 
    hchs_transformations.time_warp_transform_low_cost, 
    hchs_transformations.channel_shuffle_transform_vectorized
]
# Human-readable names, in the same order as transform_funcs_vectorised.
# transform_funcs_names = ['noised', 'scaled', 'rotated', 'negated', 'time_flipped', 'permuted', 'time_warped', 'channel_shuffled']
transform_funcs_names = ['noised', 'scaled', 'negated', 'time_flipped', 'permuted', 'time_warped', 'channel_shuffled']



In [35]:
# Timestamp used to give the files written by this run unique names.
start_time = datetime.datetime.now()
start_time_str = start_time.strftime("%Y%m%d-%H%M%S")
tf.keras.backend.set_floatx('float32')

# Decide where the model will be written and make sure that folder exists *before*
# the long training run: not every Keras version creates missing parent directories
# for HDF5 saves, and a failure after training would throw the whole run away.
simclr_model_save_path = f"{working_directory}{start_time_str}_simclr.hdf5"
os.makedirs(os.path.dirname(simclr_model_save_path) or ".", exist_ok=True)

# SGD with a cosine-decayed learning rate starting at 0.1.
lr_decayed_fn = tf.keras.experimental.CosineDecay(initial_learning_rate=0.1, decay_steps=decay_steps)
optimizer = tf.keras.optimizers.SGD(lr_decayed_fn)

# Combine the augmentations selected by trasnformation_indices into one function.
transformation_function = simclr_utitlities.generate_combined_transform_function(
    transform_funcs_vectorised,
    indices=trasnformation_indices,
)

# Base encoder plus the SimCLR head used during pre-training.
base_model = simclr_models.create_base_model(input_shape, model_name="base_model")
simclr_model = simclr_models.attach_simclr_head(base_model)
simclr_model.summary()

# Pre-train on the training inputs only (np_train[0]); the targets are not passed in.
trained_simclr_model, epoch_losses = simclr_utitlities.simclr_train_model(
    simclr_model,
    np_train[0],
    optimizer,
    batch_size,
    transformation_function,
    temperature=temperature,
    epochs=epochs,
    is_trasnform_function_vectorized=True,
    verbose=1,
)

trained_simclr_model.save(simclr_model_save_path)



<function scaling_transform_vectorized at 0x000001AE24B02E50>
<function negate_transform_vectorized at 0x000001AE24B02EE0>
Model: "base_model_simclr"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input (InputLayer)           [(None, 500, 12)]         0         
_________________________________________________________________
conv1d_3 (Conv1D)            (None, 477, 32)           9248      
_________________________________________________________________
dropout_3 (Dropout)          (None, 477, 32)           0         
_________________________________________________________________
conv1d_4 (Conv1D)            (None, 462, 64)           32832     
_________________________________________________________________
dropout_4 (Dropout)          (None, 462, 64)           0         
_________________________________________________________________
conv1d_5 (Conv1D)            (None, 455, 96)           492

In [12]:

# Linear-evaluation settings.
# NOTE: batch_size rebinds the notebook-level name that the pre-training cell also reads.
total_epochs = 50
batch_size = 200
tag = "linear_eval"

# Reload the pre-trained SimCLR model from disk and build the linear-evaluation model
# from it, using layer index 7 as the intermediate layer.
simclr_model = tf.keras.models.load_model(simclr_model_save_path)
linear_evaluation_model = simclr_models.create_linear_model_from_base_model(
    simclr_model,
    output_shape,
    intermediate_layer=7,
)

# Checkpoint the full model whenever the validation loss reaches a new minimum.
best_model_file_name = f"{working_directory}{start_time_str}_simclr_{tag}.hdf5"
best_model_callback = tf.keras.callbacks.ModelCheckpoint(
    best_model_file_name,
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    save_weights_only=False,
    verbose=0,
)

train_inputs, train_targets = np_train[0], np_train[1]
training_history = linear_evaluation_model.fit(
    x=train_inputs,
    y=train_targets,
    batch_size=batch_size,
    shuffle=True,
    epochs=total_epochs,
    callbacks=[best_model_callback],
    validation_data=np_val,
)

# Compare the best checkpoint with the model as it stands after the final epoch.
best_model = tf.keras.models.load_model(best_model_file_name)
test_inputs, test_targets = np_test[0], np_test[1]

print("Model with lowest validation Loss:")
best_model_scores = simclr_utitlities.evaluate_model_simple(
    best_model.predict(test_inputs), test_targets, return_dict=True
)
print(best_model_scores)

print("Model in last epoch")
last_epoch_scores = simclr_utitlities.evaluate_model_simple(
    linear_evaluation_model.predict(test_inputs), test_targets, return_dict=True
)
print(last_epoch_scores)
