In [1]:
import pickle
import scipy.constants
import datetime
import tensorflow as tf
import numpy as np
import tqdm
import os
import pandas as pd

In [2]:
import hchs_data_pre_processing
import hchs_transformations
import simclr_models
import simclr_utitlities

In [19]:
# --- Run-level settings ---
# Name used for this run's outputs, and the key of one participant to inspect.
working_directory = 'test_run_mesa'
sample_key = 3950

# --- Pickled inputs, all located under <cwd>/PickledData/mesa ---
dataset_save_path = os.path.join(os.getcwd(), "PickledData", "mesa")

# Raw and resized per-participant datasets.
user_datasets_path = os.path.join(dataset_save_path, "mesaid_to_data_label_dict.pickle")
user_dataset_float_path = os.path.join(dataset_save_path, "user_dataset_resized.pickle")

# Train/test user assignments (full and reduced variants).
path_to_test_train_split_dict = os.path.join(dataset_save_path, "test_train_split_dict.pickle")
path_to_reduced_test_train_split_dict = os.path.join(dataset_save_path, "reduced_test_train_split_dict.pickle")

# Pickle paths for the train / validation / test numpy arrays.
path_to_np_train = os.path.join(dataset_save_path, "np_train.pickle")
path_to_np_val = os.path.join(dataset_save_path, "np_val.pickle")
path_to_np_test = os.path.join(dataset_save_path, "np_test.pickle")


# Load mesa data

In [16]:
# Load the pickled per-participant datasets (presumably keyed by MESA id,
# going by the file name). Only unpickle files from a trusted source:
# pickle.load can execute arbitrary code.
with open(user_datasets_path, 'rb') as dataset_file:
    user_datasets = pickle.load(dataset_file)

In [18]:
# Peek at one participant's entry. The recorded output shows a two-element
# list: a 5-column float array followed by an array of string labels.
user_datasets[sample_key]

[array([[9.000e+00, 2.652e+01, 2.880e+00, 2.270e+00, 2.720e-01],
        [1.450e+02, 1.180e+01, 2.170e+00, 7.850e-01, 1.140e-01],
        [1.800e+01, 3.309e+01, 5.060e+00, 2.420e+00, 6.600e-01],
        ...,
        [0.000e+00, 7.610e+00, 4.190e-01, 7.350e-01, 2.230e-01],
        [1.840e+02, 8.230e+00, 4.190e-01, 8.020e-01, 2.600e-01],
        [3.930e+02, 9.440e+00, 5.090e-01, 9.130e-01, 2.900e-01]]),
 array(['ACTIVE', 'ACTIVE', 'ACTIVE', ..., 'ACTIVE', 'ACTIVE', 'ACTIVE'],
       dtype='<U6')]

# Preprocessing

In [20]:
# --- Windowing parameters ---
window_size = 500
# Second dimension is 5, the same as the column count of the sample array
# displayed in the data-loading section.
input_shape = (window_size, 5)

# --- Dataset metadata ---
transformation_multiple = 1
dataset_name = 'mesa.pkl'
dataset_name_user_split = 'hchs_user_split.pkl'

label_list = ["ACTIVE", "REST", "REST-S"]
label_list_full_name = label_list  # same list object, no separate long names
has_null_class = False

# Each label string maps to its position in label_list.
label_map = {label: index for index, label in enumerate(label_list)}

# One output unit per label.
output_shape = len(label_list)

model_save_name = "mesa_acc"

sampling_rate = 50.0
unit_conversion = scipy.constants.g

# --- Fixed user split ---
# Load the stored assignment of users to the 'train' and 'test' groups.
with open(path_to_reduced_test_train_split_dict, 'rb') as split_file:
    test_train_user_dict = pickle.load(split_file)

train_users = test_train_user_dict['train']
test_users = test_train_user_dict['test']

print(f'Test Numbers: {len(test_users)}, Train Numbers: {len(train_users)}')

Test Numbers: 2, Train Numbers: 8


In [21]:
# Turn the per-user recordings into train / validation / test arrays using
# the project's composite pre-processing helper.
# NOTE(review): the recorded output reports 535 train windows versus 157,995
# test windows, and only 2 'REST' samples overall - confirm the reduced
# split is the intended one before trusting the evaluation numbers.
np_train, np_val, np_test = hchs_data_pre_processing.pre_process_dataset_composite(
    # data and user split
    user_datasets=user_datasets,
    train_users=train_users,
    test_users=test_users,
    # label encoding
    label_map=label_map,
    output_shape=output_shape,
    # windowing: shift is half a window (presumably the step between
    # consecutive window starts, i.e. 50% overlap - TODO confirm)
    window_size=window_size,
    shift=window_size // 2,
    # options
    normalise_dataset=True,
    verbose=1,
)

step 1 done
step 2 done
step 3 done
Test
(array(['ACTIVE', 'REST', 'REST-S'], dtype='<U6'), array([103175,      2,  54818], dtype=int64))
(array([0, 1, 2]), array([103175,      2,  54818], dtype=int64))
-----------------
Train
(array(['ACTIVE', 'REST-S'], dtype='<U6'), array([351, 184], dtype=int64))
(array([0, 2]), array([351, 184], dtype=int64))
-----------------
step 4 done
step 5 done
(428, 500, 5)
(107, 500, 5)
(157995, 500, 5)
step 6 done


In [22]:
# SimCLR pre-training hyperparameters.
batch_size = 512
decay_steps = 1000
# epochs = 200  (presumably the full-length setting; 3 keeps this run short)
epochs = 3
temperature = 0.1

# Candidate augmentations. The rotation transform is disabled, so every
# entry after 'scaled' sits one position earlier than in the 8-entry list
# kept in the comment below.
transform_funcs_vectorised = [
    hchs_transformations.noise_transform_vectorized,
    hchs_transformations.scaling_transform_vectorized,
    # transformations.rotation_transform_vectorized,
    hchs_transformations.negate_transform_vectorized,
    hchs_transformations.time_flip_transform_vectorized,
    hchs_transformations.time_segment_permutation_transform_improved,
    hchs_transformations.time_warp_transform_low_cost,
    hchs_transformations.channel_shuffle_transform_vectorized
]
# transform_funcs_names = ['noised', 'scaled', 'rotated', 'negated', 'time_flipped', 'permuted', 'time_warped', 'channel_shuffled']
transform_funcs_names = ['noised', 'scaled', 'negated', 'time_flipped', 'permuted', 'time_warped', 'channel_shuffled']

# The names list must stay parallel to the function list, otherwise the
# name-based lookup below would select the wrong functions.
assert len(transform_funcs_names) == len(transform_funcs_vectorised), \
    "transform_funcs_names and transform_funcs_vectorised are out of sync"

# Use the scaling and negate transformations. The indices are looked up by
# name so they cannot silently drift when an entry is added or commented
# out; with the current list this evaluates to [1, 2].
selected_transform_names = ['scaled', 'negated']
trasnformation_indices = [transform_funcs_names.index(name) for name in selected_transform_names]



In [23]:
# Timestamp that makes this run's output file names unique.
start_time = datetime.datetime.now()
start_time_str = start_time.strftime("%Y%m%d-%H%M%S")
tf.keras.backend.set_floatx('float32')

# Save inside the working directory. The previous f-string concatenated
# the directory name and the timestamp with no path separator, so the file
# ended up in the current directory as 'test_run_mesa<timestamp>_simclr.hdf5'.
os.makedirs(working_directory, exist_ok=True)
simclr_model_save_path = os.path.join(working_directory, f"{start_time_str}_simclr.hdf5")

# SGD with a cosine-decayed learning rate starting at 0.1.
lr_decayed_fn = tf.keras.experimental.CosineDecay(initial_learning_rate=0.1, decay_steps=decay_steps)
optimizer = tf.keras.optimizers.SGD(lr_decayed_fn)

# Combine the augmentations selected by trasnformation_indices into a single
# callable (the recorded output shows the chosen functions being printed).
transformation_function = simclr_utitlities.generate_combined_transform_function(transform_funcs_vectorised, indices=trasnformation_indices)

# Base encoder plus the SimCLR head used during pre-training.
base_model = simclr_models.create_base_model(input_shape, model_name="base_model")
simclr_model = simclr_models.attach_simclr_head(base_model)
simclr_model.summary()

# Pre-train on the training windows only; np_train[1] is not passed here.
trained_simclr_model, epoch_losses = simclr_utitlities.simclr_train_model(simclr_model, np_train[0], optimizer, batch_size, transformation_function, temperature=temperature, epochs=epochs, is_trasnform_function_vectorized=True, verbose=1)

trained_simclr_model.save(simclr_model_save_path)



<function scaling_transform_vectorized at 0x00000272A2F81700>
<function negate_transform_vectorized at 0x00000272A2F81790>
Model: "base_model_simclr"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input (InputLayer)           [(None, 500, 5)]          0         
_________________________________________________________________
conv1d (Conv1D)              (None, 477, 32)           3872      
_________________________________________________________________
dropout (Dropout)            (None, 477, 32)           0         
_________________________________________________________________
conv1d_1 (Conv1D)            (None, 462, 64)           32832     
_________________________________________________________________
dropout_1 (Dropout)          (None, 462, 64)           0         
_________________________________________________________________
conv1d_2 (Conv1D)            (None, 455, 96)           492

In [24]:

# Linear-evaluation settings.
total_epochs = 5
# NOTE: this rebinds the global batch_size (512 in the SimCLR hyperparameter
# cell); re-run that cell before re-running SimCLR pre-training.
batch_size = 200
tag = "linear_eval"

# Reload the pre-trained SimCLR model and build the linear-evaluation model
# on top of it via the project helper.
# NOTE(review): intermediate_layer=7 is a magic index into the model's
# layers - confirm it still matches the base-model architecture.
simclr_model = tf.keras.models.load_model(simclr_model_save_path)
linear_evaluation_model = simclr_models.create_linear_model_from_base_model(simclr_model, output_shape, intermediate_layer=7)

# Checkpoint inside the working directory. The previous f-string joined the
# directory name and the timestamp with no path separator, so the file was
# written to the current directory with 'test_run_mesa' as a name prefix.
os.makedirs(working_directory, exist_ok=True)
best_model_file_name = os.path.join(working_directory, f"{start_time_str}_simclr_{tag}.hdf5")
# Keep only the full model with the lowest validation loss.
best_model_callback = tf.keras.callbacks.ModelCheckpoint(best_model_file_name,
    monitor='val_loss', mode='min', save_best_only=True, save_weights_only=False, verbose=0
)

training_history = linear_evaluation_model.fit(
    x = np_train[0],
    y = np_train[1],
    batch_size=batch_size,
    shuffle=True,
    epochs=total_epochs,
    callbacks=[best_model_callback],
    validation_data=np_val
)

# Reload the checkpointed model so it can be compared with the final-epoch
# weights still held by linear_evaluation_model.
best_model = tf.keras.models.load_model(best_model_file_name)

print("Model with lowest validation Loss:")
print(simclr_utitlities.evaluate_model_simple(best_model.predict(np_test[0]), np_test[1], return_dict=True))
print("Model in last epoch")
print(simclr_utitlities.evaluate_model_simple(linear_evaluation_model.predict(np_test[0]), np_test[1], return_dict=True))


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Model with lowest validation Loss:
{'Confusion Matrix': array([[90429,     0, 12746],
       [    1,     0,     1],
       [15963,     0, 38855]], dtype=int64), 'F1 Macro': 0.5310745924015938, 'F1 Micro': 0.8182790594639071, 'F1 Weighted': 0.8169221830287947, 'Precision': 0.5343090751249021, 'Recall': 0.5284207834954192, 'Kappa': 0.5934072303161199}
Model in last epoch
{'Confusion Matrix': array([[90429,     0, 12746],
       [    1,     0,     1],
       [15963,     0, 38855]], dtype=int64), 'F1 Macro': 0.5310745924015938, 'F1 Micro': 0.8182790594639071, 'F1 Weighted': 0.8169221830287947, 'Precision': 0.5343090751249021, 'Recall': 0.5284207834954192, 'Kappa': 0.5934072303161199}
