In [1]:
import os
import numpy as np
import numpy.typing as npt
from pathlib import Path
import sklearn.model_selection
import tensorflow as tf
from neuralspot.tflite.metrics import MultiF1Score
from sleepkit.defines import SKTrainParams, get_sleep_stage_classes, get_sleep_stage_class_names, get_sleep_stage_class_mapping
from sleepkit.datasets import Hdf5Dataset, MesaDataset
from sleepkit.utils import env_flag, set_random_seed, setup_logger
from sleepkit.metrics import compute_iou, confusion_matrix_plot
from sleepkit.datasets.utils import create_dataset_from_data

from sleepkit.defines import SKTrainParams
from neuralspot.tflite.metrics import get_flops
from neuralspot.tflite.model import get_strategy
from sleepkit.models.unet import UNet, UNetParams, UNetBlockParams
from sleepkit.models.unext import UNext, UNextParams, UNextBlockParams

2023-09-29 20:37:15.423231: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-09-29 20:37:15.444457: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
logger = setup_logger(__name__)

In [3]:
params = SKTrainParams(
    job_dir=Path("..", "results", "mesa-fs001", "experiment-002"),
    ds_path=Path("..", "datasets", "processed", "mesa-fs001"),
    sampling_rate=2,
    frame_size=120,
    samples_per_subject=200,
    val_samples_per_subject=200,
    val_subjects=0.2,
    batch_size=256,
    buffer_size=50000,
    epochs=100,
    steps_per_epoch=100,
    val_metric="loss",
    val_size=50000,
    # Extra params
    lr_rate=5e-3,
    lr_cycles=1,
    label_smoothing=0.1,
    num_sleep_stages=2,
)

In [None]:
def load_model_v1(inputs: tf.Tensor, num_classes: int = 2):
    blocks = [
        UNetBlockParams(filters=12, depth=1, kernel=(1, 5), strides=(1, 2), skip=True, seperable=True, dropout=0.2),
        UNetBlockParams(filters=24, depth=1, kernel=(1, 5), strides=(1, 2), skip=True, seperable=True, dropout=0.2),
        UNetBlockParams(filters=36, depth=1, kernel=(1, 5), strides=(1, 2), skip=True, seperable=False, dropout=0.2),
    ]
    return UNet(
        inputs,
        params=UNetParams(
            blocks=blocks,
            output_kernel_size=(1, 5),
            include_top=True,
            use_logits=False,
            include_rnn=False,
        ),
        num_classes=num_classes,
    )

def load_model(inputs: tf.Tensor, num_classes: int = 2):
    y = inputs   
    y = tf.keras.layers.Conv1D(filters=24, kernel_size=9, strides=1, padding="same")(y)
    # y = tf.keras.layers.BatchNormalization()(y)
    y = tf.keras.layers.Activation(tf.nn.relu6)(y)
    y = tf.keras.layers.SpatialDropout1D(rate=0.2)(y)

    y = tf.keras.layers.Conv1D(filters=32, kernel_size=5, strides=1, padding="same")(y)
    # y = tf.keras.layers.BatchNormalization()(y)
    y = tf.keras.layers.Activation(tf.nn.relu6)(y)   
    # y = tf.keras.layers.SpatialDropout1D(rate=0.2)(y) 

    # y = tf.keras.layers.Conv1D(filters=32, kernel_size=5, strides=1, padding="same")(y)
    # y = tf.keras.layers.BatchNormalization()(y)
    # y = tf.keras.layers.Activation(tf.nn.relu6)(y)    

    y = tf.keras.layers.GRU(units=32, return_sequences=True, dropout=0.2, recurrent_dropout=0.2)(y)
    y = tf.keras.layers.GRU(units=24, return_sequences=False, dropout=0.2, recurrent_dropout=0.2)(y)

    y = tf.keras.layers.RepeatVector(inputs.shape[1])(y)
    y = tf.keras.layers.GRU(units=24, return_sequences=True, dropout=0.2, recurrent_dropout=0.2)(y)
    y = tf.keras.layers.GRU(units=16, return_sequences=True, dropout=0.2, recurrent_dropout=0.2)(y)
    y = tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(units=num_classes))(y)
    y = tf.keras.layers.Softmax()(y)
    model = tf.keras.Model(inputs, y)
    return model


In [None]:
def prepare(x, y, num_classes, class_map: dict[int, int]):
    return (
        x,
        #tf.one_hot(class_map.get(sts.mode(y[-5:]).mode, 0), num_classes)
        tf.one_hot(np.vectorize(class_map.get)(y), num_classes)
    )

def load_train_datasets(params: SKTrainParams, feat_shape, class_shape, class_map, feat_cols=None):
    def preprocess(x: npt.NDArray[np.float32]):
       return x + np.random.normal(0, 0.05, size=x.shape)

    output_signature = (
        tf.TensorSpec(shape=feat_shape, dtype=tf.float32),
        tf.TensorSpec(shape=class_shape, dtype=tf.int32),
    ) 
 
    ds = Hdf5Dataset(
        ds_path=params.ds_path,
        frame_size=params.frame_size,
        mask_key="mask",
        feat_cols=feat_cols,
    )
    train_subject_ids, val_subject_ids = sklearn.model_selection.train_test_split(
        ds.train_subject_ids, test_size=params.val_subjects
    )

    def train_generator(subject_ids):
        def ds_gen():
            train_subj_gen = ds.uniform_subject_generator(subject_ids)
            return map(
                lambda x_y: prepare(preprocess(x_y[0]), x_y[1], class_shape[-1], class_map),
                ds.signal_generator(train_subj_gen, samples_per_subject=params.samples_per_subject)
            )
        return tf.data.Dataset.from_generator(
            ds_gen,
            output_signature=output_signature,
        )

    split = len(train_subject_ids) // params.data_parallelism
    train_datasets = [train_generator(
        train_subject_ids[i * split : (i + 1) * split]
    ) for i in range(params.data_parallelism)]

    # Create TF datasets
    train_ds = tf.data.Dataset.from_tensor_slices(
        train_datasets
    ).interleave(
        lambda x: x,
        cycle_length=params.data_parallelism,
        deterministic=False,
        num_parallel_calls=tf.data.AUTOTUNE,
    ).shuffle(
        buffer_size=params.buffer_size,
        reshuffle_each_iteration=True,
    ).batch(
        batch_size=params.batch_size,
        drop_remainder=False,
    ).prefetch(
        buffer_size=tf.data.AUTOTUNE
    )

    def val_generator():
        val_subj_gen = ds.uniform_subject_generator(val_subject_ids)
        return map(
            lambda x_y: prepare(preprocess(x_y[0]), x_y[1], class_shape[-1], class_map),
            ds.signal_generator(val_subj_gen, samples_per_subject=params.samples_per_subject)
        )

    val_ds = tf.data.Dataset.from_generator(
        generator=val_generator,
        output_signature=output_signature
    )
    val_x, val_y = next(val_ds.batch(params.val_size).as_numpy_iterator())
    val_ds = create_dataset_from_data(
        val_x, val_y, output_signature=output_signature
    ).batch(
        batch_size=params.batch_size,
        drop_remainder=False,
    )

    return train_ds, val_ds


In [None]:
params.seed = set_random_seed(params.seed)
logger.info(f"Random seed {params.seed}")

In [None]:
num_sleep_stages = 2

feat_names = [
    "SPO2-mu",  # 0
    "SPO2-std", # 1
    "SPO2-med", # 2
    "SPO2-iqr", # 3
    "MOV-mu",   # 4
    "MOV-std",  # 5 
    "MOV-med",  # 6
    "MOV-iqr",  # 7
    "RRI-mu",   # 8
    "RRI-std",  # 9
    "RRI-med",  # 10
    "RRI-iqr",  # 11
    "RRI-sd-rms", # 12
    "RRI-sd-std", # 13
    "HR-bpm",     # 14
    "RSP-bpm",    # 15
    "HRV-lf",     # 16
    "HRV-hf",     # 17
    "HRV-lfhf"    # 18
]
feat_cols = list(range(len(feat_names)))
# feat_cols = [
#     4, 5, 6, 7,
#     8, 9, 10, 11, 12, 14
# ]

num_feats = len(feat_cols)
target_classes = get_sleep_stage_classes(num_sleep_stages)
class_names = get_sleep_stage_class_names(num_sleep_stages)
class_mapping = get_sleep_stage_class_mapping(num_sleep_stages)
num_classes = len(target_classes)


In [None]:
os.makedirs(params.job_dir, exist_ok=True)

In [None]:
feat_shape = (params.frame_size, num_feats)
class_shape = (params.frame_size, num_classes)
inputs = tf.keras.Input(feat_shape, batch_size=None, dtype=tf.float32)

In [None]:
strategy = get_strategy()
with strategy.scope():
    print("Loading datasets...")
    train_ds, val_ds = load_train_datasets(params, feat_shape, class_shape, class_mapping, feat_cols=feat_cols)
    print("Loading model...")
    model = load_model(inputs, num_classes=len(target_classes))
    flops = get_flops(model, batch_size=1)

    # Grab optional LR parameters
    lr_rate: float = getattr(params, "lr_rate", 1e-3)
    lr_cycles: int = getattr(params, "lr_cycles", 1)
    steps_per_epoch = params.steps_per_epoch or 1000
    if lr_cycles == 1:
        scheduler = tf.keras.optimizers.schedules.CosineDecay(
            initial_learning_rate=lr_rate,
            decay_steps=int(steps_per_epoch * params.epochs),
        )
    else:
        scheduler = tf.keras.optimizers.schedules.CosineDecayRestarts(
            initial_learning_rate=lr_rate,
            first_decay_steps=int(0.1 * steps_per_epoch * params.epochs),
            t_mul=1.65 / (0.1 * lr_cycles * (lr_cycles - 1)),
            m_mul=0.4,
        )
    optimizer = tf.keras.optimizers.Adam(scheduler)
    loss = tf.keras.losses.CategoricalFocalCrossentropy(
        from_logits=False,
        label_smoothing=getattr(params, "label_smoothing", 0.1),
    )
    metrics = [
        tf.keras.metrics.CategoricalAccuracy(name="acc"),
        MultiF1Score(name="f1", dtype=tf.float32, average="macro"),
        tf.keras.metrics.OneHotIoU(
            num_classes=len(target_classes),
            target_class_ids=target_classes,
            name="iou",
        ),
    ]    
    model.compile(optimizer=optimizer, loss=loss, metrics=metrics)
    model(inputs)

    model.summary(print_fn=logger.info)
    logger.info(f"Model requires {flops/1e6:0.2f} MFLOPS")

    params.weights_file = str(params.job_dir / "model.weights")

    model_callbacks = [
        tf.keras.callbacks.EarlyStopping(
            monitor=f"val_{params.val_metric}",
            patience=max(int(0.25 * params.epochs), 1),
            mode="max" if params.val_metric == "f1" else "auto",
            restore_best_weights=True,
        ),
        tf.keras.callbacks.ModelCheckpoint(
            filepath=params.weights_file,
            monitor=f"val_{params.val_metric}",
            save_best_only=True,
            save_weights_only=True,
            mode="max" if params.val_metric == "f1" else "auto",
            verbose=1,
        ),
        tf.keras.callbacks.CSVLogger(str(params.job_dir / "history.csv")),
        tf.keras.callbacks.TensorBoard(
            log_dir=str(params.job_dir / "logs"),
            write_steps_per_second=True
        ),
    ]


In [None]:
with strategy.scope():
    try:
        model.fit(
            train_ds,
            steps_per_epoch=steps_per_epoch,
            verbose=2,
            epochs=params.epochs,
            validation_data=val_ds,
            callbacks=model_callbacks,
        )
    except KeyboardInterrupt:
        logger.warning("Stopping training due to keyboard interrupt")


In [None]:
model.load_weights(params.weights_file)

In [None]:
test_labels = [y.numpy() for _, y in val_ds]
y_true = np.argmax(np.concatenate(test_labels).squeeze(), axis=-1)
y_pred = np.argmax(model.predict(val_ds).squeeze(), axis=-1)

# Summarize results
test_acc = np.sum(y_pred == y_true) / y_true.size
test_iou = compute_iou(y_true, y_pred, average="weighted")
f1_metric = MultiF1Score(name="f1", dtype=tf.float32, average="weighted")
f1_metric.update_state(y_true=y_true, y_pred=y_pred)
test_f1 = f1_metric.result().numpy()
# test_f1 = 0
logger.info(f"[TEST SET] ACC={test_acc:.2%}, IoU={test_iou:.2%} F1={test_f1:.2%}")

cm_path = str(params.job_dir / f"confusion_matrix_test{num_sleep_stages}_unet.png")
confusion_matrix_plot(
    y_true.flatten(),
    y_pred.flatten(),
    labels=class_names,
    save_path=cm_path,
    normalize="true",
)

In [6]:
import plotly.graph_objects as go

In [9]:
x = np.arange(24*60*60)

In [10]:
y = np.cos(2*np.pi*x/(24*60*60))

In [11]:
fig = go.Figure()
fig.add_trace(go.Scatter(x=x, y=y))
fig.show()

In [5]:
ds = MesaDataset(ds_path=Path("/home/vscode/datasets"), frame_size=30*64, target_rate=64, is_commercial=True)

In [6]:
tod = ds.load_signal_for_subject(ds.test_subject_ids[0], signal_label="linetime", start=0, data_size=ds.frame_size)

In [9]:
import time

In [11]:
ts = time.strptime(tod[0], "%H:%M:%S")
ts.tm_hour * 60 * 60 + ts.tm_min * 60 + ts.tm_sec

75600

In [12]:
np.cos(2*np.pi*(ts.tm_hour * 60 * 60 + ts.tm_min * 60 + ts.tm_sec)/(24*60*60))

0.7071067811865474

In [13]:
ts

time.struct_time(tm_year=1900, tm_mon=1, tm_mday=1, tm_hour=21, tm_min=0, tm_sec=0, tm_wday=0, tm_yday=1, tm_isdst=-1)

In [14]:
ts.tm_hour = 23

AttributeError: readonly attribute