# Installation, data loading, and imports

In [1]:
# Run the `pisces_setup` shell command. Per the recorded output of this cell,
# it converts the Mads Olsen model to Keras and saves it as
# `mo_resunet.keras` under pisces' `cached_models` directory.
!pisces_setup

Converting Mads Olsen model to Keras...
Model saved at /Users/eric/Engineering/Work/pisces/pisces/cached_models/mo_resunet.keras


In [2]:
from nhrc_utils.analysis import *

LR_KERNEL_SIZE: 61


In [3]:
import numpy as np
from typing import List
import tensorflow as tf
import pisces.models as pm
from matplotlib import pyplot as plt
from sklearn.model_selection import LeaveOneOut
import matplotlib.pyplot as plt
from sklearn.metrics import auc, roc_curve
from typing import List

from tqdm import tqdm

import os
from pathlib import Path

# All output locations hang off the directory the notebook is run from.
CWD = Path.cwd()
# NB! these are different from preprocess_data.ipynb
save_path = CWD / "fine_tuning"

# Sub-folders of the fine-tuning output directory.
hybrid_path = save_path / "hybrid"
disordered_path = save_path / "disordered"
walch_path = save_path / "walch"

# Create each folder together with any missing parents; folders that already
# exist are left untouched.
for _output_dir in (hybrid_path, disordered_path, walch_path):
    _output_dir.mkdir(parents=True, exist_ok=True)



# Training

## Create data tensors for training + evaluating

In [5]:
# Selects which preprocessed file is loaded; both values are interpolated
# into the file path below.
# NOTE(review): `acc_hz` is presumably the accelerometer sampling rate in Hz - confirm.
dataset, acc_hz = "stationary", "50"

# NOTE(review): allow_pickle=True unpickles arbitrary Python objects from the
# file, so only load .npy files produced by a trusted preprocessing run.
walch_npy = np.load(
    f'./pre_processed_data/{dataset}/{dataset}_preprocessed_data_{acc_hz}.npy',
    allow_pickle=True,
)
# .item() unwraps the single stored Python object from the loaded array;
# it is used as a mapping (via .keys()) below.
walch_preprocessed_data = walch_npy.item()

walch_keys = [*walch_preprocessed_data.keys()]
walch_data_bundle = prepare_data(walch_preprocessed_data)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step


In [6]:
# Sanity check: display the shape of the bundle's `mo_predictions` tensor
# (the recorded output of this cell is TensorShape([28, 1024, 4])).
walch_data_bundle.mo_predictions.shape

TensorShape([28, 1024, 4])

In [7]:
# Sanity check: display the shape of the bundle's `spectrogram` tensor
# (the recorded output of this cell is TensorShape([28, 15360, 32])).
walch_data_bundle.spectrogram.shape

TensorShape([28, 15360, 32])

## Train LR CNN

In [8]:
import datetime
from tensorflow.keras.callbacks import TensorBoard

# Take the timestamp once so that the LR and CNN log directories of a single
# run always carry the same stamp, even if the clock ticks over to the next
# second between the two assignments.
run_stamp = datetime.datetime.now().strftime('%Y%m%d-%H%M%S')

# Set up separate log directories for each model
log_dir_lr = f"./logs/lr_{run_stamp}"
log_dir_cnn = f"./logs/cnn_{run_stamp}"

# Configure TensorBoard callbacks, one per model type.
# histogram_freq=1 requests weight histograms every epoch.
cnn_tensorboard_callback = TensorBoard(log_dir=log_dir_cnn, histogram_freq=1)
lr_tensorboard_callback = TensorBoard(log_dir=log_dir_lr, histogram_freq=1)

In [9]:

from analyses.NHRC.nhrc_utils.model_definitions import LR_CNN_NAME,  LABEL_SHAPE, LR_INPUT_SHAPE, WeightedModel, build_lr_cnn


# Splitter that yields (train indices, test indices) pairs over walch_keys.
split_maker = pm.LeaveOneOutSplitter()

# Per-fold accumulators for the LR CNN run.
# The fit() results are kept under an LR-specific name because the fine-tuning
# cell re-binds the generic `training_results` to a new list, which would
# otherwise make the LR training histories unreachable. `training_results`
# stays available as an alias of the same list, so existing code that appends
# to or reads it keeps working.
lr_training_results = []
training_results = lr_training_results
lr_predictors = []

print(f"Training {LR_CNN_NAME} models...")
print("Saving models to", DEFAULT_EVALUATION_DIR)

# Train one LR CNN per split and save it under the key of the held-out entry.
for k_train, k_test in tqdm(split_maker.split(walch_keys), desc="Next split", total=len(walch_keys)):
    # Convert indices to tensors
    train_idx_tensor = tf.constant(k_train, dtype=tf.int32)
    # NOTE: the test indices are not used for fitting in this cell; only
    # k_test[0] is used below to name the saved model.
    test_idx_tensor = tf.constant(k_test, dtype=tf.int32)

    # Gather this fold's training rows with tf.gather and reshape them to the
    # shapes given by LR_INPUT_SHAPE / LABEL_SHAPE.
    train_data = tf.reshape(
        tf.gather(walch_data_bundle.activity, train_idx_tensor),
        LR_INPUT_SHAPE)
    train_labels = tf.reshape(
        tf.gather(walch_data_bundle.true_labels, train_idx_tensor),
        LABEL_SHAPE)
    train_sample_weights = tf.reshape(
        tf.gather(walch_data_bundle.sample_weights, train_idx_tensor),
        LABEL_SHAPE)

    # make the labels binary, -1 -> 0
    # since we incorporate the mask in the sample weights, we can just set the labels to 0
    train_labels_masked = tf.where(train_sample_weights > 0, train_labels, 0.0)

    # z-normalize input data, using one mean and one standard deviation
    # computed over the whole training tensor of this fold
    train_data = (train_data - tf.reduce_mean(train_data)) / np.std(train_data)

    # WeightedModel wraps the freshly built network and is compiled without a
    # `loss` argument; the sample weights travel with the data as the third
    # element of every dataset item.
    lr_cnn = build_lr_cnn()
    weighted_lr_cnn = WeightedModel(lr_cnn)
    weighted_lr_cnn.compile(
        optimizer=tf.keras.optimizers.AdamW(learning_rate=1e-3),
    )

    # Use a fold-local name here: the global `dataset` holds the data-set name
    # string that is interpolated into the input file path, and re-binding it
    # to a tf.data.Dataset would silently break any later use of that string.
    train_dataset = tf.data.Dataset.from_tensor_slices(
        (train_data, train_labels_masked, train_sample_weights)).batch(32)

    # NOTE(review): the same TensorBoard callback (one log directory) is passed
    # to every fold, so the logs of all folds are written to the same place.
    training_results.append(weighted_lr_cnn.fit(
        train_dataset,
        epochs=350,
        verbose=0,
        callbacks=[lr_tensorboard_callback]
    ))

    # Keep the underlying (unwrapped) network and save it under a file name
    # derived from the held-out key.
    lr_predictors.append(lr_cnn)
    lr_path = make_lr_filename(walch_keys[k_test[0]])
    lr_cnn.save(lr_path)

Training LR CNN models...
Saving models to /Users/eric/Engineering/Work/pisces/analyses/NHRC/evaluations


Next split: 100%|██████████| 28/28 [01:46<00:00,  3.80s/it]


In [10]:
import pandas as pd
from tqdm import tqdm

from analyses.NHRC.nhrc_utils.model_definitions import FINETUNING_INPUT_SHAPE, build_finetuning_model, EXTRA_LAYERS_NAME

# Fresh splitter for the fine-tuning run; it is iterated over walch_keys below.
split_maker = pm.LeaveOneOutSplitter()

# Per-fold accumulators: fit() results, evaluations, and the trained models.
training_results, evaluations, cnn_predictors = [], [], []

print(f"Training {EXTRA_LAYERS_NAME} models...")
print("Saving models to", DEFAULT_EVALUATION_DIR)

def finetuning_gather_reshape(data_bundle: PreparedData, train_idx_tensor: tf.Tensor, input_shape: tuple = FINETUNING_INPUT_SHAPE, output_shape: tuple = LABEL_SHAPE) -> tuple[tf.Tensor, tf.Tensor, tf.Tensor]:
    """Select entries of a prepared bundle and reshape them for fine-tuning.

    Args:
        data_bundle: Bundle providing the ``mo_predictions``, ``true_labels``
            and ``sample_weights`` tensors.
        train_idx_tensor: Indices of the entries to select; passed to
            ``tf.gather``, which selects along the first axis by default.
        input_shape: Shape the gathered ``mo_predictions`` are reshaped to.
        output_shape: Shape the gathered labels and sample weights are
            reshaped to.

    Returns:
        A ``(data, labels, sample_weights)`` tuple of reshaped tensors. The
        function always returns this tuple, never ``None``.
    """
    def _gather_as(source, shape):
        # Pick the requested entries, then force the target shape.
        return tf.reshape(tf.gather(source, train_idx_tensor), shape)

    return (
        _gather_as(data_bundle.mo_predictions, input_shape),
        _gather_as(data_bundle.true_labels, output_shape),
        _gather_as(data_bundle.sample_weights, output_shape),
    )

# Train one fine-tuning model per split and save it under the key of the
# held-out entry.
for k_train, k_test in tqdm(split_maker.split(walch_keys), desc="Next split", total=len(walch_keys)):
    # Convert indices to tensors
    train_idx_tensor = tf.constant(k_train, dtype=tf.int32)
    # NOTE: the test indices are not used for fitting in this cell; only
    # k_test[0] is used below to name the saved model.
    test_idx_tensor = tf.constant(k_test, dtype=tf.int32)

    # Gather this fold's training inputs, labels and sample weights
    train_data, train_labels, train_sample_weights = finetuning_gather_reshape(walch_data_bundle, train_idx_tensor)

    # make the labels binary, -1 -> 0
    # since we incorporate the mask in the sample weights, we can just set the labels to 0.
    # This is computed exactly once, with the same `> 0` predicate as the LR
    # CNN loop, so the mask definition is identical for both model types.
    train_labels_masked = tf.where(train_sample_weights > 0, train_labels, 0.0)

    # Build and compile a fresh model for this fold; the loss takes logits.
    cnn = build_finetuning_model(FINETUNING_INPUT_SHAPE[1:])
    cnn.compile(
        loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
        optimizer=tf.keras.optimizers.AdamW(learning_rate=5e-4),
    )

    # NOTE(review): the same TensorBoard callback (one log directory) is passed
    # to every fold, so the logs of all folds are written to the same place.
    training_results.append(cnn.fit(
        train_data, train_labels_masked,
        epochs=100,
        validation_split=0.0,
        batch_size=1,
        sample_weight=train_sample_weights,
        verbose=0,
        callbacks=[cnn_tensorboard_callback]
    ))

    cnn_predictors.append(cnn)
    cnn_path = make_finetuning_filename(walch_keys[k_test[0]])

    # save the trained model
    cnn.save(cnn_path)




Training Fine Tuning models...
Saving models to /Users/eric/Engineering/Work/pisces/analyses/NHRC/evaluations


Next split: 100%|██████████| 28/28 [04:42<00:00, 10.11s/it]
