In [138]:
import numpy as np
import pandas as pd
from typing import Tuple
import os
from scipy import stats
import tensorflow as tf
from tensorflow import keras

In [18]:
# UP and DOWN sampling functions
def nearest_neighbors(to_time:np.ndarray, from_time:np.ndarray, y:np.ndarray) -> np.ndarray:
    y_upsample = []
    for n_t in to_time:
        diff = np.absolute(from_time - n_t)
        index = diff.argmin()
        y_upsample.append(y[index])
    return np.array(y_upsample)

nn_upsample = lambda x_time, y_time, y: nearest_neighbors(to_time=x_time, from_time=y_time, y=y)
nn_downsample = lambda x_time, y_time, y: nearest_neighbors(to_time=y_time, from_time=x_time, y=y)

In [116]:
def read_raw_data(dir_path:str='TrainingData') -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    '''
    Reads in the raw data from the directory path and returns X, UPSAMPLED (using nearest neighbors) y,
    and subject ids.
    '''
    
    # Unque path stems for subject id and trial number
    path_stems = sorted(set([path[:14] for path in os.listdir(dir_path)]))
    
    # URLs following naming conventions in directory
    x_leaf = '__x'
    x_time_leaf = '__x_time'
    y_leaf = '__y'
    y_time_leaf = '__y_time'
    
    X_total = []
    y_total = []
    subject_ids = []
    for stem in path_stems:
        X = pd.read_csv(os.path.join(dir_path, f'{stem}{x_leaf}.csv'), header=None).to_numpy()
        x_t = pd.read_csv(os.path.join(dir_path, f'{stem}{x_time_leaf}.csv'), header=None).to_numpy()
        y = pd.read_csv(os.path.join(dir_path, f'{stem}{y_leaf}.csv'), header=None).to_numpy()
        y_t = pd.read_csv(os.path.join(dir_path, f'{stem}{y_time_leaf}.csv'), header=None).to_numpy()
        
        # Upsample y using nearest neighbors
        y = nn_upsample(x_time=x_t, y_time=y_t, y=y)
        subject_ids.extend([stem.split('_')[1]]*len(X))
        
        X_total.append(X)
        y_total.append(y)
    
    return (
        np.vstack(X_total),
        np.vstack(y_total),
        np.array(subject_ids)
    )

In [117]:
X, y, subject_ids = read_raw_data()

In [118]:
X.shape, y.shape, subject_ids.shape

((1341646, 6), (1341646, 1), (1341646,))

In [126]:
# Pull subject 6 out for validation
X_train_arr = X[subject_ids != '006', :]
X_val_arr = X[subject_ids == '006', :]
assert X_val_arr.shape[0] + X_train_arr.shape[0] == X.shape[0]

y_train_arr = y[subject_ids != '006', :]
y_val_arr = y[subject_ids == '006', :]
assert y_train_arr.shape[0] + y_val_arr.shape[0] == y.shape[0]

In [127]:
def create_dataset(X, y, time_steps=1, step=1):
    Xs, ys = [], []
    for i in range(0, len(X) - time_steps, step):
        v = X[i:(i + time_steps)]
        labels = y[i: i + time_steps]
        Xs.append(v)        
        ys.append(stats.mode(labels)[0][0])
    return np.array(Xs), np.array(ys).reshape(-1, 1)

window_in_sec = 2
stride_in_overlap = .5 # percentage of overlap between windows
TIME_STEPS = int(window_in_sec*40)
STEP = int(stride_in_overlap * TIME_STEPS)

X_train, y_train = create_dataset(
    X_train_arr, 
    y_train_arr, 
    TIME_STEPS, 
    STEP
)

X_val, y_val = create_dataset(
    X_val_arr, 
    y_val_arr, 
    TIME_STEPS, 
    STEP
)

  ys.append(stats.mode(labels)[0][0])


In [134]:
batch_size = 1048

# Include the epoch in the file name (uses `str.format`)
checkpoint_path = "jacob_training/cp-best-f1_score.ckpt"
checkpoint_dir = os.path.dirname(checkpoint_path)

# Create a callback that saves the model's weights every 5 epochs
cp_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_path, 
    verbose=1, 
    monitor = 'val_f1_score',
    mode = 'max',
    save_best_only=True,
 )

In [136]:
import tensorflow_addons as tfa
f1_score = tfa.metrics.F1Score(
    num_classes = 4,
    average = 'macro',
    name = 'f1_score'
)


TensorFlow Addons (TFA) has ended development and introduction of new features.
TFA has entered a minimal maintenance and release mode until a planned end of life in May 2024.
Please modify downstream libraries to take dependencies from other repositories in our TensorFlow community (e.g. Keras, Keras-CV, and Keras-NLP). 

For more information see: https://github.com/tensorflow/addons/issues/2807 

2023-04-17 17:01:08.305883: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2023-04-17 17:01:08.306448: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1956] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://ww

In [139]:
# tf.config.list_physical_devices('GPU')
model = keras.Sequential()
model.add(keras.Input(shape=(X_train.shape[1], X_train.shape[2])))
model.add(
    keras.layers.Bidirectional(
      keras.layers.LSTM(
          units=256, 
          recurrent_dropout = .2
      )
    )
)
model.add(keras.layers.Dense(units=128, activation='relu'))
model.add(keras.layers.Dense(y_train.shape[1], activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc',f1_score])

model.summary()


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 bidirectional (Bidirectiona  (None, 512)              538624    
 l)                                                              
                                                                 
 dense (Dense)               (None, 128)               65664     
                                                                 
 dense_1 (Dense)             (None, 1)                 129       
                                                                 
Total params: 604,417
Trainable params: 604,417
Non-trainable params: 0
_________________________________________________________________


In [140]:
# Scaling by total/4 helps keep the loss to a similar magnitude.
# The sum of the weights of all examples stays the same.
'''
weight_for_0 = (1 / class0) * (total / 4.0)
weight_for_1 = (1 / class1) * (total / 4.0)
weight_for_2 = (1 / class2) * (total / 4.0)
weight_for_3 = (1 / class3) * (total / 4.0)
'''
# manuals weights based on confusion matrix
weight_for_0 = .25
weight_for_1 = 1.2
weight_for_2 = 1.1
weight_for_3 = 3

class_weight = {0: weight_for_0, 1: weight_for_1, 2: weight_for_2, 3: weight_for_3}

print('Weight for class 0: {:.2f}'.format(weight_for_0))
print('Weight for class 1: {:.2f}'.format(weight_for_1))
print('Weight for class 2: {:.2f}'.format(weight_for_2))
print('Weight for class 3: {:.2f}'.format(weight_for_3))


Weight for class 0: 0.25
Weight for class 1: 1.20
Weight for class 2: 1.10
Weight for class 3: 3.00


In [141]:
history = model.fit(
    X_train, y_train,
    epochs=20,
    batch_size=batch_size,
    validation_data = (X_val, y_val),
    shuffle=False,
    callbacks=[cp_callback],
    class_weight=class_weight
)

Epoch 1/20


2023-04-17 17:06:22.113219: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 57060480 exceeds 10% of free system memory.
  return dispatch_target(*args, **kwargs)


ValueError: in user code:

    File "/home/jacob/ece542_repos/c2_competition/venv/lib/python3.8/site-packages/keras/engine/training.py", line 1284, in train_function  *
        return step_function(self, iterator)
    File "/home/jacob/ece542_repos/c2_competition/venv/lib/python3.8/site-packages/tensorflow_addons/metrics/f_scores.py", line 160, in update_state  *
        self.true_positives.assign_add(_weighted_sum(y_pred * y_true, sample_weight))

    ValueError: Dimension 0 in both shapes must be equal, but are 4 and 1. Shapes are [4] and [1]. for '{{node AssignAddVariableOp_25}} = AssignAddVariableOp[dtype=DT_FLOAT](AssignAddVariableOp_25/resource, Sum_3)' with input shapes: [], [1].
