In [1]:
import tensorflow as tf
from tensorflow.keras import Model
import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings("ignore")

tf.random.set_seed(42)
type_train_ds = tf.data.experimental.load("/kaggle/input/cmi-tf-datasets/type_train_ds")
type_valid_ds = tf.data.experimental.load("/kaggle/input/cmi-tf-datasets/type_valid_ds")
gesture_train_ds = tf.data.experimental.load("/kaggle/input/cmi-tf-datasets/gesture_train_ds")
gesture_valid_ds = tf.data.experimental.load("/kaggle/input/cmi-tf-datasets/gesture_valid_ds")
# test_ds = tf.data.experimental.load("/kaggle/input/cmi-tf-datasets/test_ds")

2025-08-16 15:28:48.814678: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1755358129.014081      36 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1755358129.075907      36 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
I0000 00:00:1755358141.927182      36 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 15513 MB memory:  -> device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:04.0, compute capability: 6.0


In [2]:
# print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

In [3]:
print(len(type_train_ds))
print(len(type_valid_ds))
print(len(gesture_train_ds))
print(len(gesture_valid_ds))

# print(len(test_ds))

505
48
505
48


In [4]:
for i, (inputs, targets) in enumerate(type_train_ds):
    if i==0:
        print(inputs[0].shape)
        print(inputs[1].shape)
        print(inputs[2].shape)
        print(targets.shape)
        break

(16, 70, 8, 8, 5)
(16, 70, 19)
(16, 17)
(16, 1)


In [5]:
from tensorflow.keras.layers import Conv3D, Conv2D, Conv1D, BatchNormalization, TimeDistributed, InputLayer
from tensorflow.keras.layers import MaxPool3D, MaxPool2D, GlobalMaxPool3D, GlobalMaxPool2D, AveragePooling1D, Dropout
from tensorflow.keras.layers import SimpleRNN, LSTM, GRU, Bidirectional, Flatten, Dense, GaussianNoise
from tensorflow.keras.layers import ReLU, ELU, Masking
from tensorflow.keras.regularizers import L1, L2, L1L2
from tensorflow.keras import Sequential

class RNNModel(Model):
    def __init__(self, kernel_size3d=3, kernel_size1d=3, filters_3d=[16, 32], dropout=0.2, 
                 filters_1d=[16, 32], num_rnn_layers=1, rnn_hidden_size=32, mode="RNN", 
                 bidirectional=False, hidden_size=64, regularizer="l1", l1_penalty=0.1, l2_penalty=0.1, 
                 binary=True, **kwargs):
        super().__init__(**kwargs)
        self.hidden_size = hidden_size
        self.bidirectional = bidirectional

        # self.image_gaussian_noise = GaussianNoise(0.2)
        self.conv3d_nets = []
        for i in range(len(filters_3d)):
            conv3d_net = Sequential([
                Conv3D(filters_3d[i], kernel_size3d, padding="same", use_bias=False),
                BatchNormalization(),
                ReLU(),
                MaxPool3D(pool_size=(2, 2, 2), strides=(2, 1, 1), padding="same"),
                Dropout(dropout)
            ], name=f"conv3d_net_{i}")
            self.conv3d_nets.append(conv3d_net)
        self.global_maxpool = GlobalMaxPool3D()

        self.conv1d_nets = []
        for i in range(len(filters_1d)):
            conv1d_net = Sequential([
                Conv1D(filters_1d[i], kernel_size1d, padding="same", kernel_initializer="he_normal", use_bias=False),
                BatchNormalization(),
                ELU(),
                AveragePooling1D(pool_size=2, strides=2, padding="same"),
                Dropout(dropout)
            ], name=f"conv1d_net_{i}")
            self.conv1d_nets.append(conv1d_net)

        self.masking = Masking(mask_value=0.0)
        self.time_series_gaussian_noise = GaussianNoise(0.2)
        self.rnns = []
        # self.skip_dense = Dense(rnn_hidden_size*2, name="skip_connection_dense_bidirectional")
        for i in range(num_rnn_layers):
            return_sequences = True if i!=num_rnn_layers-1 else False
            if mode == "RNN":
                rnn = SimpleRNN(rnn_hidden_size, kernel_initializer="glorot_normal", recurrent_initializer="orthogonal", 
                                dropout=dropout, recurrent_dropout=dropout, return_sequences=return_sequences)
            elif mode == "LSTM":
                rnn = LSTM(rnn_hidden_size, kernel_initializer="glorot_normal", recurrent_initializer="orthogonal", 
                           dropout=dropout, recurrent_dropout=dropout, return_sequences=return_sequences)
            else:
                rnn = GRU(rnn_hidden_size, kernel_initializer="glorot_normal", recurrent_initializer="orthogonal", 
                          dropout=dropout, recurrent_dropout=dropout, return_sequences=return_sequences)
            if bidirectional:
                rnn = Bidirectional(rnn, name=f"{mode}_{i}")
            self.rnns.append(rnn)
        
        # self.dense_flat = Sequential([
        #     Dense(1, activation="relu"),
        #     Flatten()
        # ], name="dense_and_flatten")

        if regularizer == "l1":
            self.dense0 = Dense(hidden_size, activation="relu", 
                                kernel_regularizer=L1(l1_penalty),
                                bias_regularizer=L1(l1_penalty))
            self.dense1_0 = Dense(1, activation="sigmoid",
                                  kernel_regularizer=L1(l1_penalty),
                                  bias_regularizer=L1(l1_penalty))
            self.dense1_1 = Dense(18, activation="softmax", kernel_initializer="glorot_normal",
                                  kernel_regularizer=L1(l1_penalty),
                                  bias_regularizer=L1(l1_penalty))
        elif regularizer == "l2":
            self.dense0 = Dense(hidden_size, activation="relu", 
                                kernel_regularizer=L2(l2_penalty),
                                bias_regularizer=L2(l2_penalty))
            self.dense1_0 = Dense(1, activation="sigmoid", 
                                  kernel_regularizer=L2(l2_penalty),
                                  bias_regularizer=L2(l2_penalty))
            self.dense1_1 = Dense(18, activation="softmax", kernel_initializer="glorot_normal",
                                  kernel_regularizer=L2(l2_penalty),
                                  bias_regularizer=L2(l2_penalty))
        elif regularizer == "l1l2":
            self.dense0 = Dense(hidden_size, activation="relu", 
                                kernel_regularizer=L1L2(l1_penalty, l2_penalty),
                                bias_regularizer=L1L2(l1_penalty, l2_penalty))
            self.dense1_0 = Dense(1, activation="sigmoid", 
                                  kernel_regularizer=L1L2(l1_penalty, l2_penalty),
                                  bias_regularizer=L1L2(l1_penalty, l2_penalty))
            self.dense1_1 = Dense(18, activation="softmax", kernel_initializer="glorot_normal",
                                  kernel_regularizer=L1L2(l1_penalty, l2_penalty),
                                  bias_regularizer=L1L2(l1_penalty, l2_penalty))
        else:
            self.dense0 = Dense(hidden_size, activation="relu")
            self.dense1_0 = Dense(1, activation="sigmoid")
            self.dense1_1 = Dense(18, activation="softmax")

        self.binary = binary

    def build(self, input_shapes, training=False):
        image_shape = input_shapes[0]
        for i in range(len(self.conv3d_nets)):
            self.conv3d_nets[i].build(image_shape)
            image_shape = self.conv3d_nets[i].compute_output_shape(image_shape)
        self.global_maxpool.build(image_shape)
        image_shape = self.global_maxpool.compute_output_shape(image_shape)

        time_series_shape = input_shapes[1]
        for i in range(len(self.conv1d_nets)):
            self.conv1d_nets[i].build(time_series_shape)
            shape = self.conv1d_nets[i].compute_output_shape(time_series_shape)

        self.masking.build(time_series_shape)
        time_series_shape = self.masking.compute_output_shape(time_series_shape)
        for i in range(len(self.rnns)):
            self.rnns[i].build(time_series_shape)
            time_series_shape = self.rnns[i].compute_output_shape(time_series_shape)
        
        shape = (image_shape[0], image_shape[1] + time_series_shape[1] + input_shapes[2][1])

        self.dense0.build(shape)
        shape = self.dense0.compute_output_shape(shape)

        self.dense1_0.build(shape)
        self.dense1_1.build(shape)
        

    def call(self, inputs, training=False):
        image_out = inputs[0]
        # image_out = self.image_gaussian_noise(image_out)
        for i in range(len(self.conv3d_nets)):
            image_out = self.conv3d_nets[i](image_out, training=training)
        image_out = self.global_maxpool(image_out)   # (batch, filters)

        time_series_out = inputs[1]
        # out = tf.RaggedTensor.from_tensor(out, lengths=inputs[3])
        for i in range(len(self.conv1d_nets)):
            time_series_out = self.conv1d_nets[i](time_series_out, training=training)

        time_series_out = self.masking(time_series_out)
        mask = self.masking.compute_mask(time_series_out)
        time_series_out = self.time_series_gaussian_noise(time_series_out)
        # time_series_out = tf.ragged.boolean_mask(time_series_out, mask)
        for i in range(len(self.rnns)):
            time_series_out = self.rnns[i](time_series_out, mask=mask, training=training)
            # if i==0 and len(self.rnns)!=1:
            #     out = self.skip_dense(out) if self.bidirectional else out
            #     out = rnn_out + out
            # if i < len(self.rnns)-1:
            #     out = rnn_out + out
            # else:
            #     out = rnn_out
        # out = self.rnn(out, training=training)        # (batch, downsampled_timestep, rnn_hidden_size)
        out = tf.concat([image_out, time_series_out, inputs[2]], axis=-1)
        out = self.dense0(out)
        if self.binary:
            out = self.dense1_0(out)
        else:
            out = self.dense1_1(out)
        return out

    def set_binary(self):
        self.binary = True
        if self.dense1_0.build == False:
            self.dense1_0.build(input_shape=(None, self.hidden_size))

    def set_multi(self):
        self.binary = False
        if self.dense1_1.build == False:
            self.dense1_1.build(input_shape=(None, self.hidden_size))

    def freeze_conv_timeseries(self):
        for i in range(len(self.conv2d_nets)):
            self.conv3d_nets[i].trainable = False
        for i in range(len(self.conv1d_nets)):
            self.conv1d_nets[i].trainable = False
        self.rnn.trainable = False

In [6]:
from tensorflow.keras.losses import Loss, categorical_crossentropy, categorical_focal_crossentropy
y_true = tf.constant([[0, 1, 0, 0, 0],
                      [1, 0, 0, 0, 0],
                      [0, 0, 1, 0, 0],
                      [0, 0, 0, 1, 0],
                      [0, 0, 0, 0, 1]])
y_pred = tf.constant([[0.05, 0.8, 0.1, 0.05, 0],
                      [0.5, 0.2, 0.1, 0.1, 0.1],
                      [0.3, 0.3, 0.3, 0.05, 0.05],
                      [0.1, 0.1, 0.1, 0.6, 0.1],
                      [0, 0, 0, 0.1, 0.9]])

U = tf.range(0, 5)
non_target_idx = tf.constant([2, 3, 4])
target_idx = tf.sets.difference(tf.reshape(U, (1, -1)), tf.reshape(non_target_idx, (1, -1))).values
y_true_target = tf.gather(y_true, indices=target_idx, axis=-1)
y_pred_target = tf.gather(y_pred, indices=target_idx, axis=-1)
y_true_non_target = tf.gather(y_true, indices=non_target_idx, axis=-1)
y_pred_non_target = tf.gather(y_pred, indices=non_target_idx, axis=-1)
y_true_non_target = tf.reduce_sum(y_true_non_target, axis=-1, keepdims=True)
y_pred_non_target = tf.reduce_sum(y_pred_non_target, axis=-1, keepdims=True)

y_true = tf.concat([y_true_target, y_true_non_target], axis=-1)
y_pred = tf.concat([y_pred_target, y_pred_non_target], axis=-1)
loss = categorical_crossentropy(y_true, y_pred)
tf.reduce_mean(loss)

<tf.Tensor: shape=(), dtype=float32, numpy=0.41114503145217896>

In [45]:
from tensorflow.keras.losses import Loss, categorical_crossentropy, categorical_focal_crossentropy
from tensorflow.keras.metrics import Metric
class CustomCategoricalCrossentropy(Loss):
    def __init__(self, target_indices, label_smoothing=0.0, use_focal=False, 
                 alpha=0.25, gamma=1, name="custom_loss"):
        super().__init__(name=name)
        self.target_indices = target_indices
        u = tf.range(0, 18)
        self.non_target_indices = tf.sets.difference(tf.reshape(u, (1, -1)), tf.reshape(target_indices, (1, -1))).values
        self.label_smoothing = label_smoothing
        self.use_focal = use_focal
        self.alpha = alpha
        if isinstance(self.alpha, list):
            alpha_target = np.array(self.alpha)[self.target_indices].tolist()
            alpha_non_target = [1.0]
            self.alpha = alpha_target + alpha_non_target
        self.gamma = gamma

    def call(self, y_true, y_pred):
        y_true_target = tf.gather(y_true, indices=self.target_indices, axis=-1)
        y_pred_target = tf.gather(y_pred, indices=self.target_indices, axis=-1)
        y_true_non_target = tf.gather(y_true, indices=self.non_target_indices, axis=-1)
        y_true_non_target = tf.reduce_sum(y_true_non_target, axis=-1, keepdims=True)
        y_pred_non_target = tf.gather(y_pred, indices=self.non_target_indices, axis=-1)
        y_pred_non_target = tf.reduce_sum(y_pred_non_target, axis=-1, keepdims=True)
        y_true = tf.concat([y_true_target, y_true_non_target], axis=-1)
        y_pred = tf.concat([y_pred_target, y_pred_non_target], axis=-1)
        if self.use_focal:
            # alpha_target = np.array(self.alpha)[self.target_indices].tolist()
            # alpha_non_target = [1.0]
            # self.alpha = alpha_target + alpha_non_target
            loss = categorical_focal_crossentropy(y_true, y_pred, alpha=self.alpha, 
                                                  gamma=self.gamma, label_smoothing=self.label_smoothing)
        else:
            loss = categorical_crossentropy(y_true, y_pred, label_smoothing=self.label_smoothing)
        loss = tf.reduce_mean(loss)
        return loss


class CustomF1Score(Metric):
    def __init__(self, num_classes, target_indices, name="custom_f1_score", **kwargs):
        super().__init__(name=name, **kwargs)
        self.num_classes = num_classes
        self.target_indices = target_indices
        u = tf.range(0, 18)
        self.non_target_indices = tf.sets.difference(tf.reshape(u, (1, -1)), tf.reshape(target_indices, (1, -1))).values
        # 各クラスごとの TP, FP, FN を保持
        self.tp = self.add_weight(name="tp", shape=(num_classes,), initializer="zeros")
        self.fp = self.add_weight(name="fp", shape=(num_classes,), initializer="zeros")
        self.fn = self.add_weight(name="fn", shape=(num_classes,), initializer="zeros")

    def update_state(self, y_true, y_pred, sample_weight=None):
        # # y_true が one-hot の場合は argmax
        # if y_true.shape[-1] == self.num_classes:
        #     y_true = tf.argmax(y_true, axis=-1)
        y_true_target = tf.gather(y_true, indices=self.target_indices, axis=-1)
        y_pred_target = tf.gather(y_pred, indices=self.target_indices, axis=-1)
        y_true_non_target = tf.gather(y_true, indices=self.non_target_indices, axis=-1)
        y_true_non_target = tf.reduce_sum(y_true_non_target, axis=-1, keepdims=True)
        y_pred_non_target = tf.gather(y_pred, indices=self.non_target_indices, axis=-1)
        y_pred_non_target = tf.reduce_sum(y_pred_non_target, axis=-1, keepdims=True)
        y_true = tf.concat([y_true_target, y_true_non_target], axis=-1)
        y_pred = tf.concat([y_pred_target, y_pred_non_target], axis=-1)

        y_pred = tf.argmax(y_pred, axis=-1)

        for c in range(self.num_classes):
            y_true_c = tf.cast(tf.equal(y_true, c), tf.float32)
            y_pred_c = tf.cast(tf.equal(y_pred, c), tf.float32)

            tp = tf.reduce_sum(y_true_c * y_pred_c)
            fp = tf.reduce_sum((1 - y_true_c) * y_pred_c)
            fn = tf.reduce_sum(y_true_c * (1 - y_pred_c))

            # index [c] に値を追加
            self.tp.assign_add(tf.scatter_nd([[c]], [tp], [self.num_classes]))
            self.fp.assign_add(tf.scatter_nd([[c]], [fp], [self.num_classes]))
            self.fn.assign_add(tf.scatter_nd([[c]], [fn], [self.num_classes]))

    def result(self):
        precision = self.tp / (self.tp + self.fp + 1e-7)
        recall = self.tp / (self.tp + self.fn + 1e-7)
        f1 = 2 * precision * recall / (precision + recall + 1e-7)
        return tf.reduce_mean(f1)  # macro average

    def reset_states(self):
        for var in [self.tp, self.fp, self.fn]:
            var.assign(tf.zeros_like(var))

In [8]:
for i, (inputs, targets) in enumerate(type_train_ds):
    if i == 0:
        print(inputs[0].shape)
        print(inputs[1].shape)
        print(inputs[2].shape)
        break

(16, 70, 8, 8, 5)
(16, 70, 19)
(16, 17)


In [9]:
import json

with open('/kaggle/input/cmi-tf-datasets/sample_weight.json') as f:
    sample_weight_dict = json.load(f)

type_sample_weight = sample_weight_dict["type_sample_weight"]
type_class_weight = {0: type_sample_weight[0], 1: type_sample_weight[1]}

gesture_sample_weight = sample_weight_dict["gesture_sample_weight"]
gesture_class_weight = {i: gesture_sample_weight[i] for i in range(len(gesture_sample_weight))}

In [10]:
def add_type_sample_weight(x, y):
    y_int = tf.cast(y, tf.int32)
    weight = tf.gather([type_class_weight[0], type_class_weight[1]], y_int)
    return x, y, weight

def add_gesture_sample_weight(x, y):
    y_arg = tf.argmax(y)
    weight = tf.gather(gesture_sample_weight, y_arg)
    return x, y, weight

In [11]:
type_train_ds = type_train_ds.unbatch().map(add_type_sample_weight).batch(32, drop_remainder=True)
type_valid_ds = type_valid_ds.unbatch().map(add_type_sample_weight).batch(32, drop_remainder=True)

In [12]:
# gesture_train_ds = gesture_train_ds.unbatch().map(add_gesture_sample_weight).batch(16)
# gesture_valid_ds = gesture_valid_ds.unbatch().map(add_gesture_sample_weight).batch(16)
gesture_train_ds = gesture_train_ds.unbatch().batch(256)
gesture_valid_ds = gesture_valid_ds.unbatch().batch(256)

In [13]:
def scheduler(epoch):
    lr = 0.001
    if epoch < 20:
        return lr
    elif 20 <= epoch and epoch < 40:
        return lr * (1/2)
    elif 40 <= epoch and epoch < 60:
        return lr * (1/4)
    elif 60 <= epoch and epoch < 80:
        return lr * (1/8)
    else:
        return lr * (1/16)

In [46]:
from tensorflow.keras.losses import BinaryCrossentropy, CategoricalCrossentropy, CategoricalFocalCrossentropy
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import BinaryAccuracy, F1Score, AUC

model = RNNModel(kernel_size3d=(3, 3, 3), kernel_size1d=3, filters_3d=[8, 16, 32], dropout=0.1,
                 filters_1d=[], num_rnn_layers=1, rnn_hidden_size=128, mode="LSTM", 
                 bidirectional=False, hidden_size=64, regularizer="l1l2", 
                 l1_penalty=1e-6, l2_penalty=1e-6, binary=False)
model.build(input_shapes=((None, None, 8, 8, 5), (None, None, 19), (None, 17)))
# model.set_multi()
# model.compile(loss=CategoricalFocalCrossentropy(alpha=gesture_sample_weight, 
#                                                 gamma=3,
#                                                 label_smoothing=0.1),
#               optimizer=Adam(learning_rate=1e-2),
#               metrics=["accuracy"])
target_indices = [0, 1, 3, 4, 6, 7, 9, 10]
model.compile(loss=CustomCategoricalCrossentropy(target_indices=target_indices,
                                                 label_smoothing=0.1,
                                                 use_focal=True,
                                                 alpha=gesture_sample_weight, 
                                                 gamma=0),
              optimizer=Adam(learning_rate=5e-3),
              metrics=["accuracy", 
                       CustomF1Score(num_classes=9, target_indices=target_indices)
                      ]
             )
model.summary()
# model.freeze_conv_timeseries()

In [47]:
from tensorflow.keras.callbacks import ModelCheckpoint, LearningRateScheduler
checkpoint_filepath = "/kaggle/working/cmi_best_model.weights.h5"
checkpoint_callback = ModelCheckpoint(
    filepath=checkpoint_filepath,
    monitor="val_loss",
    save_best_only=True,
    save_weights_only=True,
    mode="min",
    verbose=1
)

lr_callback = LearningRateScheduler(scheduler, verbose=0)

history = model.fit(gesture_train_ds, epochs=200, 
                    validation_data=gesture_valid_ds,
                    callbacks=[checkpoint_callback])
model.load_weights(checkpoint_filepath)

Epoch 1/200


InvalidArgumentError: Graph execution error:

Detected at node mul_35 defined at (most recent call last):
  File "<frozen runpy>", line 198, in _run_module_as_main

  File "<frozen runpy>", line 88, in _run_code

  File "/usr/local/lib/python3.11/dist-packages/colab_kernel_launcher.py", line 37, in <module>

  File "/usr/local/lib/python3.11/dist-packages/traitlets/config/application.py", line 992, in launch_instance

  File "/usr/local/lib/python3.11/dist-packages/ipykernel/kernelapp.py", line 712, in start

  File "/usr/local/lib/python3.11/dist-packages/tornado/platform/asyncio.py", line 211, in start

  File "/usr/lib/python3.11/asyncio/base_events.py", line 608, in run_forever

  File "/usr/lib/python3.11/asyncio/base_events.py", line 1936, in _run_once

  File "/usr/lib/python3.11/asyncio/events.py", line 84, in _run

  File "/usr/local/lib/python3.11/dist-packages/ipykernel/kernelbase.py", line 510, in dispatch_queue

  File "/usr/local/lib/python3.11/dist-packages/ipykernel/kernelbase.py", line 499, in process_one

  File "/usr/local/lib/python3.11/dist-packages/ipykernel/kernelbase.py", line 406, in dispatch_shell

  File "/usr/local/lib/python3.11/dist-packages/ipykernel/kernelbase.py", line 730, in execute_request

  File "/usr/local/lib/python3.11/dist-packages/ipykernel/ipkernel.py", line 383, in do_execute

  File "/usr/local/lib/python3.11/dist-packages/ipykernel/zmqshell.py", line 528, in run_cell

  File "/usr/local/lib/python3.11/dist-packages/IPython/core/interactiveshell.py", line 2975, in run_cell

  File "/usr/local/lib/python3.11/dist-packages/IPython/core/interactiveshell.py", line 3030, in _run_cell

  File "/usr/local/lib/python3.11/dist-packages/IPython/core/async_helpers.py", line 78, in _pseudo_sync_runner

  File "/usr/local/lib/python3.11/dist-packages/IPython/core/interactiveshell.py", line 3257, in run_cell_async

  File "/usr/local/lib/python3.11/dist-packages/IPython/core/interactiveshell.py", line 3473, in run_ast_nodes

  File "/usr/local/lib/python3.11/dist-packages/IPython/core/interactiveshell.py", line 3553, in run_code

  File "/tmp/ipykernel_36/1919387798.py", line 14, in <cell line: 0>

  File "/usr/local/lib/python3.11/dist-packages/keras/src/utils/traceback_utils.py", line 117, in error_handler

  File "/usr/local/lib/python3.11/dist-packages/keras/src/backend/tensorflow/trainer.py", line 371, in fit

  File "/usr/local/lib/python3.11/dist-packages/keras/src/backend/tensorflow/trainer.py", line 219, in function

  File "/usr/local/lib/python3.11/dist-packages/keras/src/backend/tensorflow/trainer.py", line 132, in multi_step_on_iterator

  File "/usr/local/lib/python3.11/dist-packages/keras/src/backend/tensorflow/trainer.py", line 113, in one_step_on_data

  File "/usr/local/lib/python3.11/dist-packages/keras/src/backend/tensorflow/trainer.py", line 84, in train_step

  File "/usr/local/lib/python3.11/dist-packages/keras/src/trainers/trainer.py", line 490, in compute_metrics

  File "/usr/local/lib/python3.11/dist-packages/keras/src/trainers/compile_utils.py", line 334, in update_state

  File "/usr/local/lib/python3.11/dist-packages/keras/src/trainers/compile_utils.py", line 21, in update_state

  File "/tmp/ipykernel_36/4251263825.py", line 72, in update_state

required broadcastable shapes
	 [[{{node mul_35}}]] [Op:__inference_multi_step_on_iterator_249919]

In [26]:
import json

with open('/kaggle/input/cmi-tf-datasets/mapping.json') as f:
    mapping_dict = json.load(f)

gesture_mapping = mapping_dict["gesture_mapping"]
inv_gesture_mapping = {value: key for key, value in gesture_mapping.items()}

num2gesture = np.vectorize(lambda x: inv_gesture_mapping[x])

In [27]:
non_target_gestures = ["Drink from bottle/cup", "Glasses on/off", "Pull air toward your face",
                       "Pinch knee/leg skin", "Scratch knee/leg skin", "Write name on leg",
                       "Text on phone", "Feel around in tray and pull out an object",
                       "Write name in air", "Wave hello"]

def map_non_target(y_ind):
    y_pred = inv_gesture_mapping[y_ind]
    if y_ind == 3:
        y_ind = 2
    elif y_ind == 4:
        y_ind = 3
    elif y_ind == 6:
        y_ind = 4
    elif y_ind == 7:
        y_ind = 5
    elif y_ind == 9:
        y_ind = 6
    elif y_ind == 10:
        y_ind = 7
    if y_pred in non_target_gestures:
        y_ind = 8
    return y_ind

vectorize_map_non_target = np.vectorize(map_non_target)

In [32]:
conf_tensor = np.zeros((9, 9), dtype=np.int32)
for i, (inputs, labels) in enumerate(gesture_valid_ds):
    labels_pred = model.predict(inputs, verbose=0)
    labels_pred = tf.argmax(labels_pred, axis=-1).numpy()
    labels_pred = vectorize_map_non_target(labels_pred)
    labels_true = tf.argmax(labels, axis=-1).numpy()
    labels_true = vectorize_map_non_target(labels_true)
    conf_tensor += tf.math.confusion_matrix(labels_true, labels_pred, num_classes=9)

In [33]:
precisions = []
recalls = []
f1s = []

for i in range(9):
    column = conf_tensor[i, :]
    row = conf_tensor[:, i]
    precision = column[i] / tf.math.reduce_sum(column)
    recall = row[i] / tf.math.reduce_sum(row)
    inv_f1 = (1/precision + 1/recall)/2
    f1 = 1/inv_f1
    precisions.append(precision)
    recalls.append(recall)
    f1s.append(f1)
print(f"F1 score mean: {np.round(np.mean(f1s), 3)}")

F1 score mean: 0.51


In [None]:
# import os
# save_path = os.path.join("/kaggle/working/", "cmi_model.weights.h5")
# model.save_weights(save_path)

In [None]:
new_model = RNNModel(kernel_size3d=(5, 3, 3), kernel_size1d=5, filters_3d=[8, 16, 32], dropout=0.2,
                     filters_1d=[], num_rnn_layers=1, rnn_hidden_size=128, mode="LSTM", 
                     bidirectional=False, hidden_size=64, regularizer="l1l2", 
                     l1_penalty=1e-6, l2_penalty=1e-6, binary=False)
new_model.build(input_shapes=((None, None, 8, 8, 5), (None, None, 19), (None, 17)))
new_model.load_weights(checkpoint_filepath)
new_model.summary()