In [None]:
import tensorflow as tf
from tensorflow.keras import layers
import numpy as np
import os
import random
import pandas as pd
import cv2
import time

# Colab-specific: mount Google Drive at /content/drive so the
# /content/drive/MyDrive/... paths used by later cells resolve.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# --- Sample geometry --------------------------------------------------------
# get_model() declares its Input as (CHANNELS, WINDOW_SIZE, MAX_PERSONS, NO_COLS).
WINDOW_SIZE = 5
MAX_PERSONS = 6
CHANNELS = 1

# Names of the per-person columns (an x/y pair for each listed joint);
# only the count is used below.
KEYPOINT_COLS = (
    "neck_x", "neck_y",
    "right shoulder_x", "right shoulder_y",
    "right elbow_x", "right elbow_y",
    "right wrist_x", "right wrist_y",
    "left shoulder_x", "left shoulder_y",
    "left elbow_x", "left elbow_y",
    "left wrist_x", "left wrist_y",
)
NO_COLS = len(KEYPOINT_COLS)

# --- Training hyper-parameters ----------------------------------------------
epochs, batch_size, lr = 25, 22, 1e-3

# --- Dataset split ----------------------------------------------------------
# Cumulative fractions of the shuffled file list: [0, split1) is train,
# [split1, split2) is validation, the remainder is test.
split1, split2 = 0.7, 0.9

In [None]:
# Build the train/val/test file lists from everything under npy_files/.
NPY_DIR = "/content/drive/MyDrive/GSoC/npy_files/"

# os.listdir() returns entries in arbitrary order, so the listing is sorted
# first; otherwise the same seed can yield a different shuffle (and therefore
# a different split) from one run or filesystem to the next.
files = sorted(os.listdir(NPY_DIR))
random.seed(42)
random.shuffle(files)
files = [os.path.join(NPY_DIR, fil) for fil in files]

# split1/split2 are cumulative fractions: [0, l1) train, [l1, l2) val, [l2, end) test.
samples = len(files)
l1 = int(samples*split1)
l2 = int(samples*split2)
files_train, files_val, files_test = files[:l1], files[l1:l2], files[l2:]

In [None]:
def _load_split(paths):
    """Read every file in `paths` and stack its records into model inputs.

    Each file is loaded with ``np.load`` and must iterate as records that
    unpack into exactly three fields ``(frame, sample, label)``; the frame
    entry is not used here.

    Args:
        paths: iterable of file paths to load.

    Returns:
        A tuple ``(x, y)``: ``x`` is a float32 array in which every sample has
        been wrapped in a leading length-1 axis (the single input channel),
        ``y`` is an int array with one label per sample.
    """
    samples, labels = [], []
    for path in paths:
        with open(path, "rb") as npf:
            # NOTE(review): allow_pickle=True unpickles arbitrary objects and
            # must only be used on files from a trusted source.
            records = np.load(npf, allow_pickle=True)
        for _frame, sample, label in records:
            samples.append(np.array([sample], dtype=np.float32))   # 1 channel required
            labels.append(label)
    return np.array(samples, dtype=np.float32), np.array(labels, dtype=int)


# NOTE(review): only the first 14 training files are used -- confirm whether
# this cap is intentional (e.g. a memory limit) or a leftover from debugging.
x_train, y_train = _load_split(files_train[:14])
x_val, y_val = _load_split(files_val)

In [None]:
def get_model():
    """Build and compile the ConvLSTM2D + Conv3D binary classifier.

    The network takes inputs of shape
    ``(CHANNELS, WINDOW_SIZE, MAX_PERSONS, NO_COLS)`` and stacks, in order:
    one ConvLSTM2D layer, two 32-filter Conv3D layers, a 3x3x3 max-pool with
    stride 1, two 64-filter Conv3D layers, global average pooling, two
    L2-regularised 128-unit dense layers (dropout 0.2 before each) and a
    final dropout + single sigmoid unit.

    NOTE(review): every convolution uses ``data_format="channels_first"``
    while ``BatchNormalization()`` is created with its default axis --
    confirm that normalising over the last axis is what is wanted.
    NOTE(review): with channels_first, ConvLSTM2D presumably reads the first
    input axis (CHANNELS) as time -- confirm against the intended layout.

    Returns:
        A ``tf.keras`` model compiled with binary cross-entropy, Adam at the
        module-level learning rate ``lr``, and accuracy/precision/recall
        metrics.
    """

    def conv_bn(tensor, n_filters):
        """Same-padded 3x3x3 ReLU Conv3D followed by batch normalisation."""
        tensor = layers.Conv3D(
            filters=n_filters,
            kernel_size=(3, 3, 3),
            padding="same",
            activation="relu",
            data_format="channels_first",
        )(tensor)
        return layers.BatchNormalization()(tensor)

    def dropout_dense(tensor):
        """Dropout(0.2) followed by an L2-regularised 128-unit ReLU layer."""
        tensor = layers.Dropout(0.2)(tensor)
        return layers.Dense(
            units=128,
            activation="relu",
            kernel_regularizer=tf.keras.regularizers.l2(),
        )(tensor)

    inp = layers.Input(shape=(CHANNELS, WINDOW_SIZE, MAX_PERSONS, NO_COLS))

    # Recurrent-convolutional front end.
    features = layers.ConvLSTM2D(
        filters=32,
        kernel_size=(3, 3),
        padding="same",
        return_sequences=True,
        activation="relu",
        data_format="channels_first",
    )(inp)
    features = layers.BatchNormalization()(features)

    # Convolutional trunk: 2 x 32 filters, pool, 2 x 64 filters.
    for n_filters in (32, 32):
        features = conv_bn(features, n_filters)
    features = layers.MaxPool3D(
        pool_size=(3, 3, 3), strides=(1, 1, 1), data_format="channels_first"
    )(features)
    for n_filters in (64, 64):
        features = conv_bn(features, n_filters)

    # Classification head.
    features = layers.GlobalAveragePooling3D(data_format="channels_first")(features)
    for _ in range(2):
        features = dropout_dense(features)
    features = layers.Dropout(0.2)(features)
    out = layers.Dense(units=1, activation="sigmoid")(features)

    model = tf.keras.models.Model(inp, out)
    model.compile(
        loss=tf.keras.losses.binary_crossentropy,
        optimizer=tf.keras.optimizers.Adam(learning_rate=lr),
        metrics=['accuracy', tf.keras.metrics.Precision(), tf.keras.metrics.Recall()],
    )

    return model

In [None]:
import os
import time
import tensorflow as tf

# Start from a clean Keras state and build a fresh model for training.
tf.keras.backend.clear_session()
model2 = get_model()
# summary() prints the table itself and returns None, so it is not wrapped in
# print() (which would add a stray "None" line to the cell output).
model2.summary()

logs_dir = "/content/models_dir/logs/"
# Also creates the parent /content/models_dir/ that the checkpoint is written to.
os.makedirs(logs_dir, exist_ok=True)

# Stop when val_accuracy has not improved for 8 epochs.
early_stopping = tf.keras.callbacks.EarlyStopping(monitor="val_accuracy", patience=8, verbose=1)
# Halve the learning rate when val_loss has not improved for 4 epochs.
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor="val_loss", patience=4, verbose=1, factor=0.5)
# Keep only the best model by val_accuracy; the file name embeds the creation
# timestamp and the input geometry.
save_model = tf.keras.callbacks.ModelCheckpoint(f"/content/models_dir/best_model_t{int(time.time())}_w{WINDOW_SIZE}_p{MAX_PERSONS}_c{NO_COLS}.h5", monitor="val_accuracy", mode="max", verbose=1, save_best_only=True)
tensorboard = tf.keras.callbacks.TensorBoard(log_dir=logs_dir)

In [None]:
# Train using the epoch budget from the config cell rather than a literal, so
# that editing `epochs` there actually takes effect (batch_size already comes
# from config). Early stopping may still end training sooner.
# The History object is kept so the cell does not just echo its repr.
history = model2.fit(
    x_train,
    y_train,
    batch_size=batch_size,
    epochs=epochs,
    validation_data=(x_val, y_val),
    callbacks=[early_stopping, reduce_lr, save_model, tensorboard],
)

In [None]:
# Load a previously saved checkpoint for inference.
tf.keras.backend.clear_session()
# compile=False: the model is compiled explicitly just below, so restoring the
# saved training configuration first would be redundant work.
model = tf.keras.models.load_model(
    "/content/drive/MyDrive/GSoC/best_model_t1692955173_w5_p6_c14.h5",
    compile=False,
)

model.compile(loss=tf.keras.losses.binary_crossentropy, optimizer=tf.keras.optimizers.Adam(learning_rate=lr),
                  metrics=['accuracy', tf.keras.metrics.Precision(), tf.keras.metrics.Recall()])

In [None]:
import numpy as np

# Rebuild x_val / y_val from one specific recording.
x_val = []
y_val = []
skipped_records = 0

with open("/content/drive/MyDrive/GSoC/npy_files/2014-12-02_0000_US_KNBC_The_Ellen_DeGeneres_Show_1405-1740_npy-train_w5_p6_r0.025.npy", "rb") as npf:
    # NOTE(review): allow_pickle=True must only be used on trusted files.
    data = np.load(npf, allow_pickle=True)

for item in data:
    # The other loaders in this notebook unpack each record as
    # (frame, sample, label), so the label is the THIRD field; index 0 is the
    # frame entry and must not be used as the target.
    # Rows of a NumPy array are ndarrays rather than tuples, so any of the
    # sequence types is accepted here.
    if isinstance(item, (tuple, list, np.ndarray)) and len(item) >= 3:
        d, lb = item[1], item[2]
        x_val.append(np.array([d], dtype=np.float32))   # 1 channel required
        y_val.append(lb)
    else:
        # Still best-effort, but no longer silent about dropped records.
        skipped_records += 1

if skipped_records:
    print(f"Skipped {skipped_records} record(s) that were not (frame, sample, label) sequences")

x_val = np.array(x_val, dtype=np.float32)
y_val = np.array(y_val, dtype=int)

In [None]:
# Load the first held-out file only. `data` stays at module level because the
# prediction cell pairs its records with the model outputs.
fil = files_test[0]
with open(fil, "rb") as npf:
    data = np.load(npf, allow_pickle=True)

# Every record unpacks as (frame, d, lb); each d is wrapped in a leading
# length-1 axis because the model needs one channel.
x_test = [np.array([d], dtype=np.float32) for _, d, _ in data]
y_test = [lb for _, _, lb in data]

In [None]:
# Score every test sample with the loaded model.
r = model.predict(np.array(x_test, dtype=np.float32), verbose=1)

# zip() truncates silently, so fail loudly if `data` is not the array that
# x_test was built from (e.g. a stale value left behind by another cell).
assert len(r) == len(data), "predictions and records are out of sync"

# Sigmoid outputs above this value are reported as a gesture.
GESTURE_THRESHOLD = 0.5

# One [frame, is_gesture] row per record; index 0 of a record is its frame entry.
results = [
    [frame_data[0], score[0] > GESTURE_THRESHOLD]
    for frame_data, score in zip(data, r)
]

In [None]:
# Tabulate the per-frame decisions, then persist the frame entries of the rows
# that were flagged as gestures.
df = pd.DataFrame(results, columns=["frame", "gesture"])
gesture_mask = df["gesture"].eq(True)
frames_with_gesture = df.loc[gesture_mask, "frame"].to_numpy()

np.save('/content/drive/MyDrive/GSoC/frames_with_gesture.npy', frames_with_gesture)