In [2]:
# Import necessary libraries
import cv2
import numpy as np
import os
import pandas as pd
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.models import load_model
import tensorflow_hub as hub
from tensorflow.keras.preprocessing.sequence import pad_sequences
tf.config.run_functions_eagerly(True)




In [3]:
FRAME_SIZE = (224, 224)       # Size of each video frame
DECEPTION_CLASSES = 1         # Binary deception classification
BATCH_SIZE = 1                # Batch dimension of the model input; also passed as batch_size to preprocessing
PAD_LENGTH = 600              # Default `maxlen` of pad_videos
EPOCHS = 1               # Reduced for demonstration purposes
NUM_FRAMES=16                 # Sampling stride: default `n` of load_every_nth_frame (keep every n-th frame)
FRAME_CLUSTER = 1             # Timesteps per model input (second entry of the Input batch_shape)
# Windows-style and Linux-style spellings of the dataset folder; VIDEO_DIR selects the one in use.
DIR_WIN = 'C:/Users/jettc/Downloads/Tony file2/'
DIR_LIN = '/mnt/c/Users/jettc/Downloads/Tony file2/'
VIDEO_DIR = DIR_LIN    # Directory where video files are stored
# Excel workbook holding the per-video action and deception annotations (read by load_labels).
LABEL_FILE_PATH = VIDEO_DIR+'Actions folder/Tony Gestures (Deceptive 10.9.23) .xlsx'

# Columns corresponding to action labels in the dataset
# NOTE(review): some names look misspelled (e.g. 'forewardHead', 'soothinHandM'). They are used
# as DataFrame column keys in load_labels, so presumably they match the spreadsheet headers --
# confirm against the workbook before renaming anything.
ACTION_COLUMNS = ['forewardHead', 'tiltHead', 'downHead', 'scanningHead', 'reflexHead', 'vigilantGaze', 'orientingGaze', 
                  'downGaze', 'otherGaze', 'scanningGaze', 'foldedArms', 'holdingArms', 'hidingArms', 'soothingArms', 
                  'otherArms', 'interlockedHand', 'holdingHands', 'hidingHand', 'soothinHandM', 'wringingHandM', 'distractingHands']

ACTION_CLASSES = len(ACTION_COLUMNS)           # Number of action classes


In [196]:
print(tf.config.list_physical_devices('GPU'))

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [197]:
import os
os.getcwd()

'/mnt/c/Users/jettc/OneDrive - Swinburne University/5th Year/Semester 2/Computing Technology Project B'

In [200]:
def load_labels(label_file_path):
    """Read the annotation workbook and collect labels for videos present on disk.

    Rows with a missing value in any of ACTION_COLUMNS are dropped. For each
    remaining row the video path is VIDEO_DIR joined with the row's 'id'; rows
    whose file does not exist are reported with a warning and left out.

    :param label_file_path: Path of the Excel file to read.
    :return: (video_files, action_labels, deception_labels) -- a list of paths,
             an array with one float row of ACTION_COLUMNS values per video,
             and an array of 0/1 flags (1 when the 'class' cell contains
             'deceptive', case-insensitively).
    """
    deception_column = 'class'

    sheet = pd.read_excel(label_file_path, sheet_name='Tony Gestures_Deceptive and Tru')
    labelled_rows = sheet.dropna(subset=ACTION_COLUMNS)

    found_paths, action_rows, deception_flags = [], [], []

    for _, record in labelled_rows.iterrows():
        candidate = os.path.join(VIDEO_DIR, str(record['id']))

        if os.path.exists(candidate):
            # Action annotations for this video as a float vector
            action_rows.append(record[ACTION_COLUMNS].values.astype(float))

            # Deception flag derived from the text of the class column
            is_deceptive = 'deceptive' in str(record[deception_column]).lower().strip()
            deception_flags.append(int(is_deceptive))

            found_paths.append(candidate)
        else:
            print(f"Warning: Video file {candidate} not found!")

    return found_paths, np.array(action_rows), np.array(deception_flags)


In [201]:
# Load labels and video file paths
video_files, action_labels, deception_labels = load_labels(LABEL_FILE_PATH)

print("Number of videos:", len(video_files))
print("Action labels shape:", action_labels.shape)
print("Deception labels shape:", deception_labels.shape)


  warn(msg)


Number of videos: 168
Action labels shape: (168, 21)
Deception labels shape: (168,)


In [202]:
def load_every_nth_frame(video_path, n=NUM_FRAMES, target_size=FRAME_SIZE):
    """Read a video with OpenCV and keep every n-th frame.

    Frames are kept when their zero-based position is a multiple of ``n``
    (so the first frame is always kept). No colour-space conversion is
    applied; frames are stored as OpenCV returns them.

    :param video_path: Path of the video file to open.
    :param n: Sampling stride; only frames whose index % n == 0 are kept.
    :param target_size: Size passed to cv2.resize for each kept frame; a falsy
                        value disables resizing.
    :return: float32 NumPy array stacking the kept frames (empty if no frame
             could be read), or None when the file cannot be opened.
    """
    cap = cv2.VideoCapture(video_path)
    # try/finally guarantees the capture handle is released even if resizing
    # or the array conversion raises part-way through the video.
    try:
        if not cap.isOpened():
            print(f"Error: Unable to open video file {video_path}")
            return None

        frames = []
        frame_count = 0
        success, frame = cap.read()

        while success:
            # Only process every nth frame
            if frame_count % n == 0:
                # Resize the frame to reduce memory usage if necessary
                if target_size:
                    frame = cv2.resize(frame, target_size)
                frames.append(frame)

            # Advance to the next frame
            success, frame = cap.read()
            frame_count += 1

        return np.array(frames, dtype=np.float32)
    finally:
        cap.release()


In [203]:
# Split the dataset
# 80/20 split with a fixed seed; the three sequences are split with the same
# shuffling so paths, action labels and deception labels stay aligned.
video_train, video_test, action_train, action_test, deception_train, deception_test = train_test_split(
    video_files, action_labels, deception_labels, test_size=0.2, random_state=42)

# Drop the un-split copies. NOTE: after this `del`, re-running this cell fails
# with NameError until the label-loading cell has been run again.
del video_files, action_labels, deception_labels


In [204]:
# Pad all videos to the same length (PAD_LENGTH)
def pad_videos(frames, maxlen=PAD_LENGTH):
    """Return a float32 array of exactly ``maxlen`` frames.

    The input is truncated when it has more than ``maxlen`` frames and
    zero-filled at the end when it has fewer. The per-frame shape is taken
    from the first frame, which must be three-dimensional.

    :param frames: Indexable sequence of frames (at least one).
    :param maxlen: Number of frames in the result.
    :return: Array of shape (maxlen, height, width, channels).
    """
    height, width, depth = frames[0].shape

    # Start from an all-zero clip, then overwrite the leading part
    canvas = np.zeros((maxlen, height, width, depth), dtype=np.float32)

    keep = min(len(frames), maxlen)
    canvas[:keep] = frames[:keep]

    return canvas


In [4]:
# Load Vision Transformer model from TensorFlow Hub
# NOTE(review): the layer is created with trainable=True, but in this notebook it is only
# called directly (preprocess_frames_with_vit and the embedding_dim probe) and is not part
# of the model returned by build_lstm_model -- confirm whether fine-tuning was intended.
vit_layer = hub.KerasLayer("https://tfhub.dev/sayakpaul/vit_b32_fe/1", trainable=True)

# Function to preprocess a batch of frames using the Vision Transformer
def preprocess_frames_with_vit(frames_batch):
    """
    Turn a sequence of frames into one Vision Transformer embedding per frame.

    Every frame is cast to float32, divided by 255, given a leading batch axis
    and passed through ``vit_layer`` on its own; the per-frame results are then
    stacked row-wise and the stacked shape is printed.

    :param frames_batch: Iterable of frame arrays.
    :return: NumPy array with one embedding row per input frame.
    """
    per_frame = [
        # scale -> add batch axis -> embed, one frame at a time
        vit_layer(np.expand_dims(raw.astype('float32') / 255.0, axis=0))
        for raw in frames_batch
    ]

    stacked = np.vstack(per_frame)
    print(f"Shape of frame embeddings: {stacked.shape}")

    return stacked

# Compute embedding_dim using a sample frame
# A single random 224x224x3 input is pushed through the ViT layer only to read the size of
# its last output axis; the value is used later to size the model input.
sample_frame = np.random.rand(1, 224, 224, 3).astype(np.float32)  # A random sample frame
sample_embedding = vit_layer(sample_frame)
embedding_dim = sample_embedding.shape[-1]  # Get the embedding dimension



















In [206]:
# Build the LSTM and Multi-task Learning Model
def build_lstm_model(embedding_dim, batch_size=BATCH_SIZE):
    """Build the two-headed (action + deception) Keras model.

    The input has a fixed batch shape of (batch_size, FRAME_CLUSTER,
    embedding_dim), which the stateful LSTM requires.

    * Action head: Dense(128) -> self-attention -> Dropout -> sigmoid Dense
      with ACTION_CLASSES units, applied per timestep ("action_output").
    * Deception head: stateful LSTM(64) over the raw input -> Dropout,
      concatenated with the action head's output, then Dense(64) -> Dropout
      -> average pooling over time -> sigmoid Dense ("deception_output").

    :param embedding_dim: Size of each frame embedding.
    :param batch_size: Fixed batch size baked into the input layer.
    :return: Uncompiled Model with outputs [action_output, deception_output].
    """
    frame_input = layers.Input(batch_shape=(batch_size, FRAME_CLUSTER, embedding_dim))  # fixed batch for stateful LSTM
    print(f"Input shape to the model: {frame_input.shape}")

    # --- Action branch: per-timestep features refined with self-attention ---
    action_features = layers.Dense(128, activation='relu')(frame_input)
    action_features = layers.MultiHeadAttention(num_heads=4, key_dim=128)(action_features, action_features)
    action_features = layers.Dropout(0.3)(action_features)
    action_output = layers.Dense(ACTION_CLASSES, activation='sigmoid', name="action_output")(action_features)

    # --- Deception branch: stateful recurrent summary of the raw embeddings ---
    temporal_features = layers.LSTM(64, return_sequences=True, stateful=True)(frame_input)
    temporal_features = layers.Dropout(0.3)(temporal_features)

    # Feed the predicted action probabilities into the deception classifier
    fused = layers.Concatenate()([temporal_features, action_output])
    fused = layers.Dense(64, activation='relu')(fused)
    fused = layers.Dropout(0.3)(fused)

    # Collapse the time axis before the binary classification layer
    fused = layers.GlobalAveragePooling1D()(fused)
    deception_output = layers.Dense(DECEPTION_CLASSES, activation='sigmoid', name="deception_output")(fused)

    return models.Model(inputs=frame_input, outputs=[action_output, deception_output])

#Build model
model = build_lstm_model(embedding_dim)

Input shape to the model: (1, 1, 768)


In [207]:
def load_videos(video_list):
    """Load the sampled frames of each video, printing every path as it is read.

    :param video_list: Iterable of video file paths.
    :return: List with one load_every_nth_frame result per path, in input order.
    """
    def announce_and_load(path):
        print(path)
        return load_every_nth_frame(path)

    return [announce_and_load(path) for path in video_list]

#X_train = load_videos(video_train)
#X_test = load_videos(video_test)

In [209]:
def preprocess_videos_and_save(video_paths, action_labels, deception_labels, batch_size=10, save_dir='preprocessed_videos', is_train=True):
    """
    Embed videos with the Vision Transformer and save embeddings and labels to disk.

    Videos are handled ``batch_size`` at a time: each batch is loaded with
    load_videos, every video is embedded with preprocess_frames_with_vit, and
    three files are written per video into ``save_dir``:
    ``<name>_preprocessed.npy``, ``<name>_action_label.npy`` and
    ``<name>_deception_label.npy``, where ``<name>`` is the file's base name up
    to its first dot.

    A video that could not be opened (loader returned None) or yielded no
    frames is reported and skipped, so one bad file does not abort the run.

    :param video_paths: Sequence of video file paths.
    :param action_labels: Per-video action labels, aligned with video_paths.
    :param deception_labels: Per-video deception labels, aligned with video_paths.
    :param batch_size: Number of videos loaded into memory at once.
    :param save_dir: Output directory (created if missing).
    :param is_train: Only selects the 'Training'/'Testing' word in progress messages.
    """
    os.makedirs(save_dir, exist_ok=True)

    split_name = 'Training' if is_train else 'Testing'
    batch_starts = range(0, len(video_paths), batch_size)
    # Exact number of batches; the "// batch_size + 1" form over-counts by one
    # whenever the number of videos is a multiple of batch_size.
    total_batches = len(batch_starts)

    for batch_number, start in enumerate(batch_starts, start=1):
        stop = start + batch_size
        batch_files = video_paths[start:stop]
        batch_action_labels = action_labels[start:stop]
        batch_deception_labels = deception_labels[start:stop]

        # Load the sampled frames of every video in this batch
        videos = load_videos(batch_files)

        for video_file, video, action_label, deception_label in zip(
                batch_files, videos, batch_action_labels, batch_deception_labels):
            # The loader returns None for unreadable files and an empty array
            # when no frame could be decoded; neither can be embedded.
            if video is None or len(video) == 0:
                print(f"Warning: No frames loaded from {video_file}; skipping.")
                continue

            embeddings = preprocess_frames_with_vit(video)
            video_name = os.path.basename(video_file).split('.')[0]

            np.save(os.path.join(save_dir, f'{video_name}_preprocessed.npy'), embeddings)
            np.save(os.path.join(save_dir, f'{video_name}_action_label.npy'), action_label)
            np.save(os.path.join(save_dir, f'{video_name}_deception_label.npy'), deception_label)

        print(f"{split_name} batch {batch_number}/{total_batches} processed and saved.")


In [None]:
# Preprocess and save training videos along with both action and deception labels
# Note: BATCH_SIZE (the model batch size) is reused here as the number of videos
# loaded per preprocessing batch.
preprocess_videos_and_save(video_train, action_train, deception_train, batch_size=BATCH_SIZE, save_dir='preprocessed_train', is_train=True)

# Preprocess and save testing videos along with both action and deception labels
preprocess_videos_and_save(video_test, action_test, deception_test, batch_size=BATCH_SIZE, save_dir='preprocessed_test', is_train=False)


In [189]:
print(np.load('preprocessed_train/trial_lie_060_deception_label.npy'))

0


import numpy as np
import tensorflow as tf
import os

import logging

# Set up logging to track skipped files
logging.basicConfig(filename='skipped_files.log', level=logging.WARNING)

class VideoDataSequence(tf.keras.utils.Sequence):
    """Batch-level loader for saved per-video embeddings and labels.

    Each item is one batch of whole videos read from ``save_dir``, where files
    are named ``<name>_preprocessed.npy``, ``<name>_action_label.npy`` and
    ``<name>_deception_label.npy``.

    NOTE: a later cell in this notebook defines another class with the same
    name; once that cell runs, it replaces this definition in the kernel.
    """

    def __init__(self, video_paths, batch_size, save_dir):
        """
        :param video_paths: List of video file paths (only the base name is used).
        :param batch_size: Number of videos per batch.
        :param save_dir: Directory where the preprocessed .npy files are stored.
        """
        # Initialise the Keras base class so its bookkeeping attributes exist
        # (newer Keras versions warn when this call is missing).
        super().__init__()
        self.video_paths = video_paths
        self.batch_size = batch_size
        self.save_dir = save_dir

    def __len__(self):
        """Return the number of batches per epoch (last batch may be partial)."""
        return int(np.ceil(len(self.video_paths) / self.batch_size))

    def __getitem__(self, index):
        """
        Load batch number ``index``.

        :return: (frames, (action_labels, deception_labels)) as NumPy arrays.
                 Videos that fail to load are left out, so a batch can hold
                 fewer than ``batch_size`` entries.
        """
        start = index * self.batch_size
        batch_files = self.video_paths[start:start + self.batch_size]

        # load_preprocessed_batch already returns NumPy arrays
        batch_frames, batch_action_labels, batch_deception_labels = self.load_preprocessed_batch(batch_files)

        return batch_frames, (batch_action_labels, batch_deception_labels)

    def load_preprocessed_batch(self, batch_files):
        """
        Read embeddings and labels from disk for the given video paths.

        A video is skipped (and recorded in the log and on stdout) when any of
        its files cannot be loaded or when it has fewer than PAD_LENGTH rows.
        NOTE(review): nothing visible in this notebook pads the saved
        embeddings to PAD_LENGTH, so this check may reject most videos --
        confirm that it is still wanted.

        :return: Three NumPy arrays: frames, action labels, deception labels.
        """
        batch_frames = []
        batch_action_labels = []
        batch_deception_labels = []

        for video_file in batch_files:
            video_name = os.path.basename(video_file).split('.')[0]

            try:
                frames = np.load(os.path.join(self.save_dir, f"{video_name}_preprocessed.npy"))

                # Reject clips shorter than the required length
                if len(frames) < PAD_LENGTH:
                    raise ValueError(f"Video '{video_file}' has insufficient frames: {len(frames)} (expected {PAD_LENGTH})")

                action_labels = np.load(os.path.join(self.save_dir, f"{video_name}_action_label.npy"))
                deception_labels = np.load(os.path.join(self.save_dir, f"{video_name}_deception_label.npy"))
            except Exception as e:
                # Best-effort loading: record the problem and move on
                logging.warning(f"Skipping file {video_file}: {str(e)}")
                print(f"Skipping file {video_file} due to error: {str(e)}")
                continue

            # Append only after all three files were read, so the lists stay aligned
            batch_frames.append(frames)
            batch_action_labels.append(action_labels)
            batch_deception_labels.append(deception_labels)

        return np.array(batch_frames), np.array(batch_action_labels), np.array(batch_deception_labels)

    def on_epoch_end(self):
        """Reorder the videos randomly for the next epoch."""
        print("Resetting indices and shuffling data for the next epoch")
        order = np.random.permutation(len(self.video_paths))
        self.video_paths = np.array(self.video_paths)[order]

    def _output_signature(self):
        """
        Describe the tensors of one batch as nested tf.TensorSpec objects.

        Shapes are built from the global BATCH_SIZE, PAD_LENGTH, embedding_dim
        and ACTION_CLASSES values.
        """
        video_shape = (BATCH_SIZE, PAD_LENGTH, embedding_dim)
        action_label_shape = (BATCH_SIZE, PAD_LENGTH, ACTION_CLASSES)
        deception_label_shape = (BATCH_SIZE,)

        return (
            tf.TensorSpec(shape=video_shape, dtype=tf.float32),
            (
                tf.TensorSpec(shape=action_label_shape, dtype=tf.float32),
                tf.TensorSpec(shape=deception_label_shape, dtype=tf.float32),
            )
        )


In [82]:
import numpy as np
import tensorflow as tf
import os
import logging

# Set up logging to track skipped files
logging.basicConfig(filename='skipped_files.log', level=logging.WARNING)

class VideoDataSequence(tf.keras.utils.Sequence):
    """Frame-at-a-time feeder for a stateful model.

    Videos are read one after another from ``save_dir`` (files named
    ``<name>_preprocessed.npy``, ``<name>_action_label.npy`` and
    ``<name>_deception_label.npy``). Every ``__getitem__`` call hands out the
    next unread frame of the current video together with that video's labels.
    The ``index`` passed by Keras is ignored: items are produced strictly in
    call order, so the object is only meaningful when consumed sequentially.
    """

    def __init__(self, video_paths, batch_size, save_dir, modelD, total_epochs):
        """
        :param video_paths: List of video file paths (only the base name is used).
        :param batch_size: Stored for reference; each item always holds one frame.
        :param save_dir: Directory where the preprocessed .npy files are stored.
        :param modelD: Model whose stateful LSTM layers are reset before each video.
        :param total_epochs: Number of epochs the training run will perform.
        """
        # Initialise the Keras base class so its bookkeeping attributes exist
        # (newer Keras versions warn when this call is missing).
        super().__init__()
        self.video_paths = video_paths
        self.batch_size = batch_size
        self.save_dir = save_dir
        self.frames = []                # Unread frames of the current video
        self.action_labels = []
        self.deception_labels = []
        self.current_video_index = 0    # Index of the next video to load
        self.model = modelD
        self.total_epochs = total_epochs
        self.current_epoch = 0
        self._total_frames = None       # Lazily computed cache for __len__
        self.load_next_video()

    def __len__(self):
        """
        Return the total number of frames across all videos.

        Counting requires reading every embedding file, so the result is
        computed on first use and then reused.
        """
        if self._total_frames is None:
            self._total_frames = sum(self.load_frame_count(v) for v in self.video_paths)
        return self._total_frames

    def __getitem__(self, index):
        """
        Return the next frame and the labels of the video it belongs to.

        :param index: Ignored (see class docstring).
        :return: (frame, (action_label, deception_label)); the frame gains two
                 leading axes of size 1, the labels are wrapped to shapes
                 (1, 1, n_actions) and (1, 1).
        :raises IndexError: When every video is consumed during the final epoch.
        """
        # Move on to the next video that actually has frames. A video that
        # fails to load leaves self.frames empty, so keep trying instead of
        # treating one bad file as the end of the data.
        while len(self.frames) == 0 and self.current_video_index < len(self.video_paths):
            self.load_next_video()

        if len(self.frames) == 0:
            if self.current_epoch >= self.total_epochs - 1:
                print("Final epoch, no more frames available. Raising IndexError.")
                raise IndexError("No more frames available. Dataset is exhausted.")
            # Not the final epoch: hand back an all-zero placeholder item
            print("EMPTY")
            return np.zeros((1, 1, embedding_dim)), (np.zeros((1, 1, ACTION_CLASSES)), np.zeros((1,1)))

        frame = self.frames.pop(0)
        frame = np.reshape(frame, (1, 1) + frame.shape)

        action_target = np.expand_dims(np.array([self.action_labels]), axis=0)
        deception_target = np.expand_dims(np.array([self.deception_labels]), axis=0)
        return frame, (action_target, deception_target)

    def load_next_video(self):
        """
        Load the next video's frames and labels and advance the video index.

        When no videos remain, the buffers are cleared and nothing else
        happens. Otherwise the model's stateful LSTM layers are reset first so
        no state carries over between videos. A load failure is logged and
        leaves the buffers empty.
        """
        if self.current_video_index >= len(self.video_paths):
            self.frames, self.action_labels, self.deception_labels = [], [], []
            return

        # Reset the states of all stateful LSTM layers
        for layer in self.model.layers:
            if isinstance(layer, tf.keras.layers.LSTM) and layer.stateful:
                layer.reset_states()

        video_file = self.video_paths[self.current_video_index]
        video_name = os.path.basename(video_file).split('.')[0]

        try:
            self.frames = list(np.load(os.path.join(self.save_dir, f"{video_name}_preprocessed.npy")))
            self.action_labels = np.load(os.path.join(self.save_dir, f"{video_name}_action_label.npy"))
            self.deception_labels = np.load(os.path.join(self.save_dir, f"{video_name}_deception_label.npy"))
        except Exception as e:
            logging.warning(f"Skipping file {video_file}: {str(e)}")
            print(f"Skipping file {video_file} due to error: {str(e)}")
            self.frames, self.action_labels, self.deception_labels = [], [], []

        self.current_video_index += 1

    def load_frame_count(self, video_file):
        """
        Return the number of saved frames for ``video_file`` (0 if unreadable).
        """
        video_name = os.path.basename(video_file).split('.')[0]
        try:
            frames = np.load(os.path.join(self.save_dir, f"{video_name}_preprocessed.npy"))
            return len(frames)
        except Exception as e:
            logging.warning(f"Skipping file {video_file}: {str(e)}")
            return 0

    def on_epoch_end(self):
        """
        Count the finished epoch and, unless it was the last one, rewind.

        Rewinding sets the video index back to 0 and clears the frame buffer so
        the next ``__getitem__`` call starts again from the first video.
        """
        self.current_epoch += 1
        if self.current_epoch < self.total_epochs:
            self.current_video_index = 0
            print("\nEPOCH HAS ENDED" + str(self.current_epoch))
            self.frames=[]


In [72]:
# Compile the model
# Both heads use binary cross-entropy: the action head is multi-label (one sigmoid per
# action), the deception head is a single sigmoid.
model.compile(#optimizer=tf.keras.optimizers.Adam(learning_rate=1e-2),
              optimizer='Adam',
              loss={'action_output': 'binary_crossentropy', 
                    'deception_output': 'binary_crossentropy'},
              metrics={'action_output': 'accuracy', 'deception_output': 'accuracy'})

# Display model summary
model.summary()

# Small smoke-test subset: only the first two training and first two test videos are used.
train = []
train.append(video_train[0])
train.append(video_train[1])

test = []
test.append(video_test[0])
test.append(video_test[1])
train_dataset = VideoDataSequence(train, batch_size=BATCH_SIZE, save_dir='preprocessed_train', modelD=model, total_epochs=EPOCHS)
test_dataset = VideoDataSequence(test, batch_size=BATCH_SIZE, save_dir='preprocessed_test', modelD=model, total_epochs=EPOCHS)

# NOTE(review): the reason for subtracting 2 from the step counts is not evident from
# this notebook -- confirm and document it.
history = model.fit(
    train_dataset,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_data=test_dataset,
    steps_per_epoch=len(train_dataset)-2,
    validation_steps=len(test_dataset)-2,
    shuffle=True,  # NOTE(review): VideoDataSequence.__getitem__ ignores its index, so frame order does not depend on this flag; it does not control multiprocessing or prefetching
    verbose=1  
)

model.save('deception_model.keras')



Epoch 1/3
[1m173/175[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 129ms/step - action_output_accuracy: 1.0000 - action_output_loss: 0.0028 - deception_output_accuracy: 1.0000 - deception_output_loss: 1.1921e-07 - loss: 0.0028EMPTY
[1m174/175[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 129ms/step - action_output_accuracy: 1.0000 - action_output_loss: 0.0028 - deception_output_accuracy: 1.0000 - deception_output_loss: 1.1921e-07 - loss: 0.0028
EPOCH HAS ENDED1
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 129ms/step - action_output_accuracy: 1.0000 - action_output_loss: 0.0029 - deception_output_accuracy: 1.0000 - deception_output_loss: 1.1921e-07 - loss: 0.0029EMPTY

EPOCH HAS ENDED1
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 176ms/step - action_output_accuracy: 1.0000 - action_output_loss: 0.0029 - deception_output_accuracy: 1.0000 - deception_output_loss: 1.1921e-07 - loss: 0.0029 - val_action_output_accuracy: 0.0000e+00

In [215]:
def video_data_generator(video_paths, save_dir, model, total_epochs):
    """
    Yield one frame embedding at a time, with its video's labels, for several epochs.

    For every epoch the videos are visited in a fresh random order. For each
    video the saved files ``<name>_preprocessed.npy``,
    ``<name>_action_label.npy`` and ``<name>_deception_label.npy`` are read from
    ``save_dir``, the model's stateful LSTM layers are reset, and every frame is
    yielded in order. A video that raises while loading or yielding is logged,
    reported on stdout and skipped.

    The caller's ``video_paths`` is never modified: shuffling happens on a
    private copy, so any sequence type (list, tuple, array) is accepted.

    :param video_paths: Sequence of video file paths (only the base name is used).
    :param save_dir: Directory holding the preprocessed .npy files.
    :param model: Object with a ``layers`` attribute; stateful LSTM layers in it
                  are reset before each video.
    :param total_epochs: Number of passes over the videos.
    :return: Generator of (frame, (action_label, deception_label)) where the
             frame gains two leading axes of size 1 and the labels are wrapped
             to shapes (1, 1, n_actions) and (1, 1).
    """
    # Work on a copy so shuffling does not reorder the caller's sequence
    paths = list(video_paths)
    if not paths:
        return  # Nothing to yield; avoids indexing into an empty list

    for _ in range(total_epochs):
        np.random.shuffle(paths)  # New video order for each epoch

        for video_file in paths:
            video_name = os.path.basename(video_file).split('.')[0]

            try:
                frames = np.load(os.path.join(save_dir, f"{video_name}_preprocessed.npy"))
                action_labels = np.load(os.path.join(save_dir, f"{video_name}_action_label.npy"))
                deception_labels = np.load(os.path.join(save_dir, f"{video_name}_deception_label.npy"))

                # Reset LSTM states for each new video
                for layer in model.layers:
                    if isinstance(layer, tf.keras.layers.LSTM) and layer.stateful:
                        layer.reset_states()

                # The labels are the same for every frame of a video, so build
                # the wrapped arrays once instead of once per frame.
                action_target = np.expand_dims(np.array([action_labels]), axis=0)
                deception_target = np.expand_dims(np.array([deception_labels]), axis=0)

                for frame in frames:
                    # Add batch and time axes in front of the embedding
                    yield np.expand_dims(frame, axis=(0, 1)), (action_target, deception_target)

            except Exception as e:
                logging.warning(f"Skipping file {video_file}: {str(e)}")
                print(f"Skipping file {video_file} due to error: {str(e)}")


In [216]:
def create_tf_dataset(video_paths, batch_size, save_dir, model, total_epochs, embedding_dim, action_classes):
    """
    Wrap video_data_generator in a tf.data.Dataset that yields one frame per element.

    No shuffling, batching or prefetching is applied on top of the generator,
    so elements arrive in exactly the order the generator produces them.

    :param video_paths: Sequence of video file paths forwarded to the generator.
    :param batch_size: Accepted for interface compatibility; not used, since
                       every element already carries a leading axis of size 1.
    :param save_dir: Directory holding the preprocessed .npy files.
    :param model: Model forwarded to the generator.
    :param total_epochs: Number of passes the generator makes over the videos.
    :param embedding_dim: Length of each frame embedding.
    :param action_classes: Number of action labels per frame.
    :return: Dataset of (frame, (action_label, deception_label)) float32
             tensors with shapes (1, 1, embedding_dim),
             (1, 1, action_classes) and (1, 1).
    """
    frame_spec = tf.TensorSpec(shape=(1, 1, embedding_dim), dtype=tf.float32)
    # Use the caller-supplied class count rather than the module-level constant
    action_spec = tf.TensorSpec(shape=(1, 1, action_classes), dtype=tf.float32)
    deception_spec = tf.TensorSpec(shape=(1, 1), dtype=tf.float32)

    dataset = tf.data.Dataset.from_generator(
        lambda: video_data_generator(video_paths, save_dir, model, total_epochs),
        output_signature=(frame_spec, (action_spec, deception_spec)),
    )

    return dataset


In [218]:
# Compile the model
# Both heads use binary cross-entropy: the action head is multi-label (one sigmoid per
# action), the deception head is a single sigmoid.
model.compile(#optimizer=tf.keras.optimizers.Adam(learning_rate=1e-2),
              optimizer='Adam',
              loss={'action_output': 'binary_crossentropy', 
                    'deception_output': 'binary_crossentropy'},
              metrics={'action_output': 'accuracy', 'deception_output': 'accuracy'})

# Example Usage

# NOTE(review): `train` and `test` are built here but not used below; the datasets and
# step counts in this cell are created from the full video_train / video_test lists.
train = []
train.append(video_train[0])
train.append(video_train[1])

test = []
test.append(video_test[0])
test.append(video_test[1])

# One training step per saved frame: sum the first-axis length of every embedding file.
# A missing .npy file makes np.load raise here, before training starts.
steps_per_epoch = sum([np.load(os.path.join('preprocessed_train', f"{os.path.basename(video_file).split('.')[0]}_preprocessed.npy")).shape[0] for video_file in video_train])
validation_steps = sum([np.load(os.path.join('preprocessed_test', f"{os.path.basename(video_file).split('.')[0]}_preprocessed.npy")).shape[0] for video_file in video_test])

train_dataset = create_tf_dataset(
    video_paths=video_train,
    batch_size=BATCH_SIZE,
    save_dir='preprocessed_train',
    model=model,  # Stateful LSTM model
    total_epochs=EPOCHS,
    embedding_dim=embedding_dim,
    action_classes=ACTION_CLASSES
)

validation_dataset = create_tf_dataset(
    video_paths=video_test,
    batch_size=BATCH_SIZE,
    save_dir='preprocessed_test',
    model=model,  # Stateful LSTM model
    total_epochs=EPOCHS,
    embedding_dim=embedding_dim,
    action_classes=ACTION_CLASSES
)

# Training the model
# NOTE(review): the reason for subtracting 1 from the step counts is not evident from
# this notebook -- confirm and document it.
history = model.fit(
    train_dataset,
    epochs=EPOCHS,
    steps_per_epoch=steps_per_epoch-1,
    validation_steps=validation_steps-1,
    validation_data=validation_dataset
)

model.save('deception_model_generator.keras')


[1m9936/9936[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1503s[0m 151ms/step - action_output_accuracy: 0.4423 - action_output_loss: 0.1569 - deception_output_accuracy: 0.9880 - deception_output_loss: 0.0575 - loss: 0.2144 - val_action_output_accuracy: 0.0000e+00 - val_action_output_loss: 2.0269 - val_deception_output_accuracy: 0.9902 - val_deception_output_loss: 0.1313 - val_loss: 2.1582


In [None]:
# model.predict returns one array per model output: index 0 is the action head,
# index 1 the deception head.
action_predictions = model.predict(create_tf_dataset(video_test, batch_size=BATCH_SIZE, save_dir='preprocessed_test', model=model, total_epochs=1, embedding_dim=embedding_dim, action_classes=ACTION_CLASSES))  # Predicts probabilities between 0 and 1

#print(predictions)

# Threshold for detecting an action
THRESHOLD = 0.5

# NOTE(review): the dataset yields one frame per element, so each entry iterated below is
# the prediction for a single frame rather than a whole video; the "Video N" label and the
# "average over all frames" wording therefore look inaccurate. The generator also shuffles
# the video order, so entries cannot be mapped back to video_test by position. Confirm and
# regroup per video if that is what is wanted.
for i, video_pred in enumerate(action_predictions[0]):  # action_predictions[0] corresponds to action output
    print(f"\nVideo {i+1}:")
    
    # Mean over the first axis of this entry
    avg_action_pred = np.mean(video_pred, axis=0)
    
    # Apply the threshold to determine detected actions
    detected_actions = avg_action_pred > THRESHOLD
    
    # Print the detected action classes
    detected_action_names = [ACTION_COLUMNS[j] for j, detected in enumerate(detected_actions) if detected]
    
    if detected_action_names:
        print("Detected Actions:", detected_action_names)
    else:
        print("No actions detected above threshold.")


In [46]:
def predict_live_video(model, video_source=0, frame_interval=None):
    """Predict actions and deception from a live video feed (webcam or camera).

    Every captured frame is shown in a window. On every ``frame_interval``-th
    frame, that single frame is resized to FRAME_SIZE, embedded with
    preprocess_frames_with_vit and passed to ``model.predict``; the actions
    whose probability exceeds 0.5 and the deception verdict are printed.
    The loop ends when a frame cannot be read or when 'q' is pressed.

    The camera and the display window are released even if the loop is
    interrupted (for example by stopping the notebook cell).

    :param model: Model returning (action_pred, deception_pred) from predict.
    :param video_source: Argument for cv2.VideoCapture; 0 is the default webcam.
    :param frame_interval: Run a prediction every this many frames; defaults
                           to NUM_FRAMES, the stride used when sampling the
                           training videos.
    """
    if frame_interval is None:
        frame_interval = NUM_FRAMES

    cap = cv2.VideoCapture(video_source)
    frame_count = 0

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                print("Failed to capture video. Exiting...")
                break

            frame_count += 1

            if frame_count % frame_interval == 0:
                # Only the current frame is used, so resize just this one
                frame_resized = cv2.resize(frame, FRAME_SIZE)
                embeddings = preprocess_frames_with_vit([frame_resized])

                # Add the batch axis expected by the model
                action_pred, deception_pred = model.predict(np.expand_dims(embeddings, axis=0))

                # Apply the threshold to determine detected actions
                detected_actions = action_pred[0][0] > 0.5
                detected_action_names = [ACTION_COLUMNS[j] for j, detected in enumerate(detected_actions) if detected]

                if detected_action_names:
                    print("Detected Actions:", detected_action_names)
                else:
                    print("No actions detected above threshold.")

                deception = 'Deceptive' if deception_pred[0][0] > 0.5 else 'Truthful'
                print(f"Predicted deception: {deception}")

            # Display the live video feed
            cv2.imshow('Live Video Feed', frame)

            # Break on 'q' key press
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Always free the camera and close the window, including on
        # KeyboardInterrupt or an error inside the loop.
        cap.release()
        cv2.destroyAllWindows()


In [48]:
# Reload the model saved by the generator-based training cell; this rebinds the global `model`.
model = load_model('deception_model_generator.keras')

# Perform real-time prediction 
predict_live_video(model, video_source=0)  # Set to 0 for default webcam


Shape of frame embeddings: (1, 768)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
Detected Actions: ['holdingArms', 'soothingArms', 'interlockedHand', 'soothinHandM']
Predicted deception: Deceptive
Shape of frame embeddings: (1, 768)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
Detected Actions: ['holdingArms', 'soothingArms', 'interlockedHand', 'soothinHandM']
Predicted deception: Deceptive
Shape of frame embeddings: (1, 768)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
Detected Actions: ['holdingArms', 'soothingArms', 'interlockedHand', 'soothinHandM']
Predicted deception: Deceptive
Shape of frame embeddings: (1, 768)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
Detected Actions: ['holdingArms', 'soothingArms', 'interlockedHand', 'soothinHandM']
Predicted deception: Deceptive
Shape of frame embeddings: (1, 768)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/s

KeyboardInterrupt: 