In [1]:
import os
import glob
import random
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras import layers, Sequential, Model, optimizers
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, precision_recall_fscore_support, accuracy_score, f1_score, matthews_corrcoef, confusion_matrix
import warnings 
warnings.filterwarnings("ignore")
%matplotlib inline


2024-09-30 16:49:44.515600: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-09-30 16:49:44.515705: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-09-30 16:49:44.654451: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [2]:
# Function to create directory if it does not exist
def create_directory(path):
    """Create `path` (including missing parent directories) if it is absent.

    Calling this on a directory that already exists is a no-op.

    Args:
        path: Directory path to create.
    """
    # exist_ok=True avoids the check-then-create race of
    # `if not os.path.exists(path): os.makedirs(path)`.
    os.makedirs(path, exist_ok=True)

# --- Read-only inputs ---
# Synthetic training videos and the separate test videos
base_path = '/kaggle/input/movement-dataset-entire/synthetic_dataset'
test_dir = '/kaggle/input/test-movement-dataset/input_videos'

# --- Writable outputs ---
# Extracted frames for the training and test videos respectively
frames_base_dir = '/kaggle/working/frames'
frames_test_dir = '/kaggle/working/test-frames'

# Make sure both output directories exist before any frame is written
create_directory(frames_test_dir)
create_directory(frames_base_dir)


In [3]:
# Accumulators filled in place by the extraction functions:
# one frame path and one label row per extracted frame.
train_image_paths, train_foot_contacts = [], []
test_image_paths, test_foot_contacts = [], []

# Upper bound on extracted training frames, and the running total so far
max_frames = 6000
global_frame_count = 0

# Function to process video files and extract frames and labels
def process_videos(base_path, frames_base_dir, image_paths, foot_contacts):
    """Extract labelled frames from every `<activity>_view0.mp4` under `base_path`.

    Directory layout walked by this function:
        base_path/<person>/<activity>/foot_contacts.npy
        base_path/<person>/<activity>/<activity>_view0.mp4

    Every frame that has a matching row in `foot_contacts.npy` is written to
    `frames_base_dir/<person>_<activity>/frame_NNNN.jpg`. Its path is appended
    to `image_paths` and its label row to `foot_contacts`.

    The number of extracted frames is capped by the module-level `max_frames`;
    the running total lives in the module-level `global_frame_count`, so the
    budget is shared across calls.

    Args:
        base_path: Root directory containing one sub-directory per person.
        frames_base_dir: Directory under which per-video frame folders are created.
        image_paths: List mutated in place with the saved frame paths.
        foot_contacts: List mutated in place with the label row of each saved frame.
    """
    global global_frame_count

    # Nothing to do if the budget was already spent by an earlier call.
    if global_frame_count >= max_frames:
        return

    # os.listdir() returns entries in arbitrary order; sorting makes the subset
    # selected under the frame cap reproducible between runs.
    for person_folder in sorted(os.listdir(base_path)):
        person_path = os.path.join(base_path, person_folder)
        if not os.path.isdir(person_path):
            continue

        # Loop through each activity folder
        for activity_folder in sorted(os.listdir(person_path)):
            activity_path = os.path.join(person_path, activity_folder)
            if not os.path.isdir(activity_path):
                continue

            # Both the label file and the view-0 video must be present.
            foot_contacts_path = os.path.join(activity_path, 'foot_contacts.npy')
            video_file = f'{activity_folder}_view0.mp4'
            video_path = os.path.join(activity_path, video_file)
            if not (os.path.exists(foot_contacts_path) and os.path.isfile(video_path)):
                continue

            # Load foot contact labels (indexed by frame number below)
            foot_contact_labels = np.load(foot_contacts_path)

            # Directory to save the frames of the current video
            frames_dir = os.path.join(frames_base_dir, f'{person_folder}_{activity_folder}')
            create_directory(frames_dir)

            cap = cv2.VideoCapture(video_path)
            frame_count = 0
            try:
                if not cap.isOpened():
                    print(f"Error: Could not open video {video_path}")
                    continue

                # Stop at the first frame without a label: such frames cannot be
                # used for training, so they should neither be written to disk
                # nor consume the global frame budget.
                while frame_count < len(foot_contact_labels) and global_frame_count < max_frames:
                    ret, frame = cap.read()
                    if not ret:
                        break

                    # Save frame as image
                    frame_path = os.path.join(frames_dir, f'frame_{frame_count:04d}.jpg')
                    if cv2.imwrite(frame_path, frame):
                        image_paths.append(frame_path)
                        foot_contacts.append(foot_contact_labels[frame_count])
                        global_frame_count += 1
                    else:
                        # Do not register a sample whose image is missing on disk.
                        print(f"Warning: Could not write frame {frame_path}")

                    # Always advance so later frames stay aligned with their labels.
                    frame_count += 1
            finally:
                # Release the capture even if reading or writing raised.
                cap.release()

            limit_reached = global_frame_count >= max_frames
            if limit_reached:
                print(f"Reached the frame limit of {max_frames}.")

            print(f"Processed {frame_count} frames from {video_file}")

            # Stop further processing if frame limit is reached
            if limit_reached:
                return

# Walk the synthetic dataset and fill the training accumulators in place
process_videos(
    base_path,
    frames_base_dir,
    image_paths=train_image_paths,
    foot_contacts=train_foot_contacts,
)

# One column per contact point; the position in the label row selects the column
train_data = {'filename': train_image_paths}
train_data.update(
    (column, [label[position] for label in train_foot_contacts])
    for position, column in enumerate(('left_heel', 'left_toe', 'right_heel', 'right_toe'))
)

# Per-leg value is the bitwise AND of that leg's heel and toe entries
train_left_leg_values = [heel & toe for heel, toe, *_ in train_foot_contacts]
train_right_leg_values = [heel & toe for _, _, heel, toe, *_ in train_foot_contacts]

train_data_left_leg = dict(filename=train_image_paths, left_leg=train_left_leg_values)
train_data_right_leg = dict(filename=train_image_paths, right_leg=train_right_leg_values)

# Materialise the three views of the training set
train_df = pd.DataFrame(train_data)
train_left_df = pd.DataFrame(train_data_left_leg)
train_right_df = pd.DataFrame(train_data_right_leg)

# Preview: all contacts, then right leg, then left leg
print(train_df.head(), train_right_df.head(), train_left_df.head(), sep='\n')


Processed 60 frames from 449_swing_dancing_view0.mp4
Processed 60 frames from 1847_baseball_walk_in_view0.mp4
Processed 60 frames from 378_salsa_dancing_view0.mp4
Processed 60 frames from 321_boxing_view0.mp4
Processed 60 frames from 1854_baseball_step_up_to_bat_view0.mp4
Processed 60 frames from 43_walking_view0.mp4
Processed 60 frames from 189_baseball_pitching_view0.mp4
Processed 60 frames from 391_salsa_dancing_view0.mp4
Processed 60 frames from 2166_standing_turn_90_right_view0.mp4
Processed 60 frames from 376_salsa_dancing_view0.mp4
Processed 60 frames from 1855_baseball_milling_idle_view0.mp4
Processed 60 frames from 2165_standing_turn_90_left_view0.mp4
Processed 60 frames from 448_salsa_dancing_view0.mp4
Processed 60 frames from 392_salsa_dancing_view0.mp4
Processed 60 frames from 326_samba_dancing_view0.mp4
Processed 60 frames from 1851_baseball_hit_view0.mp4
Processed 60 frames from 1529_quarterback_pass_view0.mp4
Processed 60 frames from 328_boxing_view0.mp4
Processed 60 fra

In [4]:
# Fresh accumulators for the test set (frame paths and per-frame label rows)
test_image_paths, test_foot_contacts = [], []

def process_test_videos(test_dir, frame_test_dir, image_paths, foot_contacts):
    """Extract labelled frames from every test video under `test_dir`.

    Directory layout walked by this function:
        test_dir/<activity>/foot_contacts.npy
        test_dir/<activity>/<activity>.mp4

    Every frame that has a matching row in `foot_contacts.npy` is written to
    `frame_test_dir/<activity>/frame_NNNN.jpg`. Its path is appended to
    `image_paths` and its label row to `foot_contacts`.

    Args:
        test_dir: Root directory containing one sub-directory per test video.
        frame_test_dir: Directory under which per-video frame folders are created.
        image_paths: List mutated in place with the saved frame paths.
        foot_contacts: List mutated in place with the label row of each saved frame.
    """
    # os.listdir() returns entries in arbitrary order; sort for reproducibility.
    for activity_folder in sorted(os.listdir(test_dir)):
        activity_path = os.path.join(test_dir, activity_folder)
        if not os.path.isdir(activity_path):
            continue

        # Both the label file and the video must be present.
        foot_contacts_path = os.path.join(activity_path, 'foot_contacts.npy')
        video_file = f"{activity_folder}.mp4"
        video_path = os.path.join(activity_path, video_file)
        if not (os.path.exists(foot_contacts_path) and os.path.exists(video_path)):
            continue

        # Load foot contact labels (indexed by frame number below)
        foot_contact_labels = np.load(foot_contacts_path)

        # Directory to save the frames of the current video
        frames_dir = os.path.join(frame_test_dir, activity_folder)
        create_directory(frames_dir)

        cap = cv2.VideoCapture(video_path)
        frame_count = 0
        try:
            if not cap.isOpened():
                print(f"Error: Could not open video {video_path}")
                continue

            # Stop at the first frame without a label: unlabelled frames cannot
            # be evaluated, so there is no point in writing them to disk.
            while frame_count < len(foot_contact_labels):
                ret, frame = cap.read()
                if not ret:
                    break

                # Save frame as image
                frame_path = os.path.join(frames_dir, f'frame_{frame_count:04d}.jpg')
                if cv2.imwrite(frame_path, frame):
                    image_paths.append(frame_path)
                    foot_contacts.append(foot_contact_labels[frame_count])
                else:
                    # Do not register a sample whose image is missing on disk.
                    print(f"Warning: Could not write frame {frame_path}")

                # Always advance so later frames stay aligned with their labels.
                frame_count += 1
        finally:
            # Release the capture even if reading or writing raised.
            cap.release()

        print(f"Processed {frame_count} frames from {video_file}")

# Walk the test videos and fill the test accumulators in place
process_test_videos(
    test_dir,
    frames_test_dir,
    image_paths=test_image_paths,
    foot_contacts=test_foot_contacts,
)

# One column per contact point; the position in the label row selects the column
test_data = {'filename': test_image_paths}
test_data.update(
    (column, [label[position] for label in test_foot_contacts])
    for position, column in enumerate(('left_heel', 'left_toe', 'right_heel', 'right_toe'))
)

# Per-leg value is the bitwise AND of that leg's heel and toe entries
test_left_leg_values = [heel & toe for heel, toe, *_ in test_foot_contacts]
test_right_leg_values = [heel & toe for _, _, heel, toe, *_ in test_foot_contacts]

test_data_left_leg = dict(filename=test_image_paths, left_leg=test_left_leg_values)
test_data_right_leg = dict(filename=test_image_paths, right_leg=test_right_leg_values)

# Materialise the three views of the test set
test_df = pd.DataFrame(test_data)
test_left_df = pd.DataFrame(test_data_left_leg)
test_right_df = pd.DataFrame(test_data_right_leg)

# Preview: all contacts, then right leg, then left leg
print(test_df.head(), test_right_df.head(), test_left_df.head(), sep='\n')


Processed 621 frames from jumping_video_prashant_gupta.mp4
                                            filename  left_heel  left_toe  \
0  /kaggle/working/test-frames/jumping_video_pras...          0         1   
1  /kaggle/working/test-frames/jumping_video_pras...          0         1   
2  /kaggle/working/test-frames/jumping_video_pras...          0         1   
3  /kaggle/working/test-frames/jumping_video_pras...          1         1   
4  /kaggle/working/test-frames/jumping_video_pras...          1         1   

   right_heel  right_toe  
0           1          1  
1           1          1  
2           1          1  
3           1          1  
4           1          1  
                                            filename  right_leg
0  /kaggle/working/test-frames/jumping_video_pras...          1
1  /kaggle/working/test-frames/jumping_video_pras...          1
2  /kaggle/working/test-frames/jumping_video_pras...          1
3  /kaggle/working/test-frames/jumping_video_pras...        

In [5]:
# Function to load and preprocess image
def load_and_preprocess_image(image_path, target_size=(224, 224)):
    """Load an image, pad it to a square with white, resize and scale to [0, 1].

    Args:
        image_path: Path of the image file, as `bytes` (what
            `tf.numpy_function` passes for a string tensor) or as `str`.
        target_size: Output size as `(height, width)`.

    Returns:
        A float32 array of shape `(target_size[0], target_size[1], 3)` in RGB
        channel order. If the image cannot be read or processed, a message is
        printed and an all-zero array of the same shape is returned instead.
    """
    out_height, out_width = target_size[0], target_size[1]
    try:
        # Accept both bytes and str so the function also works outside tf.data;
        # previously a str path hit `.decode` and silently produced a black image.
        if isinstance(image_path, bytes):
            path = image_path.decode('utf-8')
        else:
            path = str(image_path)

        img = cv2.imread(path)
        if img is None:
            print(f"Failed to load image at path: {image_path}")
            return np.zeros((out_height, out_width, 3), dtype=np.float32)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # Pad the shorter side symmetrically so the image becomes square
        # before resizing (keeps the aspect ratio of the content).
        h, w, _ = img.shape
        missing = abs(h - w)
        before = missing // 2
        after = missing - before
        if h > w:
            padding = ((0, 0), (before, after), (0, 0))
        else:
            padding = ((before, after), (0, 0), (0, 0))
        img = np.pad(img, padding, mode='constant', constant_values=255)

        # cv2.resize takes dsize as (width, height); pass it in that order so
        # the result has the same (height, width) shape as the fallback array.
        img = cv2.resize(img, (out_width, out_height), interpolation=cv2.INTER_AREA)
        img = img / 255.0
        return img.astype(np.float32)
    except Exception as e:
        # Best effort: a single bad image must not abort the input pipeline.
        print(f"Error processing image at path: {image_path}, error: {e}")
        return np.zeros((out_height, out_width, 3), dtype=np.float32)

# Data Augmentation
# Random horizontal flip on 224x224 RGB inputs. The input shape is declared
# with an explicit Input layer rather than the legacy `input_shape=` layer
# keyword argument.
data_augmentation = tf.keras.Sequential(
    [
        layers.Input(shape=(224, 224, 3)),
        layers.RandomFlip("horizontal"),
    ]
)
def create_dataset(image_paths, labels, batch_size, training=True):
    """Build a batched, prefetched `tf.data.Dataset` of (image, label) pairs.

    Images are loaded with `load_and_preprocess_image`. When `training` is
    true, the samples are shuffled and passed through `data_augmentation`.

    Args:
        image_paths: Sequence of image file paths.
        labels: Sequence of labels, aligned with `image_paths`.
        batch_size: Number of samples per batch.
        training: Whether to shuffle and augment.

    Returns:
        A `tf.data.Dataset` yielding `(images, labels)` batches where each
        image has static shape (224, 224, 3).
    """
    def load_and_preprocess_image_tf(image_path, label):
        img = tf.numpy_function(load_and_preprocess_image, [image_path], tf.float32)
        # numpy_function loses the static shape; restore it for downstream layers.
        img.set_shape((224, 224, 3))
        return img, label

    def augment(img, label):
        # The augmentation model expects a batch dimension; add and strip it.
        # training=True is explicit so the random flip is always active here.
        augmented = data_augmentation(tf.expand_dims(img, 0), training=True)[0]
        return augmented, label

    dataset = tf.data.Dataset.from_tensor_slices((image_paths, labels))

    if training:
        # Shuffle the (path, label) pairs BEFORE decoding: a full-size shuffle
        # buffer then holds short strings instead of every decoded image.
        # max(..., 1) keeps the buffer size valid for an empty input.
        dataset = dataset.shuffle(buffer_size=max(len(image_paths), 1))

    dataset = dataset.map(load_and_preprocess_image_tf, num_parallel_calls=tf.data.AUTOTUNE)

    if training:
        dataset = dataset.map(augment, num_parallel_calls=tf.data.AUTOTUNE)

    dataset = dataset.batch(batch_size).prefetch(buffer_size=tf.data.AUTOTUNE)
    return dataset


In [6]:
from tensorflow.keras import layers, models, optimizers
# Define the MLP architecture
# Three Dense -> BatchNorm -> Dropout stages on the flattened 224x224x3 image,
# followed by a single sigmoid unit for the binary right-leg label.
# The input shape is declared with an explicit Input layer rather than the
# legacy `input_shape=` layer keyword argument.
model = models.Sequential([
    layers.Input(shape=(224, 224, 3)),
    layers.Flatten(),
    layers.Dense(1024, activation='relu'),
    layers.BatchNormalization(),
    layers.Dropout(0.5),
    layers.Dense(512, activation='relu'),
    layers.BatchNormalization(),
    layers.Dropout(0.5),
    layers.Dense(256, activation='relu'),
    layers.BatchNormalization(),
    layers.Dropout(0.5),
    layers.Dense(1, activation='sigmoid')
])

# Compile the model with Adam optimizer
optimizer = optimizers.Adam(learning_rate=0.001)
model.compile(optimizer=optimizer,
              loss='binary_crossentropy',
              metrics=['accuracy'])

# Print the summary of the model
model.summary()

# Batch size shared by the training and validation pipelines
BATCH_SIZE = 4

# Prepare datasets
# NOTE(review): validation uses the TEST frames. Any model selection driven by
# validation metrics (e.g. early stopping with restore_best_weights) is then
# tuned on the test set, which biases reported test scores. Consider holding
# out part of the training videos instead, e.g.
#   train_test_split(train_right_df['filename'].values,
#                    train_right_df['right_leg'].values,
#                    test_size=0.2, random_state=42)
# ideally split per video so neighbouring frames do not land on both sides.
train_dataset = create_dataset(
    train_right_df['filename'].values,
    train_right_df['right_leg'].values,
    batch_size=BATCH_SIZE,
    training=True,
)
val_dataset = create_dataset(
    test_right_df['filename'].values,
    test_right_df['right_leg'].values,
    batch_size=BATCH_SIZE,
    training=False,
)

In [7]:
# Halt training after 20 consecutive epochs without a better val_accuracy
# ('val_loss' is the alternative metric to monitor) and roll the model back to
# the weights of the best epoch seen.
early_stopping = tf.keras.callbacks.EarlyStopping(
    restore_best_weights=True,
    patience=20,
    monitor='val_accuracy',
    verbose=1,
)

# Train for at most 300 epochs; early stopping usually ends the run sooner
history = model.fit(
    train_dataset,
    epochs=300,
    callbacks=[early_stopping],
    validation_data=val_dataset,
    verbose=1,
)

Epoch 1/300
[1m   7/1500[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m33s[0m 22ms/step - accuracy: 0.5628 - loss: 1.0695

I0000 00:00:1727715079.963349     471 device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.
W0000 00:00:1727715079.983569     471 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step - accuracy: 0.5582 - loss: 0.8836

W0000 00:00:1727715111.995090     470 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m87s[0m 27ms/step - accuracy: 0.5582 - loss: 0.8836 - val_accuracy: 0.2287 - val_loss: 1.0957
Epoch 2/300
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m77s[0m 26ms/step - accuracy: 0.6182 - loss: 0.6784 - val_accuracy: 0.7713 - val_loss: 0.5752
Epoch 3/300
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[0m 28ms/step - accuracy: 0.6318 - loss: 0.6594 - val_accuracy: 0.7713 - val_loss: 0.5258
Epoch 4/300
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m74s[0m 26ms/step - accuracy: 0.6549 - loss: 0.6378 - val_accuracy: 0.7713 - val_loss: 0.6192
Epoch 5/300
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m74s[0m 26ms/step - accuracy: 0.6475 - loss: 0.6324 - val_accuracy: 0.7713 - val_loss: 0.8513
Epoch 6/300
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m77s[0m 26ms/step - accuracy: 0.6615 - loss: 0.6235 - val_accuracy: 0.7713 - val_loss: 0.7290
Epoch 7/30