In [2]:
import cv2
import os

def extract_frames(video_path, output_folder, frame_interval=1):
    """
    Extracts frames from a video and saves them in a subfolder within the specified output folder.

    Args:
        video_path (str): The path to the video file.
        output_folder (str): The main folder where subfolders for each video will be created.
        frame_interval (int, optional): The interval between frames to extract. Defaults to 1 (extract every frame).
    """

    # Get the video file name without extension
    video_name = os.path.splitext(os.path.basename(video_path))[0]

    # Create a subfolder for this video
    video_output_folder = os.path.join(output_folder, video_name)
    if not os.path.exists(video_output_folder):
        os.makedirs(video_output_folder)

    # Open the video file
    video = cv2.VideoCapture(video_path)

    # Get the total number of frames in the video
    total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))

    # Initialize frame counter
    frame_count = 0

    while True:
        # Read the next frame
        ret, frame = video.read()

        # If we reached the end of the video, break the loop
        if not ret:
            break

        # Extract frames at the specified interval
        if frame_count % frame_interval == 0:
            # Save the frame in the video's subfolder
            frame_name = f"frame_{frame_count:05d}.jpg" 
            frame_path = os.path.join(video_output_folder, frame_name)
            cv2.imwrite(frame_path, frame)

        frame_count += 1

    # Release the video capture object
    video.release()

    print(f"Extracted {frame_count} frames from {video_path} to {video_output_folder}")

# Specify the paths
video_folder = "/Users/bugruster/Developer/MjrPrj/anksuchi" 
output_folder = "/Users/bugruster/Developer/MjrPrj/frames" 

# Iterate over all video files in the folder
for video_file in os.listdir(video_folder):
    if video_file.endswith(".mp4") or video_file.endswith(".avi"):  # Adjust video formats as needed
        video_path = os.path.join(video_folder, video_file)
        extract_frames(video_path, output_folder)

Extracted 162 frames from /Users/bugruster/Developer/MjrPrj/anksuchi/IMG_3017.MOV.mp4.mp4 to /Users/bugruster/Developer/MjrPrj/frames/IMG_3017.MOV.mp4
Extracted 189 frames from /Users/bugruster/Developer/MjrPrj/anksuchi/IMG_2702.MOV.mp4.mp4 to /Users/bugruster/Developer/MjrPrj/frames/IMG_2702.MOV.mp4
Extracted 197 frames from /Users/bugruster/Developer/MjrPrj/anksuchi/IMG_2793.MOV.mp4.mp4 to /Users/bugruster/Developer/MjrPrj/frames/IMG_2793.MOV.mp4
Extracted 232 frames from /Users/bugruster/Developer/MjrPrj/anksuchi/IMG_2698.MOV.mp4.mp4 to /Users/bugruster/Developer/MjrPrj/frames/IMG_2698.MOV.mp4
Extracted 188 frames from /Users/bugruster/Developer/MjrPrj/anksuchi/IMG_2796.MOV.mp4.mp4 to /Users/bugruster/Developer/MjrPrj/frames/IMG_2796.MOV.mp4
Extracted 156 frames from /Users/bugruster/Developer/MjrPrj/anksuchi/IMG_3012.MOV.mp4.mp4 to /Users/bugruster/Developer/MjrPrj/frames/IMG_3012.MOV.mp4
Extracted 182 frames from /Users/bugruster/Developer/MjrPrj/anksuchi/IMG_2795.MOV.mp4.mp4 to /

In [3]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Create the data generator with augmentation parameters
datagen = ImageDataGenerator(
    rotation_range=20,       # Rotate images up to 20 degrees
    width_shift_range=0.2,   # Shift images horizontally up to 20% of the width
    height_shift_range=0.2,  # Shift images vertically up to 20% of the height
    shear_range=0.2,         # Apply shear transformations
    zoom_range=0.2,          # Apply random zooms
    horizontal_flip=True,    # Flip images horizontally
    fill_mode='nearest'      # Fill in newly created pixels
)

# Path to your frames directory
frames_directory = "/Users/bugruster/Developer/MjrPrj/frames" 

# Create a generator that will flow augmented images from your directory
train_generator = datagen.flow_from_directory(
    frames_directory, 
    target_size=(64, 64),  # Resize images to 64x64
    batch_size=32,
    class_mode='categorical' # Assuming you have subdirectories for each sign
)



Found 3186 images belonging to 17 classes.


In [7]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.optimizers import Adam

# Data augmentation setup
datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

# Path to your frames directory (update if needed)
frames_directory = "/Users/bugruster/Developer/MjrPrj/frames" 

# Create a generator that will flow augmented images from your directory
train_generator = datagen.flow_from_directory(
    frames_directory, 
    target_size=(64, 64),  
    batch_size=32,
    class_mode='categorical' 
)

# Model Building

model = Sequential()
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(64, 64, 3))) 
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Flatten())
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5)) 
model.add(Dense(17, activation='softmax')) 

# Compile the model
model.compile(optimizer=Adam(learning_rate=0.001), 
              loss='categorical_crossentropy', 
              metrics=['accuracy'])

# Train the model using the augmented data generator
model.fit(
    train_generator,
    epochs=10,                
    steps_per_epoch=train_generator.samples // train_generator.batch_size,
)

Found 3186 images belonging to 17 classes.


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/10
[1m99/99[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 189ms/step - accuracy: 0.0642 - loss: 34.3160
Epoch 2/10
[1m99/99[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37us/step - accuracy: 0.0938 - loss: 2.8321
Epoch 3/10
[1m99/99[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 191ms/step - accuracy: 0.0732 - loss: 2.8320
Epoch 4/10
[1m99/99[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30us/step - accuracy: 0.0625 - loss: 2.8321
Epoch 5/10
[1m99/99[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 190ms/step - accuracy: 0.0748 - loss: 2.8303
Epoch 6/10
[1m99/99[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28us/step - accuracy: 0.1875 - loss: 2.8248
Epoch 7/10
[1m99/99[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 195ms/step - accuracy: 0.0774 - loss: 2.8292
Epoch 8/10
[1m99/99[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26us/step - accuracy: 0.1562 - loss: 2.8161
Epoch 9/10
[1m99/99[0m [32m━━━━━━━━━

<keras.src.callbacks.history.History at 0x3222b0a90>

In [9]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, Input
from tensorflow.keras.optimizers import Adam

# Data augmentation setup
datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

# Path to your frames directory 
frames_directory = "/Users/bugruster/Developer/MjrPrj/frames" 

# Create a generator that will flow augmented images from your directory
train_generator = datagen.flow_from_directory(
    frames_directory, 
    target_size=(64, 64),  
    batch_size=32,
    class_mode='categorical' 
)

# Convert DirectoryIterator to tf.data.Dataset
train_dataset = tf.data.Dataset.from_generator(
    lambda: train_generator,
    output_types=(tf.float32, tf.float32),
    output_shapes=([None, 64, 64, 3], [None, 17]) 
)

# Repeat the dataset indefinitely
train_dataset = train_dataset.repeat()

# Model Building with Input layer

input_shape = (64, 64, 3) 
inputs = Input(shape=input_shape)

model = Sequential()
model.add(Conv2D(32, (3, 3), activation='relu')) 
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Flatten())
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5)) 
model.add(Dense(17, activation='softmax')) 

# Create the model with the Input layer
model = tf.keras.Model(inputs=inputs, outputs=model(inputs))

# Compile the model
model.compile(optimizer=Adam(learning_rate=0.001), 
              loss='categorical_crossentropy', 
              metrics=['accuracy'])

# Train the model using the augmented dataset
model.fit(
    train_dataset,
    epochs=10,                
    steps_per_epoch=train_generator.samples // train_generator.batch_size,
)

Found 3186 images belonging to 17 classes.
Epoch 1/10
[1m99/99[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 200ms/step - accuracy: 0.0644 - loss: 26.0924
Epoch 2/10
[1m99/99[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 202ms/step - accuracy: 0.0624 - loss: 2.8325
Epoch 3/10
[1m99/99[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 206ms/step - accuracy: 0.0846 - loss: 2.8295
Epoch 4/10
[1m99/99[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 197ms/step - accuracy: 0.0676 - loss: 2.8296
Epoch 5/10
[1m99/99[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 210ms/step - accuracy: 0.0666 - loss: 2.8295
Epoch 6/10
[1m99/99[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 224ms/step - accuracy: 0.0761 - loss: 2.8296
Epoch 7/10
[1m99/99[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 203ms/step - accuracy: 0.0728 - loss: 2.8276
Epoch 8/10
[1m99/99[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 202ms/step - accuracy: 0.0797 - l

<keras.src.callbacks.history.History at 0x3494818b0>

In [10]:
import cv2
import mediapipe as mp
import os

mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils

def extract_frames_with_landmarks(video_path, output_folder, frame_interval=1):
    """
    Extracts frames from a video, detects hand landmarks using MediaPipe, 
    and saves both the original frames and landmark visualizations.

    Args:
        video_path (str): The path to the video file.
        output_folder (str): The main folder where subfolders for each video will be created.
        frame_interval (int, optional): The interval between frames to extract. Defaults to 1 (extract every frame).
    """

    # Get the video file name without extension
    video_name = os.path.splitext(os.path.basename(video_path))[0]

    # Create subfolders for original frames and landmark visualizations
    frames_output_folder = os.path.join(output_folder, video_name, "frames")
    landmarks_output_folder = os.path.join(output_folder, video_name, "landmarks")
    
    if not os.path.exists(frames_output_folder):
        os.makedirs(frames_output_folder)
    if not os.path.exists(landmarks_output_folder):
        os.makedirs(landmarks_output_folder)

    # Initialize MediaPipe Hands
    with mp_hands.Hands(
        static_image_mode=False,
        max_num_hands=2,        # Detect up to 2 hands
        min_detection_confidence=0.5,
        min_tracking_confidence=0.5) as hands:

        # Open the video file
        video = cv2.VideoCapture(video_path)

        # Get the total number of frames in the video
        total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))

        # Initialize frame counter
        frame_count = 0

        while True:
            # Read the next frame
            ret, frame = video.read()

            # If we reached the end of the video, break the loop
            if not ret:
                break

            # Extract frames at the specified interval
            if frame_count % frame_interval == 0:
                # Convert BGR image to RGB
                image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

                # Process the image and find hand landmarks
                results = hands.process(image)

                # Draw landmarks on the image if hands are detected
                if results.multi_hand_landmarks:
                    for hand_landmarks in results.multi_hand_landmarks:
                        mp_drawing.draw_landmarks(
                            image,
                            hand_landmarks,
                            mp_hands.HAND_CONNECTIONS,
                            mp_drawing.DrawingSpec(color=(0, 255, 0), thickness=2, circle_radius=2),
                            mp_drawing.DrawingSpec(color=(0, 0, 255), thickness=2))

                # Save the original frame
                frame_name = f"frame_{frame_count:05d}.jpg" 
                frame_path = os.path.join(frames_output_folder, frame_name)
                cv2.imwrite(frame_path, frame)

                # Save the image with landmarks
                landmark_image_name = f"landmark_{frame_count:05d}.jpg"
                landmark_image_path = os.path.join(landmarks_output_folder, landmark_image_name)
                cv2.imwrite(landmark_image_path, cv2.cvtColor(image, cv2.COLOR_RGB2BGR))

            frame_count += 1

        # Release the video capture object
        video.release()

        print(f"Extracted {frame_count} frames and landmarks from {video_path}")

# Specify the paths (update if needed)
video_folder = "/Users/bugruster/Developer/MjrPrj/anksuchi" 
output_folder = "/Users/bugruster/Developer/MjrPrj/frames_with_landmarks" 

# Iterate over all video files in the folder
for video_file in os.listdir(video_folder):
    if video_file.endswith(".mp4") or video_file.endswith(".avi"): 
        video_path = os.path.join(video_folder, video_file)
        extract_frames_with_landmarks(video_path, output_folder)

I0000 00:00:1725602212.042343  687946 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 88.1), renderer: Apple M1
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
W0000 00:00:1725602212.049303  701947 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1725602212.054796  701950 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


Extracted 162 frames and landmarks from /Users/bugruster/Developer/MjrPrj/anksuchi/IMG_3017.MOV.mp4.mp4


I0000 00:00:1725602219.633077  687946 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 88.1), renderer: Apple M1
W0000 00:00:1725602219.638168  702190 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1725602219.642692  702190 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


Extracted 189 frames and landmarks from /Users/bugruster/Developer/MjrPrj/anksuchi/IMG_2702.MOV.mp4.mp4


I0000 00:00:1725602228.558676  687946 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 88.1), renderer: Apple M1
W0000 00:00:1725602228.564246  702302 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1725602228.568308  702302 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


Extracted 197 frames and landmarks from /Users/bugruster/Developer/MjrPrj/anksuchi/IMG_2793.MOV.mp4.mp4


I0000 00:00:1725602237.337380  687946 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 88.1), renderer: Apple M1
W0000 00:00:1725602237.341379  702469 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1725602237.345728  702469 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


Extracted 232 frames and landmarks from /Users/bugruster/Developer/MjrPrj/anksuchi/IMG_2698.MOV.mp4.mp4


I0000 00:00:1725602248.474165  687946 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 88.1), renderer: Apple M1
W0000 00:00:1725602248.478627  702561 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1725602248.482283  702561 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


Extracted 188 frames and landmarks from /Users/bugruster/Developer/MjrPrj/anksuchi/IMG_2796.MOV.mp4.mp4


I0000 00:00:1725602256.777932  687946 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 88.1), renderer: Apple M1
W0000 00:00:1725602256.782574  702701 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1725602256.786768  702701 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


Extracted 156 frames and landmarks from /Users/bugruster/Developer/MjrPrj/anksuchi/IMG_3012.MOV.mp4.mp4


I0000 00:00:1725602264.324411  687946 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 88.1), renderer: Apple M1
W0000 00:00:1725602264.329103  702846 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1725602264.333414  702846 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


Extracted 182 frames and landmarks from /Users/bugruster/Developer/MjrPrj/anksuchi/IMG_2795.MOV.mp4.mp4


I0000 00:00:1725602272.622141  687946 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 88.1), renderer: Apple M1
W0000 00:00:1725602272.626592  702913 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1725602272.631120  702913 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


Extracted 214 frames and landmarks from /Users/bugruster/Developer/MjrPrj/anksuchi/IMG_2701.MOV.mp4.mp4


I0000 00:00:1725602282.705587  687946 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 88.1), renderer: Apple M1
W0000 00:00:1725602282.710202  703028 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1725602282.714774  703028 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


Extracted 160 frames and landmarks from /Users/bugruster/Developer/MjrPrj/anksuchi/IMG_3014.MOV.mp4.mp4


I0000 00:00:1725602290.387842  687946 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 88.1), renderer: Apple M1
W0000 00:00:1725602290.392676  703099 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1725602290.396594  703099 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


Extracted 238 frames and landmarks from /Users/bugruster/Developer/MjrPrj/anksuchi/IMG_2697.MOV.mp4.mp4


I0000 00:00:1725602302.178420  687946 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 88.1), renderer: Apple M1
W0000 00:00:1725602302.182348  703207 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1725602302.186209  703207 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


Extracted 167 frames and landmarks from /Users/bugruster/Developer/MjrPrj/anksuchi/IMG_3013.MOV.mp4.mp4


I0000 00:00:1725602310.257695  687946 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 88.1), renderer: Apple M1
W0000 00:00:1725602310.262364  703281 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1725602310.266005  703281 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


Extracted 181 frames and landmarks from /Users/bugruster/Developer/MjrPrj/anksuchi/IMG_2792.MOV.mp4.mp4


I0000 00:00:1725602318.232198  687946 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 88.1), renderer: Apple M1
W0000 00:00:1725602318.236190  703351 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1725602318.239995  703351 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


Extracted 202 frames and landmarks from /Users/bugruster/Developer/MjrPrj/anksuchi/IMG_2703.MOV.mp4.mp4


I0000 00:00:1725602327.505730  687946 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 88.1), renderer: Apple M1
W0000 00:00:1725602327.511379  703485 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1725602327.515315  703485 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


Extracted 154 frames and landmarks from /Users/bugruster/Developer/MjrPrj/anksuchi/IMG_3016.MOV.mp4.mp4


I0000 00:00:1725602334.621151  687946 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 88.1), renderer: Apple M1
W0000 00:00:1725602334.626784  703631 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1725602334.630562  703633 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


Extracted 180 frames and landmarks from /Users/bugruster/Developer/MjrPrj/anksuchi/IMG_3015.MOV.mp4.mp4


I0000 00:00:1725602343.540275  687946 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 88.1), renderer: Apple M1
W0000 00:00:1725602343.545057  703745 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1725602343.548880  703745 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


Extracted 198 frames and landmarks from /Users/bugruster/Developer/MjrPrj/anksuchi/IMG_2700.MOV.mp4.mp4


I0000 00:00:1725602353.069837  687946 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 88.1), renderer: Apple M1
W0000 00:00:1725602353.074546  703912 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1725602353.079436  703912 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


Extracted 186 frames and landmarks from /Users/bugruster/Developer/MjrPrj/anksuchi/IMG_2794.MOV.mp4.mp4


In [11]:
import tensorflow as tf
import numpy as np
import os
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.utils import Sequence

class DualInputGenerator(Sequence):
    def __init__(self, frames_dir, landmarks_dir, batch_size=32, target_size=(64, 64)):
        self.frames_dir = frames_dir
        self.landmarks_dir = landmarks_dir
        self.batch_size = batch_size
        self.target_size = target_size
        self.samples = self._get_samples()

    def _get_samples(self):
        samples = []
        for class_name in os.listdir(self.frames_dir):
            class_frames_dir = os.path.join(self.frames_dir, class_name, "frames")
            class_landmarks_dir = os.path.join(self.landmarks_dir, class_name, "landmarks")
            for frame_file in os.listdir(class_frames_dir):
                frame_path = os.path.join(class_frames_dir, frame_file)
                landmark_file = frame_file.replace("frame_", "landmark_")
                landmark_path = os.path.join(class_landmarks_dir, landmark_file)
                samples.append((frame_path, landmark_path, class_name))
        return samples

    def __len__(self):
        return len(self.samples) // self.batch_size

    def __getitem__(self, index):
        batch_samples = self.samples[index * self.batch_size : (index + 1) * self.batch_size]
        frames = []
        landmarks = []
        labels = []
        for frame_path, landmark_path, class_name in batch_samples:
            frame = load_img(frame_path, target_size=self.target_size)
            frame = img_to_array(frame) / 255.0
            landmark = load_img(landmark_path, target_size=self.target_size)
            landmark = img_to_array(landmark) / 255.0
            frames.append(frame)
            landmarks.append(landmark)
            labels.append(class_name)

        return [np.array(frames), np.array(landmarks)], tf.keras.utils.to_categorical(labels, num_classes=17)

# Example usage:

# Path to your frames and landmarks directories 
frames_directory = "/Users/bugruster/Developer/MjrPrj/frames_with_landmarks" 
landmarks_directory = "/Users/bugruster/Developer/MjrPrj/frames_with_landmarks"

# Create the dual input generator
train_generator = DualInputGenerator(frames_directory, landmarks_directory)

In [23]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, Input, concatenate
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import Sequence, to_categorical
import numpy as np
import os
from tensorflow.keras.preprocessing.image import load_img, img_to_array

# Data augmentation setup (only for original frames)
datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

# Path to your frames and landmarks directories 
frames_directory = "/Users/bugruster/Developer/MjrPrj/frames_with_landmarks" 
landmarks_directory = "/Users/bugruster/Developer/MjrPrj/frames_with_landmarks"

class DualInputGenerator(Sequence):
    def __init__(self, frames_dir, landmarks_dir, batch_size=32, target_size=(64, 64)):
        self.frames_dir = frames_dir
        self.landmarks_dir = landmarks_dir
        self.batch_size = batch_size
        self.target_size = target_size
        self.samples = self._get_samples()
        self.class_indices = {class_name: i for i, class_name in enumerate(sorted(os.listdir(self.frames_dir)))}
        print(f"Total samples: {len(self.samples)}")
        print(f"Number of classes: {len(self.class_indices)}")

    def _get_samples(self):
        samples = []
        for class_name in os.listdir(self.frames_dir):
            class_frames_dir = os.path.join(self.frames_dir, class_name, "frames")
            class_landmarks_dir = os.path.join(self.landmarks_dir, class_name, "landmarks")
            frame_files = os.listdir(class_frames_dir)
            for frame_file in frame_files:
                frame_path = os.path.join(class_frames_dir, frame_file)
                landmark_file = frame_file.replace("frame_", "landmark_")
                landmark_path = os.path.join(class_landmarks_dir, landmark_file)
                if os.path.exists(landmark_path):
                    samples.append((frame_path, landmark_path, class_name))
                else:
                    print(f"Warning: Landmark file not found for {frame_file}")
        return samples

    def __len__(self):
        return max(1, len(self.samples) // self.batch_size)

    def __getitem__(self, index):
        start_index = index * self.batch_size
        end_index = min((index + 1) * self.batch_size, len(self.samples))
        batch_samples = self.samples[start_index:end_index]
        
        if len(batch_samples) == 0:
            print(f"Warning: Empty batch at index {index}")
            # Return a dummy batch to avoid errors
            return (np.zeros((1, 64, 64, 3)), np.zeros((1, 64, 64, 3))), np.zeros((1, len(self.class_indices)))
        
        frames = []
        landmarks = []
        labels = []
        for frame_path, landmark_path, class_name in batch_samples:
            try:
                frame = load_img(frame_path, target_size=self.target_size)
                frame = img_to_array(frame) / 255.0
                frame = datagen.random_transform(frame) 
                landmark = load_img(landmark_path, target_size=self.target_size)
                landmark = img_to_array(landmark) / 255.0
                frames.append(frame)
                landmarks.append(landmark)
                labels.append(self.class_indices[class_name])
            except Exception as e:
                print(f"Error processing sample: {e}")
                print(f"Frame path: {frame_path}")
                print(f"Landmark path: {landmark_path}")

        if len(frames) == 0:
            print(f"Warning: No valid samples in batch at index {index}")
            # Return a dummy batch to avoid errors
            return (np.zeros((1, 64, 64, 3)), np.zeros((1, 64, 64, 3))), np.zeros((1, len(self.class_indices)))

        frames = np.array(frames)
        landmarks = np.array(landmarks)
        labels = to_categorical(labels, num_classes=len(self.class_indices))

        print(f"Batch shape: frames {frames.shape}, landmarks {landmarks.shape}, labels {labels.shape}")
        return (frames, landmarks), labels

# Create the dual input generator
train_generator = DualInputGenerator(frames_directory, landmarks_directory)

# Model building with dual input 
frame_input = Input(shape=(64, 64, 3))
landmark_input = Input(shape=(64, 64, 3))

# Frame processing branch
x1 = Conv2D(32, (3, 3), activation='relu')(frame_input)
x1 = MaxPooling2D((2, 2))(x1)
x1 = Conv2D(64, (3, 3), activation='relu')(x1)
x1 = MaxPooling2D((2, 2))(x1)
x1 = Flatten()(x1)

# Landmark processing branch
x2 = Conv2D(32, (3, 3), activation='relu')(landmark_input)
x2 = MaxPooling2D((2, 2))(x2)
x2 = Conv2D(64, (3, 3), activation='relu')(x2)
x2 = MaxPooling2D((2, 2))(x2)
x2 = Flatten()(x2)

# Combine both branches
combined = concatenate([x1, x2])
x = Dense(64, activation='relu')(combined)
x = Dropout(0.5)(x)
output = Dense(len(train_generator.class_indices), activation='softmax')(x)

model = Model(inputs=[frame_input, landmark_input], outputs=output)

# Compile the model
model.compile(optimizer=Adam(learning_rate=0.0001), 
              loss='categorical_crossentropy', 
              metrics=['accuracy'])

# Train the model using the generator
model.fit(
    train_generator,
    epochs=20,
    steps_per_epoch=len(train_generator)
)

Total samples: 3186
Number of classes: 17
Batch shape: frames (32, 64, 64, 3), landmarks (32, 64, 64, 3), labels (32, 17)
Batch shape: frames (32, 64, 64, 3), landmarks (32, 64, 64, 3), labels (32, 17)
Epoch 1/20


  self._warn_if_super_not_called()


Batch shape: frames (32, 64, 64, 3), landmarks (32, 64, 64, 3), labels (32, 17)
Batch shape: frames (32, 64, 64, 3), landmarks (32, 64, 64, 3), labels (32, 17)
Batch shape: frames (32, 64, 64, 3), landmarks (32, 64, 64, 3), labels (32, 17)
Batch shape: frames (32, 64, 64, 3), landmarks (32, 64, 64, 3), labels (32, 17)
Batch shape: frames (32, 64, 64, 3), landmarks (32, 64, 64, 3), labels (32, 17)
Batch shape: frames (32, 64, 64, 3), landmarks (32, 64, 64, 3), labels (32, 17)
Batch shape: frames (32, 64, 64, 3), landmarks (32, 64, 64, 3), labels (32, 17)
Batch shape: frames (32, 64, 64, 3), landmarks (32, 64, 64, 3), labels (32, 17)
[1m 1/99[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m5:59[0m 4s/step - accuracy: 0.0000e+00 - loss: 2.9708Batch shape: frames (32, 64, 64, 3), landmarks (32, 64, 64, 3), labels (32, 17)
[1m 2/99[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m36s[0m 380ms/step - accuracy: 0.0000e+00 - loss: 2.9905Batch shape: frames (32, 64, 64, 3), landmarks (32, 64, 64, 3), labels (32

  self.gen.throw(type, value, traceback)


Batch shape: frames (32, 64, 64, 3), landmarks (32, 64, 64, 3), labels (32, 17)
Batch shape: frames (32, 64, 64, 3), landmarks (32, 64, 64, 3), labels (32, 17)
Batch shape: frames (32, 64, 64, 3), landmarks (32, 64, 64, 3), labels (32, 17)
Batch shape: frames (32, 64, 64, 3), landmarks (32, 64, 64, 3), labels (32, 17)
Batch shape: frames (32, 64, 64, 3), landmarks (32, 64, 64, 3), labels (32, 17)
Batch shape: frames (32, 64, 64, 3), landmarks (32, 64, 64, 3), labels (32, 17)
Batch shape: frames (32, 64, 64, 3), landmarks (32, 64, 64, 3), labels (32, 17)
Batch shape: frames (32, 64, 64, 3), landmarks (32, 64, 64, 3), labels (32, 17)
[1m 1/99[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m4:56[0m 3s/step - accuracy: 0.0000e+00 - loss: 2.8343Batch shape: frames (32, 64, 64, 3), landmarks (32, 64, 64, 3), labels (32, 17)
[1m 2/99[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m37s[0m 391ms/step - accuracy: 0.0000e+00 - loss: 2.8342Batch shape: frames (32, 64, 64, 3), landmarks (32, 64, 64, 3), labels (32

<keras.src.callbacks.history.History at 0x36f972f40>