In [1]:
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Flatten, Dense, Dropout
from tensorflow.keras.layers import DepthwiseConv2D
from tensorflow.keras.layers import Add
from tensorflow.keras.layers import Input, Conv2D, BatchNormalization, ReLU, Reshape, GlobalAveragePooling2D, Activation,UpSampling2D, AveragePooling2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator

import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
import itertools

from mediapipe.python.solutions import pose as mp_pose
import cv2
pose_tracker = mp_pose.Pose()

In [2]:
train_path = 'C:/Users/Prasanna P M/Human Project/ML_aug/Human Action Recognition/TRAIN_landmarks/'
valid_path = 'C:/Users/Prasanna P M/Human Project/ML_aug/Human Action Recognition/VALID_landmarks/'
test_path = 'C:/Users/Prasanna P M/Human Project/ML_aug/Human Action Recognition/TEST_landmarks/'

train_batches = ImageDataGenerator(preprocessing_function=tf.keras.applications.mobilenet.preprocess_input).flow_from_directory(
    directory=train_path, target_size=(224,224), batch_size=32)

valid_batches = ImageDataGenerator(preprocessing_function=tf.keras.applications.mobilenet.preprocess_input).flow_from_directory(
    directory=valid_path, target_size=(224, 224), batch_size=32)

test_batches = ImageDataGenerator(preprocessing_function=tf.keras.applications.mobilenet.preprocess_input).flow_from_directory(
    directory=test_path, target_size=(224,224), batch_size=32, shuffle=False)

Found 10554 images belonging to 15 classes.
Found 2990 images belonging to 15 classes.
Found 1525 images belonging to 15 classes.


In [3]:
train_batches.filepaths

['C:/Users/Prasanna P M/Human Project/ML_aug/Human Action Recognition/TRAIN_landmarks/calling\\Image_10023.jpg',
 'C:/Users/Prasanna P M/Human Project/ML_aug/Human Action Recognition/TRAIN_landmarks/calling\\Image_10027.jpg',
 'C:/Users/Prasanna P M/Human Project/ML_aug/Human Action Recognition/TRAIN_landmarks/calling\\Image_10034.jpg',
 'C:/Users/Prasanna P M/Human Project/ML_aug/Human Action Recognition/TRAIN_landmarks/calling\\Image_10082.jpg',
 'C:/Users/Prasanna P M/Human Project/ML_aug/Human Action Recognition/TRAIN_landmarks/calling\\Image_10084.jpg',
 'C:/Users/Prasanna P M/Human Project/ML_aug/Human Action Recognition/TRAIN_landmarks/calling\\Image_10137.jpg',
 'C:/Users/Prasanna P M/Human Project/ML_aug/Human Action Recognition/TRAIN_landmarks/calling\\Image_10141.jpg',
 'C:/Users/Prasanna P M/Human Project/ML_aug/Human Action Recognition/TRAIN_landmarks/calling\\Image_10143.jpg',
 'C:/Users/Prasanna P M/Human Project/ML_aug/Human Action Recognition/TRAIN_landmarks/calling\\I

In [4]:
import os
import cv2
import numpy as np
from mediapipe.python.solutions import pose as mp_pose
from tensorflow.keras.preprocessing.image import ImageDataGenerator

def flow_from_directory_with_landmarks(
    generator,
    directory,
    target_size=(256, 256),
    color_mode="rgb",
    classes=None,
    class_mode="categorical",
    batch_size=32,
    shuffle=True,
    seed=None,
    save_to_dir=None,
    save_prefix="",
    save_format="png",
    follow_links=False,
    subset=None,
    interpolation="nearest",
):
    # Get the original flow_from_directory output
    iterator = generator.flow_from_directory(
        directory,
        target_size=target_size,
        color_mode=color_mode,
        classes=classes,
        class_mode=class_mode,
        batch_size=batch_size,
        shuffle=shuffle,
        seed=seed,
        save_to_dir=save_to_dir,
        save_prefix=save_prefix,
        save_format=save_format,
        follow_links=follow_links,
        subset=subset,
        interpolation=interpolation,
    )

    # Create empty lists to store image data and landmarks
    image_data = []
    landmarks = []

    # Load and preprocess each image with landmarks
    pose_tracker = mp_pose.Pose()
    for image_path in iterator.filepaths:
        # Read the image
        image = cv2.imread(image_path)

        # Extract landmarks using mediapipe.pose
        results = pose_tracker.process(image)
        pose_landmarks = results.pose_landmarks

        # Check if landmarks were detected
        if pose_landmarks is not None:
            # Extract landmark coordinates
            landmark_coordinates = [[lmk.x, lmk.y, lmk.z] for lmk in pose_landmarks.landmark]
            landmarks.append(landmark_coordinates)
        else:
            # If landmarks were not detected, append zeros
            landmarks.append(np.zeros((33, 3)))

        # Preprocess and resize the image
        image = cv2.resize(image, target_size)
        image_data.append(image)

    # Convert the image data and landmarks to numpy arrays
    image_data = np.array(image_data)
    landmarks = np.array(landmarks)

    # Append zeros and reshape landmarks array
    landmarks = np.concatenate((landmarks, np.zeros((len(image_data), 1024 - 33, 3))), axis=1)
    landmarks = landmarks.reshape((len(image_data), 32, 32, 3))

    # Return the modified image data and labels
    return image_data, landmarks, iterator.labels

# Usage example
train_path = 'C:/Users/Prasanna P M/Human Project/ML_aug/Human Action Recognition/TRAIN_landmarks/'
train_batches = ImageDataGenerator(preprocessing_function=tf.keras.applications.mobilenet.preprocess_input)

# Generate batches of image data and landmarks from the directory
train_images, train_landmarks, train_labels = flow_from_directory_with_landmarks(
    train_batches,
    directory=train_path,
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical',
    shuffle=True
)

# Verify the shapes of the data
print(train_images.shape)       # Shape of train_images
print(train_landmarks.shape)    # Shape of train_landmarks
print(train_labels.shape)       # Shape of train_labels

# Convert training labels to categorical format
train_labels_encoded = tf.keras.utils.to_categorical(train_labels, num_classes=15)
print(train_labels_encoded.shape)




Found 10554 images belonging to 15 classes.
(10554, 224, 224, 3)
(10554, 32, 32, 3)
(10554,)
(10554, 15)


In [5]:
# Modify the variable names and paths for validation data
val_path = 'C:/Users/Prasanna P M/Human Project/ML_aug/Human Action Recognition/VALID_landmarks/'
val_batches = ImageDataGenerator(preprocessing_function=tf.keras.applications.mobilenet.preprocess_input)

# Generate batches of image data and landmarks from the validation directory
val_images, val_landmarks, val_labels = flow_from_directory_with_landmarks(
    val_batches,
    directory=val_path,
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical',
    shuffle=True
)

# Verify the shapes of the validation data
print(val_images.shape)       # Shape of val_images
print(val_landmarks.shape)    # Shape of val_landmarks
print(val_labels.shape)       # Shape of val_labels


Found 2990 images belonging to 15 classes.
(2990, 224, 224, 3)
(2990, 32, 32, 3)
(2990,)


In [6]:
valid_labels_encoded = tf.keras.utils.to_categorical(val_labels, num_classes=15)

In [7]:
print(valid_labels_encoded.shape)

(2990, 15)


In [8]:
import os
import cv2
import numpy as np
from tensorflow.keras.utils import Sequence
from tensorflow.keras.applications.mobilenet import preprocess_input
from mediapipe.python.solutions import pose as mp_pose
import tensorflow as tf


In [9]:
import tensorflow as tf
import tensorflow_hub as hub
from tensorflow.keras.layers import Input, GlobalAveragePooling2D, Dropout, Dense, concatenate
from tensorflow.keras.models import Model

model1 = tf.keras.Sequential([
    hub.KerasLayer(
        'https://tfhub.dev/google/imagenet/mobilenet_v3_large_100_224/feature_vector/5',
        trainable=True),
    tf.keras.layers.Dropout(0.1),
    tf.keras.layers.Dense(128, activation='relu'),
])

# Build the model
model1.build((None, 224, 224, 3))
model1.summary()


Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
keras_layer (KerasLayer)     (None, 1280)              4226432   
_________________________________________________________________
dropout (Dropout)            (None, 1280)              0         
_________________________________________________________________
dense (Dense)                (None, 128)               163968    
Total params: 4,390,400
Trainable params: 4,366,000
Non-trainable params: 24,400
_________________________________________________________________


In [10]:
mobile = tf.keras.applications.MobileNetV2(
    input_shape=(32, 32, 3), weights=None, include_top=False)

In [11]:
x = mobile.layers[-1].output
x = GlobalAveragePooling2D()(x)  # Global Average Pooling to reduce dimensions to (None, 1)
x = Dropout(0.1)(x)
output = Dense(128, activation='relu')(x)

model2 = Model(inputs=mobile.input, outputs=output)
model2.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 32, 32, 3)]  0                                            
__________________________________________________________________________________________________
Conv1 (Conv2D)                  (None, 16, 16, 32)   864         input_1[0][0]                    
__________________________________________________________________________________________________
bn_Conv1 (BatchNormalization)   (None, 16, 16, 32)   128         Conv1[0][0]                      
__________________________________________________________________________________________________
Conv1_relu (ReLU)               (None, 16, 16, 32)   0           bn_Conv1[0][0]                   
______________________________________________________________________________________________

In [12]:
combined = concatenate([model1.output,model2.output])
prediction = Dense(15, activation='softmax')(combined)

fin_model = Model(inputs=[model1.input,model2.input], outputs=prediction)
fin_model.summary()

Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 32, 32, 3)]  0                                            
__________________________________________________________________________________________________
Conv1 (Conv2D)                  (None, 16, 16, 32)   864         input_1[0][0]                    
__________________________________________________________________________________________________
bn_Conv1 (BatchNormalization)   (None, 16, 16, 32)   128         Conv1[0][0]                      
__________________________________________________________________________________________________
Conv1_relu (ReLU)               (None, 16, 16, 32)   0           bn_Conv1[0][0]                   
____________________________________________________________________________________________

In [13]:
fin_model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
    loss=tf.keras.losses.CategoricalCrossentropy(),
    metrics=['accuracy'])

In [14]:
epochs = 15
batch_size = 32


fin_model.fit(
    [train_images, train_landmarks],  
    train_labels_encoded,                     
    epochs=epochs,
    batch_size=batch_size,
    validation_data=([val_images, val_landmarks], valid_labels_encoded),
    verbose = 1
)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
  1/330 [..............................] - ETA: 29:23 - loss: 0.2765 - accuracy: 1.0000

KeyboardInterrupt: 

In [15]:
# Modify the variable names and paths for validation data
test_path = 'C:/Users/Prasanna P M/Human Project/ML_aug/Human Action Recognition/TEST_landmarks/'
test_batches = ImageDataGenerator(preprocessing_function=tf.keras.applications.mobilenet.preprocess_input)

# Generate batches of image data and landmarks from the validation directory
test_images, test_landmarks, test_labels = flow_from_directory_with_landmarks(
    test_batches,
    directory=test_path,
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical',
    shuffle=True
)

# Verify the shapes of the validation data
print(test_images.shape)       # Shape of val_images
print(test_landmarks.shape)    # Shape of val_landmarks
print(test_labels.shape)       # Shape of val_labels


Found 1525 images belonging to 15 classes.
(1525, 224, 224, 3)
(1525, 32, 32, 3)
(1525,)


In [16]:
test_labels_encoded = tf.keras.utils.to_categorical(test_labels, num_classes=15)


In [17]:

test_loss, test_accuracy = fin_model.evaluate([test_images, test_landmarks], test_labels_encoded)

# Print the test loss and accuracy
print('Test Loss:', test_loss)
print('Test Accuracy:', test_accuracy)

Test Loss: 1.5529298782348633
Test Accuracy: 0.6845901608467102
