<a href="https://colab.research.google.com/github/RitikaVerma55/Student-Engagement-Analysis-Posture-Emotion-Tracking/blob/main/Posture_Emotion_Detection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install tensorflow

In [None]:
!pip install opencv-python

In [None]:
!pip install mediapipe

In [None]:
!pip install scikit-learn

In [None]:
!pip install matplotlib

In [None]:
import tensorflow as tf

# Report 'cuda' when TensorFlow lists at least one physical GPU, otherwise 'cpu'.
gpus = tf.config.list_physical_devices('GPU')
device = 'cuda' if gpus else 'cpu'

print(device)

In [None]:
import cv2
import numpy as np
import os
from matplotlib import pyplot as plt
import time
import mediapipe as mp

In [None]:
# Short aliases for the two MediaPipe modules the rest of the notebook uses.
mp_holistic = mp.solutions.holistic # Holistic model
mp_drawing = mp.solutions.drawing_utils # drawing Utilities

In [None]:
def mediapipe_detection(image, model):
    """Run `model.process` on a BGR frame and return `(bgr_image, results)`.

    The frame is converted BGR -> RGB before being handed to the model, and
    converted back RGB -> BGR before being returned, so the caller gets a new
    array in the same colour order it passed in.
    """
    rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Mark the buffer read-only only for the duration of the model call.
    rgb.flags.writeable = False
    results = model.process(rgb)
    rgb.flags.writeable = True

    return cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR), results


In [None]:
def draw_landmarks(image, results):
    """Draw the face, pose and both hand landmark groups of `results` on `image`.

    Each group is passed to `mp_drawing.draw_landmarks` with its own landmark
    style: green for the face mesh, (255, 0, 0) for the pose and (0, 0, 255)
    for the hands. Nothing is returned.
    """
    small_dots = {'thickness': 1, 'circle_radius': 1}
    hand_style = {'color': (0, 0, 255), **small_dots}

    # (landmark group, connection set, landmark drawing spec), drawn in this order.
    overlays = (
        (results.face_landmarks, mp_holistic.FACEMESH_TESSELATION,
         mp_drawing.DrawingSpec(color=(0, 255, 0), **small_dots)),
        (results.pose_landmarks, mp_holistic.POSE_CONNECTIONS,
         mp_drawing.DrawingSpec(color=(255, 0, 0), thickness=1)),
        (results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
         mp_drawing.DrawingSpec(**hand_style)),
        (results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
         mp_drawing.DrawingSpec(**hand_style)),
    )

    for landmarks, connections, landmark_spec in overlays:
        mp_drawing.draw_landmarks(image, landmarks, connections,
                                  landmark_drawing_spec=landmark_spec)


In [None]:
from google.colab.patches import cv2_imshow

In [None]:
def extract_keypoints(results):
    """Flatten the pose, face and hand landmarks of `results` into one 1-D array.

    Pose landmarks contribute (x, y, z, visibility) each; face and hand
    landmarks contribute (x, y, z). A group that is missing (falsy) is
    replaced by zeros of length 132 (pose), 1404 (face) or 63 (each hand).
    The parts are concatenated in the order pose, face, left hand, right hand.
    """
    def _flatten(group, fields, empty_size):
        # A missing landmark group becomes a fixed-length zero vector.
        if not group:
            return np.zeros(empty_size)
        rows = [[getattr(point, name) for name in fields] for point in group.landmark]
        return np.array(rows).flatten()

    xyz = ('x', 'y', 'z')
    parts = [
        _flatten(results.pose_landmarks, xyz + ('visibility',), 132),
        _flatten(results.face_landmarks, xyz, 1404),
        _flatten(results.left_hand_landmarks, xyz, 21*3),
        _flatten(results.right_hand_landmarks, xyz, 21*3),
    ]
    return np.concatenate(parts)

In [None]:
!pip install imgaug

In [None]:
import os
import cv2
import numpy as np
from imgaug import augmenters as iaa


# Root folder that receives the per-frame keypoint .npy files.
DATA_PATH = '/content/drive/MyDrive/DATA_PATH'
# Posture classes; each is also used as a sub-folder name under both paths.
actions = [ 'dozz', 'study', 'yawn']
# Root folder that holds the source .mp4 videos, one sub-folder per action.
POSTURES_PATH = '/content/drive/MyDrive/posture_1'
# Maximum number of frames processed (and saved) per video.
sequence_length = 50


# Function to process a video file and save keypoints
def process_video(file_path, action, sequence_count, augmentation):
    """Extract holistic keypoints from a video and save one .npy file per frame.

    At most `sequence_length` frames are read. Frame `i` is written to
    `DATA_PATH/<action>/sequence_<sequence_count>/frame_<i>.npy`.

    Args:
        file_path: Path of the video file to read.
        action: Action name; used as the sub-folder under `DATA_PATH`.
        sequence_count: Index used in the `sequence_<n>` folder name.
        augmentation: Object with an `augment_image(frame)` method applied to
            every frame, or a falsy value to use the frames unchanged.

    Returns:
        The number of frames that were saved (0 when the video yields none).
    """
    sequence_dir = os.path.join(DATA_PATH, action, f"sequence_{sequence_count}")
    cap = cv2.VideoCapture(file_path)
    frame_count = 0

    # try/finally so the capture is released even when a frame fails mid-way.
    try:
        # Set mediapipe model
        with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
            # Loop through video frames
            while cap.isOpened() and frame_count < sequence_length:
                # Read feed
                ret, frame = cap.read()
                if not ret:
                    break

                if augmentation:
                    # Data augmentation
                    frame = augmentation.augment_image(frame)

                # Make detections and flatten them into one feature vector
                _, results = mediapipe_detection(frame, holistic)
                keypoints = extract_keypoints(results)

                # Created lazily so a video with no readable frame leaves no
                # empty sequence folder behind.
                os.makedirs(sequence_dir, exist_ok=True)

                # Save keypoints for current frame
                npy_path = os.path.join(sequence_dir, f"frame_{frame_count}.npy")
                print(keypoints)
                np.save(npy_path, keypoints)

                # Increment frame counter
                frame_count += 1
    finally:
        # Release the video capture object
        cap.release()

    return frame_count

# Loop through each action folder
for action in actions:
    action_folder = os.path.join(POSTURES_PATH, action)
    # Sequence numbering restarts at 0 for every action.
    sequence_count = 0

    # Iterate over each file in the action folder
    for file in os.listdir(action_folder):
        file_path = os.path.join(action_folder, file)

        # Ensure that the item is a file and ends with '.mp4' extension
        if os.path.isfile(file_path) and file.endswith('.mp4'):
            print(f"Processing video file: {file}")

            # Process original video without augmentation
            process_video(file_path, action, sequence_count, None)


            # Advance to the next sequence index (no augmented pass is run in this loop)
            sequence_count += 1


In [None]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

In [None]:
# Map every action name to its position in `actions` (its integer class id).
label_map = dict(zip(actions, range(len(actions))))
print(label_map)

In [None]:
sequences, labels = [], []

# Loop through each action
for action in actions:
    action_dir = os.path.join(DATA_PATH, action)

    # Sorted so the sample order does not depend on the arbitrary order
    # os.listdir() happens to return.
    for sequence_dir in sorted(os.listdir(action_dir)):
        sequence_path = os.path.join(action_dir, sequence_dir)

        if not os.path.isdir(sequence_path):  # Only sequence folders are loaded
            continue

        window = []

        # Loop through each frame in the sequence
        for frame_num in range(sequence_length):
            npy_path = os.path.join(sequence_path, f"frame_{frame_num}.npy")

            if not os.path.exists(npy_path):
                print(f"File not found: {npy_path}")
                break

            # NOTE(review): allow_pickle=True is kept from the original; the
            # files are written by np.save on plain arrays, so it is
            # presumably not required - confirm before tightening.
            window.append(np.load(npy_path, allow_pickle=True))

        # A window shorter than sequence_length would make np.array(sequences)
        # ragged, so incomplete sequences are skipped together with their label.
        if len(window) != sequence_length:
            print(f"Skipping incomplete sequence: {sequence_path}")
            continue

        # Append window to sequences
        sequences.append(window)
        labels.append(label_map[action])

# Debugging: Print shape of the constructed sequences array
print(f"Shape of sequences array: {np.array(sequences).shape}")


In [None]:
np.array(sequences).shape

In [None]:
# Stack the collected windows into one array and one-hot encode the integer labels.
X = np.array(sequences)
y = to_categorical(labels).astype(int)

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples as a validation set; random_state=42 fixes the shuffle.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

print("X_train shape:", X_train.shape)
print("X_val shape:", X_val.shape)
print("y_train shape:", y_train.shape)
print("y_val shape:", y_val.shape)


In [None]:
X_train.shape

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import ConvLSTM2D, Dense, Flatten
from tensorflow.keras.callbacks import TensorBoard


In [None]:
# Insert a length-1 axis at position 1 of both feature arrays.
# NOTE(review): presumably this is the leading 1 in the model's
# input_shape=(1, 50, 1662, 1); the trailing channel axis is not added here - confirm.
X_train = np.expand_dims(X_train, axis=1)
X_val = np.expand_dims(X_val, axis =1)

In [None]:
X_train.shape

In [None]:
X_val.shape

In [None]:
y_train.shape

In [None]:
num_actions = len(actions)

# Two stacked ConvLSTM2D layers, flattened into a small dense head that ends
# in one softmax unit per action.
model = Sequential([
    ConvLSTM2D(filters=64, kernel_size=(3, 3), activation='relu',
               input_shape=(1, 50, 1662, 1), return_sequences=True, padding='same'),
    ConvLSTM2D(filters=64, kernel_size=(3, 3), activation='relu',
               return_sequences=False, padding='same'),
    Flatten(),
    Dense(32, activation='relu'),
    Dense(num_actions, activation='softmax'),
])



In [None]:
from tensorflow.keras.optimizers import Adam
# Adam with a learning rate of 1e-6; categorical cross-entropy matches the one-hot labels in `y`.
optimizer = Adam(learning_rate=0.000001)
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])


In [None]:
from keras.callbacks import EarlyStopping

In [None]:
# Define early stopping callback: stop after 3 epochs without a val_loss
# improvement and restore the weights of the best epoch.
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

# Train the model with early stopping (one sample per batch, at most 100 epochs)
history = model.fit(X_train, y_train, epochs=100, batch_size=1, validation_data=(X_val, y_val), callbacks=[early_stopping])


In [None]:
# Import necessary libraries
import matplotlib.pyplot as plt

# Define the plot_learning_curve function
def plot_learning_curve(history):
    """Plot training and validation loss per epoch from a Keras History object.

    Reads `history.history['loss']` and `history.history['val_loss']` and
    shows both curves in a new figure, with epochs numbered from 1.
    """
    metrics = history.history

    # Look both series up before any figure is created.
    curves = [
        (metrics['loss'], 'b', 'Training loss'),
        (metrics['val_loss'], 'r', 'Validation loss'),
    ]
    epoch_axis = range(1, len(curves[0][0]) + 1)

    plt.figure()
    for values, line_format, legend_label in curves:
        plt.plot(epoch_axis, values, line_format, label=legend_label)
    plt.title('Training and Validation Loss')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.show()

# Plot the curves of the posture-model training run
plot_learning_curve(history)


In [None]:
model.summary()

In [None]:
#######################################################################################
#######################################################################################

In [None]:
import os
import cv2
import numpy as np

data_path = '/content/drive/MyDrive/emotion_1_train/train'
# Sorted because the labels are later assigned purely by position in
# img_data; os.listdir() alone returns the class folders in arbitrary order.
data_dir_list = sorted(os.listdir(data_path))

# NOTE(review): cv2.imread() below uses its default flag, which yields
# 3-channel images, and the emotion model is built with input_shape
# (48, 48, 3) - so this value does not describe the loaded data. Confirm
# whether it is still needed.
num_channel = 1
num_epoch = 10

img_data_list = []

for dataset in data_dir_list:
    class_dir = os.path.join(data_path, dataset)
    img_list = sorted(os.listdir(class_dir))
    print('Resizing images to 48x48 for dataset - {}\n'.format(dataset))
    for img in img_list:
        img_path = os.path.join(class_dir, img)
        input_img = cv2.imread(img_path)
        if input_img is None:
            # imread returns None for unreadable files; fail with the path
            # instead of an opaque error inside cv2.resize.
            raise ValueError(f"Could not read image: {img_path}")
        img_data_list.append(cv2.resize(input_img, (48, 48)))

img_data = np.array(img_data_list)
img_data = img_data.astype('float32') / 255.0  # Normalize the image data

In [None]:
num_classes = 3

num_of_samples = img_data.shape[0]
labels = np.ones((num_of_samples,),dtype='int64')

# Slice ends are exclusive, so a run of 430 images starting at `s` is
# `s:s+430`. The previous ends (429, 859, 1289) left the last image of each
# run at the initial value 1.
# NOTE(review): assumes exactly 430 images per class, loaded in the order of
# `names` below - confirm against the data folder.
labels[0:430]=0               #430
labels[430:860]=1             #430
labels[860:1290]=2            #430


names = ['disgust','happy','neutral']

def getLabel(id):
    """Return the emotion name for class index `id` (0 disgust, 1 happy, 2 neutral)."""
    emotion_names = ['disgust', 'happy', 'neutral']
    return emotion_names[id]

In [None]:
# One-hot encode the integer emotion labels into num_classes columns.
Y_1 = to_categorical(labels, num_classes)

In [None]:
Y_1.shape

In [None]:
from sklearn.utils import shuffle

In [None]:
# Shuffle images and one-hot labels with the same permutation; no random_state is set.
x_1,y_1 = shuffle(img_data,Y_1)

In [None]:
# 80/20 train/validation split; no random_state is given, so the split differs between runs.
X_1_train, X_1_val, y_1_train, y_1_val = train_test_split(x_1, y_1, test_size=0.2)

In [None]:
X_1_train.shape

In [None]:
X_1_val.shape

In [None]:
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout, Flatten, Conv2D
from keras.layers import MaxPooling2D
from keras.metrics import categorical_accuracy
from keras.models import model_from_json
from keras.callbacks import ModelCheckpoint
from keras.optimizers import *
from keras.layers import BatchNormalization
import os

In [None]:
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from keras.optimizers import Adam
from keras.regularizers import l2
#from keras.preprocessing.image import ImageDataGenerator

input_shape = (48, 48, 3)

emotion_model = Sequential()

# Three Conv2D -> BatchNormalization -> MaxPooling2D stages with 32, 64 and
# 128 filters; only the first convolution declares the input shape.
for stage_index, n_filters in enumerate((32, 64, 128)):
    first_layer_kwargs = {'input_shape': input_shape} if stage_index == 0 else {}
    emotion_model.add(Conv2D(n_filters, (3, 3), padding='same', activation='relu',
                             kernel_regularizer=l2(0.001), **first_layer_kwargs))
    emotion_model.add(BatchNormalization())
    emotion_model.add(MaxPooling2D(pool_size=(2, 2)))

# Dense head: 256 regularized units with batch norm and dropout, then a
# 3-way softmax.
emotion_model.add(Flatten())
emotion_model.add(Dense(256, activation='relu', kernel_regularizer=l2(0.001)))
emotion_model.add(BatchNormalization())
emotion_model.add(Dropout(0.3))
emotion_model.add(Dense(3, activation='softmax'))

# Adam optimizer with a learning rate of 1e-6
optimizer = Adam(learning_rate=0.000001)

emotion_model.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer=optimizer)
emotion_model.summary()

In [None]:
import os
from keras import callbacks

# CSV file that receives the per-epoch training log.
filename = 'emotion_model_train_new.csv'
# Filename template for checkpoints.
# NOTE(review): the template reads an `acc` value while the model is compiled
# with metrics=['accuracy'] - presumably the key would not match; confirm
# before re-enabling the checkpoint.
filepath = "Best-weights-my_emotion_model-{epoch:03d}-{loss:.4f}-{acc:.4f}.hdf5.keras"

# Check if the CSV file exists
if not os.path.exists(filename):
    # If the file doesn't exist, create it
    with open(filename, 'w') as f:
        pass  # Create an empty file

csv_log = callbacks.CSVLogger(filename, separator=',', append=False)
checkpoint = callbacks.ModelCheckpoint(filepath, monitor='val_loss', verbose=1, save_best_only=True, mode='min')
callbacks_list = [csv_log, checkpoint]
# The assignment below replaces the list above, so `checkpoint` is created
# but never passed to training; only the CSV logger remains.
callbacks_list = [csv_log]

In [None]:
from keras.callbacks import EarlyStopping

# Define the early stopping callback: stop after 3 epochs without a val_loss
# improvement and restore the best weights.
early_stopping_1 = EarlyStopping(monitor='val_loss', patience=3, verbose=1, restore_best_weights=True)

# Add the early stopping callback to the list of callbacks
# (append mutates callbacks_list, so re-running this cell adds it again)
callbacks_list.append(early_stopping_1)

# Train the emotion_model with the added callbacks
hist = emotion_model.fit(X_1_train, y_1_train, batch_size=3, epochs=100, verbose=1, validation_data=(X_1_val, y_1_val), callbacks=callbacks_list)

In [None]:
import matplotlib.pyplot as plt
#rcParams['figure.figsize'] = 4, 4


# Plot training & validation loss values of the emotion model, one point per epoch
plt.plot(hist.history['loss'])
plt.plot(hist.history['val_loss'])
plt.title('emotion_model loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
# Legend entries follow the order of the two plot calls above.
plt.legend(['Train', 'Validation'], loc='upper left')
plt.show()

In [None]:
###################################################################################################################################################
###################################################################################################################################################

In [None]:
def preprocess_face(face_image):
    """Resize a face crop to 48x48, scale it to float32 in [0, 1] and add a batch axis.

    Returns the image with a new leading axis of length 1.
    """
    scaled = cv2.resize(face_image, (48, 48)).astype('float32') / 255.0
    return np.expand_dims(scaled, axis=0)

def predict_emotion(emotion_model, preprocessed_image):
    """Return the emotion label with the highest score from `emotion_model.predict`.

    The label is chosen by `np.argmax` over the prediction, indexing into
    ['disgust', 'Happy', 'Neutral'].
    """
    scores = emotion_model.predict(preprocessed_image)
    winner = np.argmax(scores)
    return ['disgust', 'Happy', 'Neutral'][winner]

In [None]:
# Function to detect faces and predict emotions in a frame using facial keypoints
def detect_and_predict_emotions_new(frame):
    """Annotate `frame` with the predicted emotion of the detected face.

    The frame is converted BGR -> RGB and passed to the module-level
    `holistic` model. When face landmarks are found, their bounding box is
    cropped from `frame`, classified with the module-level `emotion_model`,
    and the label plus a rectangle are drawn onto `frame`.

    Args:
        frame: Image array; treated as BGR by the colour conversion below.

    Returns:
        The same `frame` object, annotated when a usable face region was
        found and unchanged otherwise.
    """
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    results = holistic.process(frame_rgb)

    # Extract facial keypoints
    face_landmarks = results.face_landmarks

    # Nothing to annotate when no face was detected
    if not face_landmarks:
        return frame

    frame_h, frame_w = frame.shape[:2]

    # Convert the facial keypoints to numpy array
    landmarks_array = np.array([[res.x, res.y] for res in face_landmarks.landmark])

    # Bounding box in pixels, clamped to the frame. Without clamping, a
    # coordinate outside the image turns into a negative or oversized slice
    # index and can produce an empty or wrapped crop.
    min_x = max(int(np.min(landmarks_array[:, 0]) * frame_w), 0)
    max_x = min(int(np.max(landmarks_array[:, 0]) * frame_w), frame_w)
    min_y = max(int(np.min(landmarks_array[:, 1]) * frame_h), 0)
    max_y = min(int(np.max(landmarks_array[:, 1]) * frame_h), frame_h)

    # An empty crop would make cv2.resize raise; skip the frame instead
    if max_x <= min_x or max_y <= min_y:
        return frame

    # Extract the region of interest (face) from the frame
    face = frame[min_y:max_y, min_x:max_x]

    # Preprocess the face region and predict its emotion
    preprocessed_face = preprocess_face(face)
    predicted_emotion = predict_emotion(emotion_model, preprocessed_face)

    # Write the predicted emotion on the frame
    emotion_text = 'Emotion: ' + predicted_emotion
    cv2.putText(frame, emotion_text, (min_x, min_y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)

    # Draw a rectangle around the detected face
    cv2.rectangle(frame, (min_x, min_y), (max_x, max_y), (255, 0, 0), 2)

    return frame



In [None]:
# Bar colours for prob_viz, indexed by class position.
colors = [(245,117,16), (117,245,16), (16,117,245),(245,117,16),(117,245,16),(16,117,245)]
def prob_viz(res, actions, input_frame, colors):
    """Return a copy of `input_frame` with one labelled probability bar per entry of `res`.

    Bar `i` is 30 pixels tall, starts at y = 60 + 40*i, is `int(res[i]*100)`
    pixels wide, filled with `colors[i]` and captioned with `actions[i]`.
    The input frame itself is not modified.
    """
    canvas = input_frame.copy()
    row_step = 40
    for index, probability in enumerate(res):
        top = 60 + index * row_step
        bar_end = (int(probability * 100), top + 30)
        cv2.rectangle(canvas, (0, top), bar_end, colors[index], -1)
        cv2.putText(canvas, actions[index], (0, top + 25),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)

    return canvas

In [None]:
from google.colab.patches import cv2_imshow

In [None]:
!apt install ffmpeg

In [None]:
import os
import cv2
import mediapipe as mp
import numpy as np

import subprocess

# Load the holistic model
mp_holistic = mp.solutions.holistic
holistic = mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5)

# New detection variables
sequence = []
sentence = []
predictions = []
# NOTE(review): with three softmax classes the top score is always >= 1/3,
# so a threshold of 0.02 never filters anything - confirm the intended value.
threshold = 0.02

# Define actions
actions = ['dozz', 'study', 'yawn']

# Directory that receives the annotated frames
output_frames_dir = '/content/drive/MyDrive/output_frames'
os.makedirs(output_frames_dir, exist_ok=True)

# Capture video from the source
cap = cv2.VideoCapture('/content/drive/MyDrive/man_with_headphones.mp4')

frame_count = 0
try:
    while cap.isOpened() and frame_count < 300:  # Limit to 300 frames for demo
        # Read frame
        ret, frame = cap.read()
        if not ret:
            break

        # mediapipe_detection gives the model an RGB copy and returns a BGR
        # image. The annotated image is kept in BGR throughout because
        # detect_and_predict_emotions_new, cv2.imwrite and cv2_imshow all
        # work on BGR input.
        image, results = mediapipe_detection(frame, holistic)

        # Keep a rolling window of the last 50 keypoint vectors
        keypoints = extract_keypoints(results)
        sequence.append(keypoints)
        sequence = sequence[-50:]

        if len(sequence) == 50:
            # Reshape sequence to match the input shape of the model
            sequence_input = np.expand_dims(sequence, axis=0)  # Add batch dimension
            sequence_input = np.expand_dims(sequence_input, axis=1)  # Add length-1 axis after the batch axis
            print(sequence_input.shape)

            # Predict action
            res = model.predict(sequence_input)[0]
            best = int(np.argmax(res))
            predictions.append(best)

            # Visualize prediction probabilities
            image = prob_viz(res, actions, image, colors)

            # Only accept a label once the recent predictions all agree on it.
            # (np.unique(...)[0] returned the smallest recent class index, so
            # the check was biased towards low class ids.)
            stable = all(p == best for p in predictions[-10:])
            if stable and res[best] > threshold:
                if not sentence or actions[best] != sentence[-1]:
                    sentence.append(actions[best])

            if len(sentence) > 5:
                sentence = sentence[-5:]

        # Call emotion detection function
        image = detect_and_predict_emotions_new(image)

        # Draw landmarks
        draw_landmarks(image, results)

        # Draw predicted action text
        cv2.rectangle(image, (0, 0), (640, 40), (245, 117, 16), -1)
        cv2.putText(image, ' '.join(sentence), (3, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 0), 2, cv2.LINE_AA)

        # Save annotated frame (already BGR, so no conversion is needed)
        cv2.imwrite(os.path.join(output_frames_dir, f'frame_{frame_count:04d}.jpg'), image)

        # Show the frame
        cv2_imshow(image)

        frame_count += 1
finally:
    # Release the capture and the MediaPipe graph even if a frame failed
    cap.release()
    holistic.close()

# Create video from annotated frames
output_video_path = '/content/drive/MyDrive/output_menHeadphn_video.mp4'
if frame_count:
    ffmpeg_cmd = [
        'ffmpeg', '-y',                     # overwrite an existing output file
        '-r', '30',
        '-i', f'{output_frames_dir}/frame_%04d.jpg',
        '-frames:v', str(frame_count),      # ignore stale frames left by earlier runs
        '-vcodec', 'libx264', '-crf', '25', '-pix_fmt', 'yuv420p',
        output_video_path,
    ]
    # Argument list instead of a shell string, so paths are never re-parsed by a shell
    completed = subprocess.run(ffmpeg_cmd, check=False)
    if completed.returncode != 0:
        print(f"ffmpeg exited with status {completed.returncode}")


In [None]:
def draw_styled_landmarks(image, results):
    """Draw face, pose and hand landmarks on `image`, each group with its own colours.

    Every group is passed to `mp_drawing.draw_landmarks` with two drawing
    specs (landmark style first, connection style second). Nothing is returned.
    """
    spec = mp_drawing.DrawingSpec

    # NOTE(review): the face connection colour contains 256, which is above
    # the 8-bit range; kept unchanged here - confirm the intended value.
    styled_groups = (
        # (landmark group, connection set, landmark style, connection style)
        (results.face_landmarks, mp_holistic.FACEMESH_TESSELATION,
         spec(color=(80,110,10), thickness=1, circle_radius=1),
         spec(color=(80,256,121), thickness=1, circle_radius=1)),
        (results.pose_landmarks, mp_holistic.POSE_CONNECTIONS,
         spec(color=(80,22,10), thickness=2, circle_radius=4),
         spec(color=(80,44,121), thickness=2, circle_radius=2)),
        (results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
         spec(color=(121,22,76), thickness=2, circle_radius=4),
         spec(color=(121,44,250), thickness=2, circle_radius=2)),
        (results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
         spec(color=(245,117,66), thickness=2, circle_radius=4),
         spec(color=(245,66,230), thickness=2, circle_radius=2)),
    )

    for landmarks, connections, point_style, line_style in styled_groups:
        mp_drawing.draw_landmarks(image, landmarks, connections, point_style, line_style)

In [None]:
import os
import cv2
import mediapipe as mp
import numpy as np

import subprocess

# Load the holistic model
mp_holistic = mp.solutions.holistic
holistic = mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5)

# New detection variables
sequence = []
sentence = []
predictions = []
# NOTE(review): with three softmax classes the top score is always >= 1/3,
# so a threshold of 0.02 never filters anything - confirm the intended value.
threshold = 0.02

# Define actions
actions = ['dozz', 'study', 'yawn']

# Directory that receives the annotated frames
output_frames_dir = '/content/drive/MyDrive/output_frames'
os.makedirs(output_frames_dir, exist_ok=True)

# Capture video from the source
cap = cv2.VideoCapture('/content/drive/MyDrive/watermarked_preview (15).mp4')

frame_count = 0
try:
    while cap.isOpened() and frame_count < 300:  # Limit to 300 frames for demo
        # Read frame
        ret, frame = cap.read()
        if not ret:
            break

        # mediapipe_detection gives the model an RGB copy and returns a BGR
        # image. The annotated image is kept in BGR throughout because
        # detect_and_predict_emotions_new, cv2.imwrite and cv2_imshow all
        # work on BGR input.
        image, results = mediapipe_detection(frame, holistic)

        # Keep a rolling window of the last 50 keypoint vectors
        keypoints = extract_keypoints(results)
        sequence.append(keypoints)
        sequence = sequence[-50:]

        if len(sequence) == 50:
            # Reshape sequence to match the input shape of the model
            sequence_input = np.expand_dims(sequence, axis=0)  # Add batch dimension
            sequence_input = np.expand_dims(sequence_input, axis=1)  # Add length-1 axis after the batch axis
            print(sequence_input.shape)

            # Predict action
            res = model.predict(sequence_input)[0]
            best = int(np.argmax(res))
            predictions.append(best)

            # Visualize prediction probabilities
            image = prob_viz(res, actions, image, colors)

            # Only accept a label once the recent predictions all agree on it.
            # (np.unique(...)[0] returned the smallest recent class index, so
            # the check was biased towards low class ids.)
            stable = all(p == best for p in predictions[-10:])
            if stable and res[best] > threshold:
                if not sentence or actions[best] != sentence[-1]:
                    sentence.append(actions[best])

            if len(sentence) > 5:
                sentence = sentence[-5:]

        # Call emotion detection function
        image = detect_and_predict_emotions_new(image)

        # Draw landmarks
        draw_styled_landmarks(image, results)

        # Draw predicted action text
        cv2.rectangle(image, (0, 0), (640, 40), (245, 117, 16), -1)
        cv2.putText(image, ' '.join(sentence), (3, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 0), 2, cv2.LINE_AA)

        # Save annotated frame (already BGR, so no conversion is needed)
        cv2.imwrite(os.path.join(output_frames_dir, f'frame_{frame_count:04d}.jpg'), image)

        # Show the frame
        cv2_imshow(image)

        frame_count += 1
finally:
    # Release the capture and the MediaPipe graph even if a frame failed
    cap.release()
    holistic.close()

# Create video from annotated frames
output_video_path = '/content/drive/MyDrive/output_workingPerson_video.mp4'
if frame_count:
    ffmpeg_cmd = [
        'ffmpeg', '-y',                     # overwrite an existing output file
        '-r', '30',
        '-i', f'{output_frames_dir}/frame_%04d.jpg',
        '-frames:v', str(frame_count),      # ignore stale frames left by earlier runs
        '-vcodec', 'libx264', '-crf', '25', '-pix_fmt', 'yuv420p',
        output_video_path,
    ]
    # Argument list instead of a shell string, so paths are never re-parsed by a shell
    completed = subprocess.run(ffmpeg_cmd, check=False)
    if completed.returncode != 0:
        print(f"ffmpeg exited with status {completed.returncode}")


In [None]:
import os
import cv2
import mediapipe as mp
import numpy as np

import subprocess

# Load the holistic model
mp_holistic = mp.solutions.holistic
holistic = mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5)

# New detection variables
sequence = []
sentence = []
predictions = []
# NOTE(review): with three softmax classes the top score is always >= 1/3,
# so a threshold of 0.02 never filters anything - confirm the intended value.
threshold = 0.02

# Define actions
actions = ['dozz', 'study', 'yawn']

# Directory that receives the annotated frames
output_frames_dir = '/content/drive/MyDrive/output_frames'
os.makedirs(output_frames_dir, exist_ok=True)

# Capture video from the source
cap = cv2.VideoCapture('/content/drive/MyDrive/little_girl.mp4')

frame_count = 0
try:
    while cap.isOpened() and frame_count < 300:  # Limit to 300 frames for demo
        # Read frame
        ret, frame = cap.read()
        if not ret:
            break

        # mediapipe_detection gives the model an RGB copy and returns a BGR
        # image. The annotated image is kept in BGR throughout because
        # detect_and_predict_emotions_new, cv2.imwrite and cv2_imshow all
        # work on BGR input.
        image, results = mediapipe_detection(frame, holistic)

        # Keep a rolling window of the last 50 keypoint vectors
        keypoints = extract_keypoints(results)
        sequence.append(keypoints)
        sequence = sequence[-50:]

        if len(sequence) == 50:
            # Reshape sequence to match the input shape of the model
            sequence_input = np.expand_dims(sequence, axis=0)  # Add batch dimension
            sequence_input = np.expand_dims(sequence_input, axis=1)  # Add length-1 axis after the batch axis
            print(sequence_input.shape)

            # Predict action
            res = model.predict(sequence_input)[0]
            best = int(np.argmax(res))
            predictions.append(best)

            # Visualize prediction probabilities
            image = prob_viz(res, actions, image, colors)

            # Only accept a label once the recent predictions all agree on it.
            # (np.unique(...)[0] returned the smallest recent class index, so
            # the check was biased towards low class ids.)
            stable = all(p == best for p in predictions[-10:])
            if stable and res[best] > threshold:
                if not sentence or actions[best] != sentence[-1]:
                    sentence.append(actions[best])

            if len(sentence) > 5:
                sentence = sentence[-5:]

        # Call emotion detection function
        image = detect_and_predict_emotions_new(image)

        # Draw landmarks
        draw_styled_landmarks(image, results)

        # Draw predicted action text
        cv2.rectangle(image, (0, 0), (640, 40), (245, 117, 16), -1)
        cv2.putText(image, ' '.join(sentence), (3, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 0), 2, cv2.LINE_AA)

        # Save annotated frame (already BGR, so no conversion is needed)
        cv2.imwrite(os.path.join(output_frames_dir, f'frame_{frame_count:04d}.jpg'), image)

        # Show the frame
        cv2_imshow(image)

        frame_count += 1
finally:
    # Release the capture and the MediaPipe graph even if a frame failed
    cap.release()
    holistic.close()

# Create video from annotated frames
output_video_path = '/content/drive/MyDrive/output_littleGirl_video.mp4'
if frame_count:
    ffmpeg_cmd = [
        'ffmpeg', '-y',                     # overwrite an existing output file
        '-r', '30',
        '-i', f'{output_frames_dir}/frame_%04d.jpg',
        '-frames:v', str(frame_count),      # ignore stale frames left by earlier runs
        '-vcodec', 'libx264', '-crf', '25', '-pix_fmt', 'yuv420p',
        output_video_path,
    ]
    # Argument list instead of a shell string, so paths are never re-parsed by a shell
    completed = subprocess.run(ffmpeg_cmd, check=False)
    if completed.returncode != 0:
        print(f"ffmpeg exited with status {completed.returncode}")
