# Extracting frames and landmarks from dataset

# %load_ext cudf.pandas

In [None]:
from tqdm.notebook import tqdm
import pandas as pd
import mediapipe as mp
import cv2
from pymongo import MongoClient
import gc
from joblib import Parallel, delayed
# import numpy as np
# import matplotlib.pyplot as plt
# from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor
# from pandarallel import pandarallel
# import dask.dataframe as dd

In [None]:
# Alias for MediaPipe's holistic solution module; extract_landmarks creates its
# Holistic model from this alias.
mp_holistic = mp.solutions.holistic
# mp_drawing = mp.solutions.drawing_utils

In [None]:
# MongoDB names used by this notebook: the raw records are read from the input
# collection and the extracted landmark documents are inserted into the output
# collection.
db_name = 'mydb'
input_collection_name = 'Sign_Language_Final_Data'
output_collection_name = 'Sign_Language_Processed_Data'

In [None]:
# Load every document of the input collection into a DataFrame.
# The client is used as a context manager so the connection is closed even if
# the query or the DataFrame construction raises.
with MongoClient('localhost', 27017) as connection:
    db = connection[db_name]
    input_collection = db[input_collection_name]
    cursor = input_collection.find({})
    # Materialize the cursor while the connection is still open.
    df = pd.DataFrame(list(cursor))

In [None]:
df.head()

In [None]:
# Drop the '_id', 'source' and 'url' columns in place; none of them is
# referenced by the processing code in this notebook.
df.drop(columns = ['_id', 'source', 'url'], inplace = True)

In [None]:
df.head()

In [None]:
# Remove the 'v_id_' marker from the video ids (every occurrence of the
# substring, not only a leading one). extract_landmarks uses the remaining
# value as the video file name.
df['video_id'] = df['video_id'].str.replace('v_id_', '')

In [None]:
df.head()

In [None]:
len(df)

In [None]:
df['fps'].unique().tolist()

In [None]:
df['frame_start'].unique().tolist()

In [None]:
df['frame_end'].unique().tolist()

In [None]:
df['split'].unique().tolist()

In [None]:
(df['split'] == 'train').sum()

In [None]:
(df['split'] == 'test').sum()

In [None]:
(df['split'] == 'val').sum()

In [None]:
df['gloss'].nunique()

In [None]:
df.dtypes

In [None]:
def count_frames(video_path):
    """Return the frame count OpenCV reports for the video at ``video_path``.

    The capture is opened only for this query and released before returning.
    The value is whatever ``CAP_PROP_FRAME_COUNT`` reports, truncated to int.
    """
    capture = cv2.VideoCapture(video_path)
    reported_total = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))
    capture.release()
    return reported_total

### Normalizing the Landmark points
- This is an important step: it lets us generate similar landmark position values for the same gesture performed in different orientations by introducing
    - Position Invariance
    - Scale Invariance

In [None]:
def normalize_landmarks(x, y, z, x_min, y_min, x_max, y_max, f_width, f_height):
    """Express a single landmark relative to a bounding box.

    ``x`` and ``y`` are scaled by ``f_width`` and ``f_height``, shifted by the
    box's minimum corner ``(x_min, y_min)`` and divided by the box width and
    height respectively. ``z`` is passed through unchanged.

    Returns:
        A one-element list containing the ``(norm_x, norm_y, norm_z)`` tuple.

    Raises:
        ZeroDivisionError: with plain Python numbers, when the box has zero
            width or zero height.
    """
    box_width = x_max - x_min
    box_height = y_max - y_min

    # Same evaluation order as before: scale, shift, then divide by box size.
    scaled_point = (
        ((x * f_width) - x_min) / box_width,
        ((y * f_height) - y_min) / box_height,
        z,
    )
    return [scaled_point]

In [None]:
def format_frames(frame, output_size, x_min, y_min, x_max, y_max, f_width, f_height):
    """Resize a frame and rescale its bounding box to the new resolution.

    Args:
        frame: image passed to ``cv2.resize``.
        output_size: ``(width, height)`` of the resized frame.
        x_min, y_min, x_max, y_max: bounding box in the original frame's
            coordinates.
        f_width, f_height: size of the original frame, used to derive the
            per-axis scale factors.

    Returns:
        ``(resized_frame, x_min, y_min, x_max, y_max)`` with the box values
        multiplied by the per-axis scale factors.
    """
    target_width, target_height = output_size

    # The box has to shrink or grow by the same ratio as the frame does.
    scale_x = target_width / f_width
    scale_y = target_height / f_height

    resized = cv2.resize(frame, output_size)

    return (
        resized,
        x_min * scale_x,
        y_min * scale_y,
        x_max * scale_x,
        y_max * scale_y,
    )

In [None]:
def _landmarks_or_zeros(landmark_list, count, box, frame_size):
    """Normalize a detected landmark group, or return zero placeholders.

    Args:
        landmark_list: one of the ``*_landmarks`` fields of a Holistic result;
            falsy when nothing was detected.
        count: number of ``(0, 0, 0)`` placeholders to emit when the group is
            missing.
        box: ``(x_min, y_min, x_max, y_max)`` bounding box in resized-frame
            coordinates.
        frame_size: ``(width, height)`` forwarded to ``normalize_landmarks``.

    NOTE(review): ``normalize_landmarks`` returns a one-element list per
    landmark while the placeholders are bare tuples, so detected and missing
    groups have different nesting. This is kept as-is to avoid changing the
    stored document shape; confirm what downstream consumers expect.
    """
    if not landmark_list:
        return [(0, 0, 0) for _ in range(count)]

    x_min, y_min, x_max, y_max = box
    width, height = frame_size
    return [
        normalize_landmarks(lm.x, lm.y, lm.z, x_min, y_min, x_max, y_max,
                            f_width=width, f_height=height)
        for lm in landmark_list.landmark
    ]


def extract_landmarks(row):
    """Sample frames of one video and extract face and hand landmarks.

    Args:
        row: record exposing ``video_id``, ``frame_start``, ``frame_end``
            (``-1`` means "until the last frame"), ``bbox`` as
            ``(x_min, y_min, x_max, y_max)``, ``fps``, ``gloss`` and
            ``instance_id`` as attributes.

    Returns:
        A dict with ``gloss``, ``instance_id`` and ``landmarks_sequence``.
        The sequence holds one dict per sampled frame with the keys ``face``
        (468 entries), ``left_hand`` and ``right_hand`` (21 entries each).
        The sequence is empty if the video cannot be opened or read.
    """
    video_path = f'./kaggle-dataset/videos/{row.video_id}.mp4'
    cap = cv2.VideoCapture(video_path)  # Open video for each call
    output_size = (224, 224)

    try:
        frame_start = row.frame_start
        if row.frame_end == -1:
            # Ask the already-open capture for the frame count instead of
            # opening the file a second time.
            frame_end = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) - 1
        else:
            frame_end = row.frame_end

        x_min, y_min, x_max, y_max = row.bbox
        # Sample roughly 10 frames per second. The step must be at least 1:
        # for fps < 10 the integer division yields 0, which would pin the read
        # position on the same frame and never terminate the loop.
        skip_interval = max(1, int(row.fps / 10))

        frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

        # Landmarks collected for this row, one entry per sampled frame
        landmarks_sequence = []
        cropped_width = x_max - x_min
        cropped_height = y_max - y_min

        with mp_holistic.Holistic(static_image_mode=False, model_complexity=2) as holistic:
            # NOTE(review): `image_dimensions` does not look like a documented
            # Holistic option; this assignment is kept from the original code,
            # confirm whether it has any effect.
            holistic.image_dimensions = (cropped_width, cropped_height)
            cap.set(cv2.CAP_PROP_POS_FRAMES, frame_start)

            while cap.isOpened() and frame_start <= frame_end:
                current_frame = int(cap.get(cv2.CAP_PROP_POS_FRAMES))
                if current_frame > frame_end:
                    break

                ret, frame = cap.read()
                if not ret:
                    break

                frame, n_x_min, n_y_min, n_x_max, n_y_max = format_frames(
                    frame, output_size=output_size,
                    x_min=x_min, y_min=y_min, x_max=x_max, y_max=y_max,
                    f_width=frame_width, f_height=frame_height,
                )

                cropped_frame = frame[int(n_y_min):int(n_y_max), int(n_x_min):int(n_x_max)]
                image_rgb = cv2.cvtColor(cropped_frame, cv2.COLOR_BGR2RGB)
                results = holistic.process(image_rgb)

                # NOTE(review): the model sees the cropped image, so its
                # landmark coordinates are presumably relative to the crop,
                # yet they are normalized against the full resized frame.
                # Confirm this is the intended reference frame.
                box = (n_x_min, n_y_min, n_x_max, n_y_max)
                # All three groups use the rescaled box; the right hand
                # previously received the un-rescaled y_max.
                frame_landmarks = {
                    "face": _landmarks_or_zeros(results.face_landmarks, 468, box, output_size),
                    "left_hand": _landmarks_or_zeros(results.left_hand_landmarks, 21, box, output_size),
                    "right_hand": _landmarks_or_zeros(results.right_hand_landmarks, 21, box, output_size),
                }

                landmarks_sequence.append(frame_landmarks)
                frame_start += skip_interval
                cap.set(cv2.CAP_PROP_POS_FRAMES, frame_start)
    finally:
        cap.release()  # Ensure cap is released no matter what

    return {"gloss": row.gloss, "instance_id": row.instance_id, "landmarks_sequence": landmarks_sequence}

In [None]:
# import warnings
# warnings.filterwarnings("ignore")

# import logging

# # Configure logging
# logging.basicConfig(level=logging.INFO)



# tqdm.pandas(desc="Processing videos")






# some_rows = df.iloc[[0, 1, 2, 3, 4,]]

# n_jobs = -1

# results = Parallel(n_jobs=n_jobs, backend="threading")(
#     delayed(extract_landmarks)(row) for row in tqdm(df.itertuples(), total=len(df))
# )














# Process the dataset in small batches so that only one batch of landmark
# sequences is held in memory at a time; each batch is written to MongoDB
# before the next one starts.
batch_size = 20
n_jobs = -1

for start_idx in range(0, len(df), batch_size):
    end_idx = min(start_idx + batch_size, len(df))
    batch = df.iloc[start_idx:end_idx].copy()  # Copy batch to avoid reference issues

    # Create and run the Parallel instance for the current batch
    with Parallel(n_jobs=n_jobs, backend="multiprocessing") as parallel:
        batch_results = parallel(
            delayed(extract_landmarks)(row) for _, row in tqdm(batch.iterrows(), total=len(batch))
        )

    # A fresh client is opened per batch, after the workers have finished.
    # The context manager closes it even when the insert raises.
    with MongoClient('localhost', 27017) as connection:
        db = connection[db_name]
        output_collection = db[output_collection_name]

        # insert_many rejects an empty document list, so skip empty batches.
        if batch_results:
            output_collection.insert_many(batch_results)

    # Clear batch and batch results from memory
    del batch, batch_results, parallel  # Explicitly delete the parallel instance
    gc.collect()  # Explicitly trigger garbage collection










# def process_and_clear(row):
#     result = extract_landmarks(row)
#     del row  # Free up memory for each row after processing
#     return result

# # Run parallel processing with memory-efficient approach
# results = []
# for res in Parallel(n_jobs=n_jobs, backend="threading")(
#     delayed(process_and_clear)(row) for row in tqdm(df.itertuples(), total=len(df))
# ):
#     results.append(res)
#     del res











# dask_df = dd.from_pandas(some_rows, npartitions=14)

# def apply_extract_landmarks(df):
#     return df.apply(extract_landmarks, axis=1)


# from dask.diagnostics import ProgressBar
# with ProgressBar():
#     result = dask_df.map_partitions(apply_extract_landmarks).compute()



# landmarks_data = some_rows.parallel_apply(extract_landmarks, axis = 1)


# def parallel_extract_landmarks(df):
#     with ThreadPoolExecutor() as executor:
#         results = list(executor.map(extract_landmarks, [row for _, row in df.iterrows()]))
#     return pd.DataFrame(results)



# results_df = parallel_extract_landmarks(some_rows)







# some_rows.progress_apply(extract_landmarks, axis = 1)

# with ThreadPoolExecutor(max_workers=20) as executor:
#     futures = {executor.submit(extract_landmarks, row): index for index, row in some_rows.iterrows()}
#     landmarks_data = []

#     for future in tqdm(futures, desc="Collecting results"):
#         result = future.result()
#         landmarks_data.append(result)
#         # logging.info(f"Collected landmarks for gloss: {result[0]}, instance_id: {result[1]}")

# landmarks_df = pd.DataFrame(landmarks_data, columns=["gloss", "instance_id", "landmarks_sequence"])

# # logging.info(landmarks_df)

# merged_df = pd.merge(df, landmarks_df, on=['gloss', 'instance_id'], how='inner')

# # Release all video captures after processing
# for cap in video_capture_cache.values():
#     cap.release()


# holistic.close()
# gc.collect()

In [None]:
# `results` is never assigned by the batch loop (it only appears in the
# commented-out experiments), so the processed documents are read back from
# the output collection they were inserted into.
# NOTE(review): this loads the whole output collection, including documents
# left over from earlier runs. Clear or filter the collection if the notebook
# is re-run.
with MongoClient('localhost', 27017) as connection:
    results = list(connection[db_name][output_collection_name].find({}, {'_id': 0}))

landmarks_df = pd.DataFrame(results, columns=["gloss","instance_id", "landmarks_sequence"])

In [None]:
landmarks_df

In [None]:
# Attach each landmark sequence to its metadata row, joining on the
# ('gloss', 'instance_id') pair. An inner join keeps only rows that are present
# in both frames.
merged_df = pd.merge(df, landmarks_df, on=['gloss', 'instance_id'], how='inner')  # You can change 'inner' to 'outer', 'left', or 'right' depending on the merge type you want.

In [None]:
merged_df

In [None]:
# cap = cv2.VideoCapture(0)

# with mp_holistic.Holistic(static_image_mode=False, 
#                            model_complexity=2, 
#                            enable_segmentation=True,
#                            min_detection_confidence=0.5,
#                            min_tracking_confidence=0.5) as holistic:
#     while cap.isOpened():
#         ret,frame = cap.read()
#         if not ret:
#             print("Ignoring empty camera frame")
#             continue

#         frame = cv2.flip(frame, 1)
        
#         image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
#         image.flags.writeable = False

#         results = holistic.process(image)

#         image.flags.writeable = True
#         image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

#         if results.face_landmarks:
#              mp_drawing.draw_landmarks(image, results.face_landmarks, mp_holistic.FACEMESH_CONTOURS)
#         # if results.pose_landmarks:
#         #      mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS)
#         if results.left_hand_landmarks:
#              mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS)
#         if results.right_hand_landmarks:
#              mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS)

#         cv2.imshow('TEST', image)

#         if cv2.waitKey(5) & 0xFF == 27:  # Press 'Esc' to exit
#                 break

# cap.release()
# cv2.destroyAllWindows()
