# **Pengenalan MediaPipe dan Implementasi**

1. Praktikum 1: Deteksi Tangan
2. Praktikum 2: Deteksi Wajah dengan Face Mesh
3. Praktikum 3: Deteksi Pose Tubuh
4. Praktikum 4: Tracking Objek 3D dengan Objectron (Opsional)
5. Studi Kasus dan Pengembangan Lanjutan
    *   Menggunakan hasil deteksi untuk interaksi real-time
    *   Integrasi dengan proyek AI lainnya



In [1]:
# Connect to Google Drive: mounts the user's Drive at /content/drive.
# `google.colab` is a Colab-runtime package, so this cell is meant to be run on Google Colab.
# NOTE(review): none of the cells visible in this notebook read from /content/drive, and the
# webcam cells (cv2.VideoCapture / cv2.imshow) presumably need a local kernel — confirm
# whether this mount is still required.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## 1. Deteksi Tangan

In [1]:
%pip install mediapipe

Collecting mediapipe
  Downloading mediapipe-0.10.21-cp310-cp310-manylinux_2_28_x86_64.whl (35.6 MB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m35.6/35.6 MB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0mm eta [36m0:00:01[0m[36m0:00:01[0mm
[?25hCollecting numpy<2
  Downloading numpy-1.26.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (18.2 MB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m18.2/18.2 MB[0m [31m1.5 MB/s[0m eta [36m0:00:00[0mm eta [36m0:00:01[0m[36m0:00:01[0m
[?25hCollecting attrs>=19.1.0
  Downloading attrs-25.3.0-py3-none-any.whl (63 kB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m63.8/63.8 KB[0m [31m1.9 MB/s[0m eta [36m0:00:00[0m[31m1.9 MB/s[0m eta [36m0:00:01[0m
[?25hCollecting flatbuffers>=2.0
  Downloading flatbuffers-25.2.10-py2.py3-none-any.whl (30 kB)
Collecting opencv-contrib-python
  Downloading opencv_contrib_python-4.11.0.86-cp37-abi

In [2]:
import cv2
import mediapipe as mp

# Real-time hand-landmark detection on the webcam feed.
# Press 'q' in the preview window to stop.

# MediaPipe Hands solution and the helper used to draw landmarks.
mp_hands = mp.solutions.hands
mp_draw = mp.solutions.drawing_utils

# Open the camera (device index 0).
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    # Without this message the loop below would simply be skipped with no feedback.
    print("Kamera 0 tidak dapat dibuka.")

try:
    # The context manager closes the MediaPipe graph when the block exits.
    with mp_hands.Hands(min_detection_confidence=0.5, min_tracking_confidence=0.5) as hands:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # OpenCV delivers BGR frames; MediaPipe expects RGB.
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            # Run hand detection / tracking on this frame.
            results = hands.process(frame_rgb)

            # Draw the landmarks and their connections for every detected hand.
            if results.multi_hand_landmarks:
                for hand_landmarks in results.multi_hand_landmarks:
                    mp_draw.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

            # Show the annotated frame.
            cv2.imshow("MediaPipe Hands", frame)

            # Press 'q' to quit.
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
finally:
    # Always free the camera and close the window, even when the cell is
    # interrupted or an exception is raised mid-loop.
    cap.release()
    cv2.destroyAllWindows()

Matplotlib is building the font cache; this may take a moment.
I0000 00:00:1742528776.627092   12337 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1742528776.632612   12648 gl_context.cc:369] GL version: 3.2 (OpenGL ES 3.2 Mesa 22.2.5), renderer: STONEY (stoney, LLVM 15.0.6, DRM 3.42, 5.15.0-76-generic)
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
W0000 00:00:1742528776.704485   12641 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1742528776.746783   12642 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
[ WARN:0@18.595] global cap_v4l.cpp:1048 tryIoctl VIDEOIO(V4L2:/dev/video0): select() timeout.


## 2. Deteksi Wajah dengan Face Mesh

In [11]:
import cv2
import mediapipe as mp

# Real-time face-mesh detection on the webcam feed (full tessellation).
# Press 'q' in the preview window to stop.

# MediaPipe Face Mesh solution and the helper used to draw landmarks.
mp_face_mesh = mp.solutions.face_mesh
mp_draw = mp.solutions.drawing_utils

# Open the camera (device index 0).
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    # Without this message the loop below would simply be skipped with no feedback.
    print("Kamera 0 tidak dapat dibuka.")

try:
    # The context manager closes the MediaPipe graph when the block exits.
    with mp_face_mesh.FaceMesh() as face_mesh:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # OpenCV delivers BGR frames; MediaPipe expects RGB.
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            # Run face-mesh inference on this frame.
            results = face_mesh.process(frame_rgb)

            # Draw the mesh tessellation for every detected face.
            if results.multi_face_landmarks:
                for face_landmarks in results.multi_face_landmarks:
                    mp_draw.draw_landmarks(frame, face_landmarks, mp_face_mesh.FACEMESH_TESSELATION)

            # Show the annotated frame.
            cv2.imshow("MediaPipe Face Mesh", frame)

            # Press 'q' to quit.
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
finally:
    # Always free the camera and close the window, even when the cell is
    # interrupted or an exception is raised mid-loop.
    cap.release()
    cv2.destroyAllWindows()


In [7]:
import cv2
import mediapipe as mp

# Real-time face mesh on the webcam feed, drawing only the face contours.
# Press 'q' in the preview window to stop.

# MediaPipe Face Mesh solution plus drawing helpers and default styles.
mp_face_mesh = mp.solutions.face_mesh
mp_draw = mp.solutions.drawing_utils
mp_styles = mp.solutions.drawing_styles

# The contour style does not change between frames, so look it up once.
contour_style = mp_styles.get_default_face_mesh_contours_style()

# Open the camera (device index 0).
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    # Without this message the loop below would simply be skipped with no feedback.
    print("Kamera 0 tidak dapat dibuka.")

try:
    # refine_landmarks=True is requested for more precise landmarks.
    # The context manager closes the MediaPipe graph when the block exits.
    with mp_face_mesh.FaceMesh(refine_landmarks=True) as face_mesh:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # OpenCV delivers BGR frames; MediaPipe expects RGB.
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            # Run face-mesh inference on this frame.
            results = face_mesh.process(frame_rgb)

            # For every detected face draw only the contour connections;
            # landmark_drawing_spec=None suppresses the individual landmark dots.
            if results.multi_face_landmarks:
                for face_landmarks in results.multi_face_landmarks:
                    mp_draw.draw_landmarks(
                        frame,
                        face_landmarks,
                        mp_face_mesh.FACEMESH_CONTOURS,
                        landmark_drawing_spec=None,
                        connection_drawing_spec=contour_style,
                    )

            # Show the annotated frame.
            cv2.imshow("Face Mesh - Hanya Kontur Wajah", frame)

            # Press 'q' to quit.
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
finally:
    # Always free the camera and close the window, even when the cell is
    # interrupted or an exception is raised mid-loop.
    cap.release()
    cv2.destroyAllWindows()


In [None]:
import cv2
import mediapipe as mp

# Real-time face mesh on the webcam feed, drawing only the lips and both eyes.
# Press 'q' in the preview window to stop.

# MediaPipe Face Mesh solution plus drawing helpers and default styles.
mp_face_mesh = mp.solutions.face_mesh
mp_draw = mp.solutions.drawing_utils
mp_styles = mp.solutions.drawing_styles

# Facial regions to draw, in drawing order: lips, left eye, right eye.
face_regions = (
    mp_face_mesh.FACEMESH_LIPS,
    mp_face_mesh.FACEMESH_LEFT_EYE,
    mp_face_mesh.FACEMESH_RIGHT_EYE,
)
# The contour style does not change between frames, so look it up once.
contour_style = mp_styles.get_default_face_mesh_contours_style()

# Open the camera. Note this cell uses device index 1, not 0.
cap = cv2.VideoCapture(1)
if not cap.isOpened():
    # Without this message the loop below would simply be skipped with no feedback.
    print("Kamera 1 tidak dapat dibuka.")

try:
    # The context manager closes the MediaPipe graph when the block exits.
    with mp_face_mesh.FaceMesh(refine_landmarks=True) as face_mesh:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # OpenCV delivers BGR frames; MediaPipe expects RGB.
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            # Run face-mesh inference on this frame.
            results = face_mesh.process(frame_rgb)

            # For every detected face draw each selected region;
            # landmark_drawing_spec=None suppresses the individual landmark dots.
            if results.multi_face_landmarks:
                for face_landmarks in results.multi_face_landmarks:
                    for region in face_regions:
                        mp_draw.draw_landmarks(
                            frame,
                            face_landmarks,
                            region,
                            landmark_drawing_spec=None,
                            connection_drawing_spec=contour_style,
                        )

            # Show the annotated frame.
            cv2.imshow("Face Mesh - Hanya Mata & Bibir", frame)

            # Press 'q' to quit.
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
finally:
    # Always free the camera and close the window, even when the cell is
    # interrupted (e.g. KeyboardInterrupt) or an exception is raised mid-loop.
    cap.release()
    cv2.destroyAllWindows()


I0000 00:00:1742528943.405050   12337 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1742528943.416900   12930 gl_context.cc:369] GL version: 3.2 (OpenGL ES 3.2 Mesa 22.2.5), renderer: STONEY (stoney, LLVM 15.0.6, DRM 3.42, 5.15.0-76-generic)
W0000 00:00:1742528943.445514   12928 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1742528943.515690   12928 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1742528944.256243   12928 landmark_projection_calculator.cc:186] Using NORM_RECT without IMAGE_DIMENSIONS is only supported for the square ROI. Provide IMAGE_DIMENSIONS or use PROJECTION_MATRIX.


KeyboardInterrupt: 

: 

## 3. Deteksi Pose Tubuh dengan MediaPipe Pose

In [None]:
import cv2
import mediapipe as mp

# Real-time body-pose detection on the webcam feed.
# Press 'q' in the preview window to stop.

# MediaPipe Pose solution and the helper used to draw landmarks.
mp_pose = mp.solutions.pose
mp_draw = mp.solutions.drawing_utils

# Open the camera. Note this cell uses device index 1, not 0.
cap = cv2.VideoCapture(1)
if not cap.isOpened():
    # Without this message the loop below would simply be skipped with no feedback.
    print("Kamera 1 tidak dapat dibuka.")

try:
    # The context manager closes the MediaPipe graph when the block exits.
    with mp_pose.Pose() as pose:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # OpenCV delivers BGR frames; MediaPipe expects RGB.
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            # Run pose estimation on this frame.
            results = pose.process(frame_rgb)

            # Draw the pose landmarks and their connections when a pose is detected.
            if results.pose_landmarks:
                mp_draw.draw_landmarks(frame, results.pose_landmarks, mp_pose.POSE_CONNECTIONS)

            # Show the annotated frame.
            cv2.imshow("MediaPipe Pose", frame)

            # Press 'q' to quit.
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
finally:
    # Always free the camera and close the window, even when the cell is
    # interrupted (e.g. KeyboardInterrupt) or an exception is raised mid-loop.
    cap.release()
    cv2.destroyAllWindows()


I0000 00:00:1742529002.184969   12957 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1742529002.191461   12992 gl_context.cc:369] GL version: 3.2 (OpenGL ES 3.2 Mesa 22.2.5), renderer: STONEY (stoney, LLVM 15.0.6, DRM 3.42, 5.15.0-76-generic)
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
W0000 00:00:1742529002.411179   12985 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1742529002.550343   12986 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1742529003.652015   12986 landmark_projection_calculator.cc:186] Using NORM_RECT without IMAGE_DIMENSIONS is only supported for the square ROI. Provide IMAGE_DIMENSIONS or use PROJECTION_MATRIX.


KeyboardInterrupt: 

: 

## Dataset Berupa Gambar (JPG, PNG, dll.)

In [13]:
import cv2
import mediapipe as mp
import os

# Batch face-mesh annotation of still images: every image in `image_folder` is
# annotated with the face-mesh tessellation and written under the same file name
# to `output_folder`.

# MediaPipe Face Mesh solution and the helper used to draw landmarks.
mp_face_mesh = mp.solutions.face_mesh
mp_draw = mp.solutions.drawing_utils

# Input dataset folder and output folder.
image_folder = "dataset/gambar/"
output_folder = "output_images/"

# Create the output folder if it does not exist yet.
os.makedirs(output_folder, exist_ok=True)

try:
    # static_image_mode=True runs detection on every image instead of tracking
    # landmarks from the previous one; the dataset images are unrelated to each other.
    with mp_face_mesh.FaceMesh(static_image_mode=True, refine_landmarks=True) as face_mesh:
        # sorted() makes the processing order deterministic across runs.
        for image_file in sorted(os.listdir(image_folder)):
            # Skip non-image files; compare case-insensitively so ".JPG" etc. are kept.
            if not image_file.lower().endswith((".jpg", ".png", ".jpeg")):
                continue

            image_path = os.path.join(image_folder, image_file)
            frame = cv2.imread(image_path)
            if frame is None:
                # cv2.imread returns None for unreadable/corrupt files instead of raising.
                print(f"Gagal membaca gambar: {image_path}")
                continue

            # OpenCV loads BGR images; MediaPipe expects RGB.
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            results = face_mesh.process(frame_rgb)

            # Draw the mesh for every detected face.
            if results.multi_face_landmarks:
                for face_landmarks in results.multi_face_landmarks:
                    mp_draw.draw_landmarks(
                        frame, face_landmarks, mp_face_mesh.FACEMESH_TESSELATION
                    )

            # Save the (possibly annotated) image.
            output_path = os.path.join(output_folder, image_file)
            cv2.imwrite(output_path, frame)

            # Optional preview: show each result for about half a second.
            cv2.imshow("Face Landmarks", frame)
            cv2.waitKey(500)
finally:
    # Close the preview window even if processing stops early.
    cv2.destroyAllWindows()

print(f"Semua gambar telah diproses dan disimpan di {output_folder}")


Semua gambar telah diproses dan disimpan di output_images/


## Dataset Berupa Video (MP4, AVI, dll.)

In [None]:
import cv2
import mediapipe as mp
import csv
import os

# Batch pose extraction from videos: for every video in `video_folder`, each frame
# with a detected pose is appended to `output_csv` as one row
# (video name, frame index, then x/y/z/visibility for each landmark).

# MediaPipe Pose solution and the helper used to draw landmarks.
mp_pose = mp.solutions.pose
mp_draw = mp.solutions.drawing_utils

# Input dataset folder and output CSV path.
video_folder = "dataset/video/"
output_csv = "dataset_pose_landmarks.csv"

# The CSV is opened in append mode, so rows accumulate across runs. The header is
# written only when the file is new or still empty (an existing zero-byte file
# would otherwise never get a header).
file_exists = os.path.isfile(output_csv) and os.path.getsize(output_csv) > 0

try:
    with open(output_csv, mode="a", newline="") as file:
        writer = csv.writer(file)

        if not file_exists:
            header = ["video_name", "frame"]
            for i in range(33):  # 33 pose landmarks
                header.extend([f"x_{i}", f"y_{i}", f"z_{i}", f"v_{i}"])  # x, y, z, visibility
            writer.writerow(header)

        # sorted() makes the processing order deterministic across runs.
        for video_file in sorted(os.listdir(video_folder)):
            # Skip non-video files; compare case-insensitively so ".MP4" etc. are kept.
            if not video_file.lower().endswith((".mp4", ".avi", ".mov")):
                continue

            video_path = os.path.join(video_folder, video_file)
            cap = cv2.VideoCapture(video_path)
            frame_count = 0

            try:
                # A fresh Pose instance per video, so tracking state from the
                # previous video is not carried over into the next one.
                with mp_pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.5) as pose:
                    while cap.isOpened():
                        ret, frame = cap.read()
                        if not ret:
                            break

                        # OpenCV delivers BGR frames; MediaPipe expects RGB.
                        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                        results = pose.process(frame_rgb)

                        # Frames without a detected pose produce no CSV row.
                        if results.pose_landmarks:
                            row = [video_file, frame_count]
                            for lm in results.pose_landmarks.landmark:
                                row.extend([lm.x, lm.y, lm.z, lm.visibility])
                            writer.writerow(row)

                            # Optional: draw the landmarks on the preview frame.
                            mp_draw.draw_landmarks(frame, results.pose_landmarks, mp_pose.POSE_CONNECTIONS)

                        # Optional preview of the frame being processed.
                        cv2.imshow("Processing Video Dataset", frame)
                        frame_count += 1

                        # 'q' stops the current video only; the outer loop then
                        # continues with the next file.
                        if cv2.waitKey(1) & 0xFF == ord('q'):
                            break
            finally:
                # Release the video handle even if processing is interrupted.
                cap.release()
finally:
    # Close the preview window even if processing stops early.
    cv2.destroyAllWindows()

I0000 00:00:1742530604.238217   17894 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1742530604.247063   17960 gl_context.cc:369] GL version: 3.2 (OpenGL ES 3.2 Mesa 22.2.5), renderer: STONEY (stoney, LLVM 15.0.6, DRM 3.42, 5.15.0-76-generic)
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
W0000 00:00:1742530604.854197   17954 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1742530605.101930   17953 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1742530605.213536   17954 landmark_projection_calculator.cc:186] Using NORM_RECT without IMAGE_DIMENSIONS is only supported for the square ROI. Provide IMAGE_DIMENSIONS or use PROJECTION_MATRIX.


KeyboardInterrupt: 

: 