# Install Dependencies

In [None]:
%pip install tensorflow numpy mediapipe opencv-python tensorflowjs pyautogui

# Import Libraries

In [1]:
import cv2
import numpy as np
import mediapipe as mp
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
import os




# Deteksi Tangan dengan MediaPipe

In [2]:
# Shortcuts to the MediaPipe drawing helpers and the hand-tracking solution.
mp_drawing = mp.solutions.drawing_utils
mp_hands = mp.solutions.hands

# Shared hand detector/tracker; both confidence thresholds are set to 0.5.
hands = mp_hands.Hands(
    min_tracking_confidence=0.5,
    min_detection_confidence=0.5,
)

# Ambil Data Latihan

In [3]:
# Record hand-landmark samples from the webcam, one pair of .npy files
# (<gesture>_data.npy / <gesture>_labels.npy) per gesture, under DATA_DIR.
DATA_DIR = "hand_data"
os.makedirs(DATA_DIR, exist_ok=True)

# Gestures to record
gestures = ["ss", "transfer_SS"]
num_samples = 100  # Number of camera frames captured per gesture

# Every detected hand contributes one (x, y) pair per MediaPipe hand landmark.
features_per_hand = 2 * len(mp_hands.HandLandmark)

cap = cv2.VideoCapture(0)

try:
    if not cap.isOpened():
        raise RuntimeError("Failed to open camera 0")

    for gesture_name in gestures:
        print(f"Bersiap untuk merekam gesture: {gesture_name}")
        print("Tekan 's' untuk mulai merekam...")

        # Show a live preview until the user presses 's'.
        while True:
            ret, frame = cap.read()
            if not ret:
                # cap.read() returns frame=None on failure; fail with a clear message
                # instead of crashing inside cv2.flip.
                raise RuntimeError("Failed to grab frame")
            frame = cv2.flip(frame, 1)
            cv2.putText(frame, f"Tekan 's' untuk rekam {gesture_name}", (50, 50),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
            cv2.imshow("Hand Tracking", frame)

            if cv2.waitKey(1) & 0xFF == ord('s'):
                break

        print(f"Merekam {num_samples} sampel untuk gesture: {gesture_name}")
        data, labels = [], []

        for i in range(num_samples):
            ret, frame = cap.read()
            if not ret:
                raise RuntimeError("Failed to grab frame")
            frame = cv2.flip(frame, 1)
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            result = hands.process(rgb_frame)

            if result.multi_hand_landmarks:
                for hand_landmarks in result.multi_hand_landmarks:
                    # Flatten the landmarks to [x0, y0, x1, y1, ...].
                    landmarks = []
                    for lm in hand_landmarks.landmark:
                        landmarks.extend((lm.x, lm.y))
                    data.append(landmarks)
                    labels.append(gesture_name)

                    mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

            cv2.putText(frame, f"Recording {gesture_name}: {i+1}/{num_samples}", (50, 50),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
            cv2.imshow("Hand Tracking", frame)

            # 'q' stops recording the current gesture early.
            if cv2.waitKey(10) & 0xFF == ord('q'):
                break

        if not data:
            print(f"Warning: no hand was detected while recording {gesture_name}.")

        # Save the data. The reshape keeps a consistent (n_samples, n_features) layout
        # even when no hand was detected, so the files can always be concatenated later.
        np.save(os.path.join(DATA_DIR, f"{gesture_name}_data.npy"),
                np.array(data, dtype=float).reshape(-1, features_per_hand))
        np.save(os.path.join(DATA_DIR, f"{gesture_name}_labels.npy"), np.array(labels))
        print(f"Selesai merekam {gesture_name}! Data disimpan.")
finally:
    # Always free the camera and close the preview window, even after an error
    # or a kernel interrupt.
    cap.release()
    cv2.destroyAllWindows()

print("Semua data gesture telah direkam dan disimpan.")

Bersiap untuk merekam gesture: ss
Tekan 's' untuk mulai merekam...
Merekam 100 sampel untuk gesture: ss
Selesai merekam ss! Data disimpan.
Bersiap untuk merekam gesture: transfer_SS
Tekan 's' untuk mulai merekam...
Merekam 100 sampel untuk gesture: transfer_SS
Selesai merekam transfer_SS! Data disimpan.
Semua data gesture telah direkam dan disimpan.


# Load Data dan Preprocessing

In [4]:
# Load every recorded gesture and build the training arrays X (features) and y (labels).
# Each "<gesture>_data.npy" is paired explicitly with its "<gesture>_labels.npy" so that
# samples and labels cannot get out of step (os.listdir returns files in arbitrary order).
data_suffix = "_data.npy"
label_suffix = "_labels.npy"

data_files = sorted(f for f in os.listdir(DATA_DIR) if f.endswith(data_suffix))
label_files = [f[:-len(data_suffix)] + label_suffix for f in data_files]

X, y = [], []
for data_file, label_file in zip(data_files, label_files):
    samples = np.load(os.path.join(DATA_DIR, data_file))
    sample_labels = np.load(os.path.join(DATA_DIR, label_file))

    if len(samples) == 0:
        # A recording in which no hand was detected has nothing to contribute.
        print(f"Skipping {data_file}: no samples recorded")
        continue
    if len(samples) != len(sample_labels):
        raise ValueError(
            f"{data_file} has {len(samples)} samples but {label_file} has {len(sample_labels)} labels"
        )

    X.append(samples)
    y.append(sample_labels)

if not X:
    raise ValueError(f"No training samples found in {DATA_DIR!r}")

X = np.concatenate(X, axis=0)
y = np.concatenate(y, axis=0)

# Encode the string labels as integer class indices
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
y = le.fit_transform(y)

# Reshape for the LSTM: (samples, timesteps=1, features)
X = X.reshape(X.shape[0], 1, X.shape[1])

# Train Model LSTM

In [5]:
# Stacked-LSTM classifier: two recurrent layers, one dense layer, and a softmax
# output with one unit per distinct encoded label in y.
model = Sequential()
model.add(LSTM(64, return_sequences=True, activation='relu', input_shape=(1, X.shape[2])))
model.add(LSTM(64, return_sequences=False, activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(len(set(y)), activation='softmax'))

# y holds integer class indices, hence the sparse categorical loss.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
model.fit(X, y, batch_size=16, epochs=30)



Epoch 1/30


Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.src.callbacks.History at 0x26af4b05a90>

# Simpan Model

In [1]:
# Persist the trained network to "hand_gesture_model.h5" (relative to the working directory).
# NOTE: `model` is created by the training cell; running this cell on a fresh kernel,
# before training, raises NameError.
model.save("hand_gesture_model.h5")

NameError: name 'model' is not defined

# Jalankan Model di Jupyter Notebook untuk Prediksi

In [12]:
# Reload the saved model
model = tf.keras.models.load_model("hand_gesture_model.h5")

# NOTE: `le` (the fitted LabelEncoder) is not rebuilt here; it must already exist in
# the kernel from the preprocessing cell.

# Open the camera to detect hands and run predictions
cap = cv2.VideoCapture(0)

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            # cap.read() returns frame=None on failure; stop instead of crashing in cv2.flip.
            print("Failed to grab frame")
            break

        frame = cv2.flip(frame, 1)  # Mirror so the preview matches the user's view
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        result = hands.process(rgb_frame)

        if result.multi_hand_landmarks:
            for hand_landmarks in result.multi_hand_landmarks:
                # Flatten the landmarks to [x0, y0, x1, y1, ...], as in the recording step.
                landmarks = []
                for lm in hand_landmarks.landmark:
                    landmarks.extend((lm.x, lm.y))

                # Convert to a numpy array shaped (batch=1, timesteps=1, features) for the LSTM
                landmarks = np.array(landmarks).reshape(1, 1, -1)

                # Predict; verbose=0 suppresses the per-call progress bar that would
                # otherwise flood the cell output on every frame.
                prediction = model.predict(landmarks, verbose=0)
                class_index = np.argmax(prediction)
                class_label = le.inverse_transform([class_index])[0]

                # Show the predicted label on the frame
                cv2.putText(frame, class_label, (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)

                # Draw the hand landmarks
                mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

        cv2.imshow("Hand Gesture Recognition", frame)

        # Press 'q' to quit.
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the window, even after an error or interrupt.
    cap.release()
    cv2.destroyAllWindows()




# Screenshot (SS) with Gesture

In [10]:
# Installs pyautogui into the kernel's environment.
# NOTE(review): pyautogui is also listed in the first install cell, and no visible
# cell imports it — confirm whether this cell is still needed.
%pip install pyautogui

Collecting pyautogui
  Downloading PyAutoGUI-0.9.54.tar.gz (61 kB)
  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
  Getting requirements to build wheel: started
  Getting requirements to build wheel: finished with status 'done'
  Preparing metadata (pyproject.toml): started
  Preparing metadata (pyproject.toml): finished with status 'done'
Collecting pymsgbox (from pyautogui)
  Downloading PyMsgBox-1.0.9.tar.gz (18 kB)
  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
  Getting requirements to build wheel: started
  Getting requirements to build wheel: finished with status 'done'
  Preparing metadata (pyproject.toml): started
  Preparing metadata (pyproject.toml): finished with status 'done'
Collecting pytweening>=1.0.4 (from pyautogui)
  Downloading pytweening-1.2.0.tar.gz (171 kB)
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'don


[notice] A new release of pip is available: 25.0.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [4]:
import requests
from PIL import ImageGrab
from sklearn.preprocessing import LabelEncoder
import time

# --- Set up MediaPipe Hands and load the trained model ---
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils

model = tf.keras.models.load_model("hand_gesture_model.h5")

# Track up to two hands with 0.7 detection/tracking confidence thresholds.
hands = mp_hands.Hands(
    min_tracking_confidence=0.7,
    min_detection_confidence=0.7,
    model_complexity=1,
    max_num_hands=2,
)

# --- Set up the label encoder ---
# Replace with your own gesture labels
labels = ['copy', 'paste']
le = LabelEncoder().fit(labels)

# --- Upload a screenshot to the API ---
def upload_screenshot(img, url="http://127.0.0.1:5000/api/image", timeout=10):
    """PNG-encode an image and POST it to the image API as a multipart upload.

    Best-effort: failures are printed, never raised, so the caller's camera
    loop keeps running.

    Args:
        img: Image array accepted by ``cv2.imencode``.
        url: Endpoint that receives the upload. Defaults to the local API.
        timeout: Seconds to wait for the server before giving up; without a
            timeout a stalled server would block the caller indefinitely.
    """
    encoded_ok, buffer = cv2.imencode('.png', img)
    if not encoded_ok:
        print("Upload error:", "failed to encode screenshot as PNG")
        return

    files = {'file': ('screenshot.png', buffer.tobytes(), 'image/png')}
    try:
        res = requests.post(url, files=files, timeout=timeout)
        print("Upload response:", res.text)
    except requests.RequestException as e:
        # Covers connection errors, timeouts and other request failures.
        print("Upload error:", e)

# --- Camera setup and control variables ---
cap = cv2.VideoCapture(0)
last_upload_time = 0
upload_cooldown = 5  # seconds between uploads, to avoid spamming the API

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            print("Failed to grab frame")
            break

        frame = cv2.flip(frame, 1)  # mirror so the preview looks natural
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        result = hands.process(rgb_frame)

        if result.multi_hand_landmarks:
            for hand_landmarks in result.multi_hand_landmarks:
                # Flatten the landmarks to [x0, y0, x1, y1, ...].
                landmarks = []
                for lm in hand_landmarks.landmark:
                    landmarks.extend((lm.x, lm.y))

                # Shape (batch=1, timesteps=1, features) for the LSTM input.
                landmarks = np.array(landmarks).reshape(1, 1, -1)

                try:
                    # verbose=0 suppresses the per-call progress bar that would
                    # otherwise flood the cell output on every frame.
                    prediction = model.predict(landmarks, verbose=0)
                    class_index = np.argmax(prediction)
                    class_label = le.inverse_transform([class_index])[0]
                except Exception as e:
                    # Keep the camera loop alive if a single prediction fails.
                    print("Prediction error:", e)
                    class_label = "unknown"

                # Show the prediction on the frame
                cv2.putText(frame, f"Gesture: {class_label}", (10, 40),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

                mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

                # When 'copy' is detected and the cooldown has elapsed, take a
                # screenshot and upload it.
                if class_label.lower() == "copy":
                    current_time = time.time()
                    if current_time - last_upload_time > upload_cooldown:
                        print("Gesture COPY detected - taking screenshot and uploading...")
                        screenshot = ImageGrab.grab()
                        # The grab is converted RGB -> BGR before upload_screenshot
                        # encodes it with OpenCV.
                        screenshot_np = np.array(screenshot)
                        screenshot_np = cv2.cvtColor(screenshot_np, cv2.COLOR_RGB2BGR)
                        upload_screenshot(screenshot_np)
                        last_upload_time = current_time

        else:
            cv2.putText(frame, "No hand detected", (10, 40),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

        cv2.imshow("Hand Gesture Recognition", frame)

        # Press 'q' to quit.
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the window, even after an error or interrupt.
    cap.release()
    cv2.destroyAllWindows()


Gesture COPY detected - taking screenshot and uploading...
Upload response: {
  "filename": "screenshot.png",
  "status": "Image uploaded"
}

