In [3]:
import os
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.image as mpimg
import cv2
import mediapipe as mp
import os
import subprocess
import requests
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import to_categorical

# Utility Function

In [4]:
def count_file(directory):
    file_count_per_class = {}
    total_files = 0

    for root, dirs, files in os.walk(directory):
        class_name = root.split(os.sep)[-1]
        count = sum(1 for file in files if file.endswith(".png") or file.endswith(".jpg"))
        
        if count > 0:
            file_count_per_class[class_name] = count
            total_files += count

    for class_name, count in file_count_per_class.items():
        print(f"{class_name}: {count}")

    print(f"Total Dataset: {total_files}")
    
count_file('whisnu-dataset-resize')

bagaimana: 377
besok: 388
hari ini: 326
jawab: 373
kantor: 370
kemarin: 376
kerja: 442
lusa: 318
malam: 354
nanti: 347
pagi: 376
sekarang: 307
siang: 348
sore: 309
Total Dataset: 5011


# Generate Annotated Raw

In [7]:
def get_valid_landmarks(hand_landmarks, image_width, image_height):
    valid_landmarks = []
    for landmark in hand_landmarks.landmark:
        # Convert landmark coordinates to pixel values
        x = int(landmark.x * image_width)
        y = int(landmark.y * image_height)
        
        # Check if the landmark is within the image bounds
        if 0 <= x < image_width and 0 <= y < image_height:
            valid_landmarks.append((x, y))
    
    return valid_landmarks


def annotate_hand_images(input_directory, output_directory):
    mp_drawing = mp.solutions.drawing_utils
    mp_drawing_styles = mp.solutions.drawing_styles
    mp_hands = mp.solutions.hands

    hand_notfound_images = []
    incomplete_landmark_hand = []

    os.makedirs(output_directory, exist_ok=True)

    for root, dirs, files in os.walk(input_directory):
        for file in files:
            if file.lower().endswith(".jpg"):
                input_file = os.path.join(root, file)
                output_subdir = os.path.join(output_directory, os.path.relpath(root, input_directory))
                os.makedirs(output_subdir, exist_ok=True)

                with mp_hands.Hands(
                    static_image_mode=True,
                    max_num_hands=2,
                    min_detection_confidence=0.5,
                ) as hands:
                    filename = os.path.splitext(file)[0]
                    print(f"Processing: {filename}")
                    image = cv2.flip(cv2.imread(input_file), 1)
                    results = hands.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

                    if not results.multi_hand_landmarks:
                        hand_notfound_images.append(input_file)
                        continue

                    image_height, image_width, _ = image.shape
                    annotated_image = image.copy()
                    output_file = os.path.join(output_subdir, f"annotated_{filename}.jpg")

                    for hand_landmarks in results.multi_hand_landmarks:
                        valid_landmarks = get_valid_landmarks(hand_landmarks, image_width, image_height)
                        if len(valid_landmarks) <  21:
                            incomplete_landmark_hand.append(output_file)
                        
                        mp_drawing.draw_landmarks(
                            annotated_image,
                            hand_landmarks,
                            mp_hands.HAND_CONNECTIONS,
                            mp_drawing_styles.get_default_hand_landmarks_style(),
                            mp_drawing_styles.get_default_hand_connections_style())

                    cv2.imwrite(output_file, cv2.flip(annotated_image, 1))
    
    return hand_notfound_images, incomplete_landmark_hand

In [19]:
# Example usage
input_directory = "whisnu-dataset-resize"
output_directory = "whisnu-annotated-raw_dataset"
hand_notfound_images, incomplete_landmark_hand = annotate_hand_images(input_directory, output_directory)
# Optionally, print or save lists of images where hands were not found or landmarks were incomplete
print("Images with no hands detected:", len(hand_notfound_images))
print("Images with incomplete hand landmarks:", len(incomplete_landmark_hand))

Processing: 0_bagaimana_1




Processing: 0_bagaimana_2
Processing: 100_bagaimana_1
Processing: 100_bagaimana_2
Processing: 101_bagaimana_1
Processing: 101_bagaimana_2
Processing: 102_bagaimana_1
Processing: 102_bagaimana_2
Processing: 103_bagaimana_1
Processing: 103_bagaimana_2
Processing: 104_bagaimana_1
Processing: 104_bagaimana_2
Processing: 105_bagaimana_1
Processing: 105_bagaimana_2
Processing: 106_bagaimana_1
Processing: 106_bagaimana_2
Processing: 107_bagaimana_1
Processing: 107_bagaimana_2
Processing: 108_bagaimana_1
Processing: 108_bagaimana_2
Processing: 109_bagaimana_1
Processing: 109_bagaimana_2
Processing: 10_bagaimana_1
Processing: 10_bagaimana_2
Processing: 110_bagaimana_1
Processing: 110_bagaimana_2
Processing: 111_bagaimana_1
Processing: 111_bagaimana_2
Processing: 112_bagaimana_1
Processing: 112_bagaimana_2
Processing: 113_bagaimana_1
Processing: 113_bagaimana_2
Processing: 114_bagaimana_1


KeyboardInterrupt: 

# Filter Image in Hand Not Found and Incomplete Landmark

In [15]:
def get_ori_file(file_path):
    file_name = file_path.split('\\')[-1].replace('annotated_', '')
    class_name = file_name.split('_')[1]
    ori_path_file = os.path.join(f"whisnu-dataset-resize\\{class_name}", file_name)
    return ori_path_file

print(hand_notfound_images[:5])
print(incomplete_landmark_hand[:5])

raw_incomplete_landmark_hand = [get_ori_file(file_path) for file_path in incomplete_landmark_hand]

bad_image = hand_notfound_images + raw_incomplete_landmark_hand
print(bad_image[:2])
print(bad_image[-2:])

['whisnu-dataset-resize\\besok\\100_besok_1.jpg', 'whisnu-dataset-resize\\besok\\100_besok_2.jpg', 'whisnu-dataset-resize\\hari ini\\165_hari_ini1.jpg', 'whisnu-dataset-resize\\hari ini\\165_hari_ini2.jpg', 'whisnu-dataset-resize\\jawab\\112_jawab_1.jpg']
['whisnu-annotated-raw_dataset\\bagaimana\\annotated_202_bagaimana_1.jpg', 'whisnu-annotated-raw_dataset\\bagaimana\\annotated_202_bagaimana_2.jpg', 'whisnu-annotated-raw_dataset\\bagaimana\\annotated_203_bagaimana_1.jpg', 'whisnu-annotated-raw_dataset\\bagaimana\\annotated_203_bagaimana_2.jpg', 'whisnu-annotated-raw_dataset\\bagaimana\\annotated_204_bagaimana_1.jpg']
['whisnu-dataset-resize\\besok\\100_besok_1.jpg', 'whisnu-dataset-resize\\besok\\100_besok_2.jpg']
['whisnu-dataset-resize\\sore\\96_sore_1.jpg', 'whisnu-dataset-resize\\sore\\96_sore_2.jpg']


In [16]:
def hapus_file(list_path):
    for path in list_path:
        try:
            if os.path.isfile(path):
                os.remove(path)
                print(f"File {path} berhasil dihapus.")
            else:
                print(f"File {path} tidak ditemukan atau bukan file.")
        except Exception as e:
            print(f"Terjadi kesalahan saat menghapus file {path}: {e}")

hapus_file(bad_image)

File whisnu-dataset-resize\besok\100_besok_1.jpg berhasil dihapus.
File whisnu-dataset-resize\besok\100_besok_2.jpg berhasil dihapus.
File whisnu-dataset-resize\hari ini\165_hari_ini1.jpg berhasil dihapus.
File whisnu-dataset-resize\hari ini\165_hari_ini2.jpg berhasil dihapus.
File whisnu-dataset-resize\jawab\112_jawab_1.jpg berhasil dihapus.
File whisnu-dataset-resize\jawab\113_jawab_1.jpg berhasil dihapus.
File whisnu-dataset-resize\jawab\213_jawab_1.jpg berhasil dihapus.
File whisnu-dataset-resize\jawab\214_jawab_1.jpg berhasil dihapus.
File whisnu-dataset-resize\jawab\237_jawab_1.jpg berhasil dihapus.
File whisnu-dataset-resize\jawab\247_jawab_1.jpg berhasil dihapus.
File whisnu-dataset-resize\jawab\248_jawab_1.jpg berhasil dihapus.
File whisnu-dataset-resize\jawab\249_jawab_1.jpg berhasil dihapus.
File whisnu-dataset-resize\jawab\251_jawab_1.jpg berhasil dihapus.
File whisnu-dataset-resize\jawab\261_jawab_1.jpg berhasil dihapus.
File whisnu-dataset-resize\jawab\262_jawab_1.jpg ber

# Delete Manual Image with Landmark not Precise

how it works? delete data from annotated raw with imprecise landmarks => run code in below

In [2]:
def list_images(directory):
    return [
        os.path.join(root, f)
        for root, _, files in os.walk(directory)
        for f in files
        if f.lower().endswith((".jpg", ".png"))
    ]
    
def delete_dir2_based_dir1(dir1, dir2):
    annotated_files = set(list_images(dir1))
    augmented_files = set(list_images(dir2))

    annotated_files = [os.path.basename(file) for file in annotated_files]
    count = 0
    for file in augmented_files:
        # check if the file is not in annotated_files
        # first add prefix 'annotated_' to the file name
        prefixed_file = "annotated_" + os.path.basename(file)

        if prefixed_file not in annotated_files:
            os.remove(file)
            print(f"File {file} berhasil dihapus.")
            count += 1
    
    print(f"Total file yang dihapus: {count}")

In [3]:
ANNOTATED_DIR = './whisnu-annotated-raw_dataset'
DATA_CLEAN_DIR = './whisnu-dataset-resize'

delete_dir2_based_dir1(ANNOTATED_DIR, DATA_CLEAN_DIR)

File ./whisnu-dataset-resize\hari ini\20_hari_ini2.jpg berhasil dihapus.
File ./whisnu-dataset-resize\bagaimana\33_bagaimana_1.jpg berhasil dihapus.
File ./whisnu-dataset-resize\kantor\277_kantor_1.jpg berhasil dihapus.
File ./whisnu-dataset-resize\sore\84_sore_2.jpg berhasil dihapus.
File ./whisnu-dataset-resize\bagaimana\31_bagaimana_1.jpg berhasil dihapus.
File ./whisnu-dataset-resize\sore\44_sore_2.jpg berhasil dihapus.
File ./whisnu-dataset-resize\kantor\299_kantor_1.jpg berhasil dihapus.
File ./whisnu-dataset-resize\kantor\378_kantor_1.jpg berhasil dihapus.
File ./whisnu-dataset-resize\malam\87_malam_1.jpg berhasil dihapus.
File ./whisnu-dataset-resize\kantor\447_kantor_1.jpg berhasil dihapus.
File ./whisnu-dataset-resize\kantor\54_kantor_1.jpg berhasil dihapus.
File ./whisnu-dataset-resize\kantor\218_kantor_1.jpg berhasil dihapus.
File ./whisnu-dataset-resize\bagaimana\44_bagaimana_2.jpg berhasil dihapus.
File ./whisnu-dataset-resize\kantor\110_kantor_1.jpg berhasil dihapus.
Fil

NameError: name 'count_file' is not defined

In [5]:
print('\nImage Left')
count_file('whisnu-dataset-resize')


Image Left
bagaimana: 377
besok: 388
hari ini: 326
jawab: 373
kantor: 370
kemarin: 376
kerja: 442
lusa: 318
malam: 354
nanti: 347
pagi: 376
sekarang: 307
siang: 348
sore: 309
Total Dataset: 5011


# Extraksi Fitur

In [6]:
def extract_hand_features(input_dir, output_filename):
    mp_hands = mp.solutions.hands
    hands = mp_hands.Hands(
        static_image_mode=True, max_num_hands=1, min_detection_confidence=0.5
    )

    data = []

    for filename in os.listdir(input_dir):
        if filename.endswith(".jpg") or filename.endswith(".png"):
            image_path = os.path.join(input_dir, filename)
            image = cv2.imread(image_path)
            image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            results = hands.process(image_rgb)

            if results.multi_hand_landmarks:
                for hand_landmarks in results.multi_hand_landmarks:
                    landmarks = []
                    for landmark in hand_landmarks.landmark:
                        landmarks.append(landmark.x)
                        landmarks.append(landmark.y)
                        landmarks.append(landmark.z)
                    data.append([filename] + landmarks)

    hands.close()
    

    columns = ["filename"]
    for i in range(21):  # 21 landmarks
        columns += [f"x_{i}", f"y_{i}", f"z_{i}"]

    df = pd.DataFrame(data, columns=columns)
    df.to_csv(output_filename, index=False)


out_dir = "./whisnu-extraction_feature-dataset/raw"
os.makedirs(out_dir, exist_ok=True)
DATA_DIR = './whisnu-dataset-resize'

for files in os.listdir(DATA_DIR):
    input_dir = os.path.join(DATA_DIR, files)
    output_filename = os.path.join(out_dir, f"{files}.csv")
    extract_hand_features(input_dir, output_filename)



NotADirectoryError: [WinError 267] The directory name is invalid: './whisnu-dataset-resize\\whisnu.zip'

# Now Let's Go Augmented

Goal
1. Augmented 3-5k (let's try 5k each class)
2. Generate Annotated Augmented
3. Filtering No Hand, <21 Landmark and Unprecise Landmark

In [9]:
import numpy as np

import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator, img_to_array, array_to_img, load_img

In [8]:
count_file('whisnu-dataset-resize')

bagaimana: 377
besok: 388
hari ini: 326
jawab: 373
kantor: 370
kemarin: 376
kerja: 442
lusa: 318
malam: 354
nanti: 347
pagi: 376
sekarang: 307
siang: 348
sore: 309
Total Dataset: 5011


In [None]:
done_augmented_list = []

In [13]:
def augment_images(input_dir, output_dir, num_augmentations=10):
    # Define the augmentation parameters
    datagen = ImageDataGenerator(
        rotation_range=2,
        width_shift_range=0.075,
        height_shift_range=0.075,
        shear_range=0.075,
        zoom_range=0.075,
        horizontal_flip=True,
        fill_mode='nearest'
    )
    
    print(f'Augmenting images in {input_dir}')
    for idx, (root, dirs, files) in enumerate(os.walk(input_dir)):
        print(f'Processing {root}')
        print(f'Files: {len(files)}')
        for i, file in enumerate(files):
            class_name = file.split('_')[1]
            
            if class_name not in done_augmented_list:
                continue        

            
            if file.lower().endswith(('png', 'jpg', 'jpeg')):
                img_path = os.path.join(root, file)
                img = load_img(img_path)
                x = img_to_array(img)
                x = np.expand_dims(x, axis=0)
                
                print(f'{idx} - {i} - {class_name}')
                output_path = os.path.join(output_dir, class_name)
                if not os.path.exists(output_path):
                    os.makedirs(output_path)
                
                orig_img_path = os.path.join(output_path, f'ori_{file}')
                img.save(orig_img_path)
                
                i = 0
                for batch in datagen.flow(x, batch_size=1):
                    aug_img = array_to_img(batch[0])
                    os.makedirs(output_path, exist_ok=True)
                    aug_img.save(os.path.join(output_path, f'aug-{i}_{file}'))
                    i += 1
                    if i >= num_augmentations:
                        break

DATASET_DIR = './whisnu-dataset-resize'
AUGMENTED_DIR = './whisnu-augmented-dataset'
augment_images(DATASET_DIR, AUGMENTED_DIR)

print("Dataset After Augmented")
count_file(AUGMENTED_DIR)

Augmenting images in ./whisnu-dataset-resize
Processing ./whisnu-dataset-resize
Files: 0
Processing ./whisnu-dataset-resize\bagaimana
Files: 377
1 - 0 - bagaimana
1 - 1 - bagaimana
1 - 2 - bagaimana
1 - 3 - bagaimana
1 - 4 - bagaimana
1 - 5 - bagaimana
1 - 6 - bagaimana
1 - 7 - bagaimana
1 - 8 - bagaimana
1 - 9 - bagaimana
1 - 10 - bagaimana
1 - 11 - bagaimana
1 - 12 - bagaimana
1 - 13 - bagaimana
1 - 14 - bagaimana
1 - 15 - bagaimana
1 - 16 - bagaimana
1 - 17 - bagaimana
1 - 18 - bagaimana
1 - 19 - bagaimana
1 - 20 - bagaimana
1 - 21 - bagaimana
1 - 22 - bagaimana
1 - 23 - bagaimana
1 - 24 - bagaimana
1 - 25 - bagaimana
1 - 26 - bagaimana
1 - 27 - bagaimana
1 - 28 - bagaimana
1 - 29 - bagaimana
1 - 30 - bagaimana
1 - 31 - bagaimana
1 - 32 - bagaimana
1 - 33 - bagaimana
1 - 34 - bagaimana
1 - 35 - bagaimana
1 - 36 - bagaimana
1 - 37 - bagaimana
1 - 38 - bagaimana
1 - 39 - bagaimana
1 - 40 - bagaimana
1 - 41 - bagaimana
1 - 42 - bagaimana
1 - 43 - bagaimana
1 - 44 - bagaimana
1 - 45 - b

In [6]:
count_file('whisnu-augmented-dataset')

bagaimana: 4147
besok: 4268
hari: 3586
jawab: 4103
kantor: 4070
kemarin: 4136
kerja: 4862
lusa: 3498
malam: 3894
nanti: 3817
pagi: 4136
sekarang: 3377
siang: 3828
sore: 3399
Total Dataset: 55121


# Generate Annotated Augmented

In [8]:
# Example usage
input_directory = "whisnu-augmented-dataset"
output_directory = "whisnu-annotated-augmented_dataset"
hand_notfound_images, incomplete_landmark_hand = annotate_hand_images(input_directory, output_directory)
# Optionally, print or save lists of images where hands were not found or landmarks were incomplete
print("Images with no hands detected:", len(hand_notfound_images))
print("Images with incomplete hand landmarks:", len(incomplete_landmark_hand))

Processing: aug-0_0_bagaimana_1
Processing: aug-0_0_bagaimana_2




Processing: aug-0_100_bagaimana_1
Processing: aug-0_100_bagaimana_2
Processing: aug-0_101_bagaimana_1
Processing: aug-0_101_bagaimana_2
Processing: aug-0_102_bagaimana_1
Processing: aug-0_102_bagaimana_2
Processing: aug-0_103_bagaimana_1
Processing: aug-0_103_bagaimana_2
Processing: aug-0_104_bagaimana_1
Processing: aug-0_104_bagaimana_2
Processing: aug-0_105_bagaimana_1
Processing: aug-0_105_bagaimana_2
Processing: aug-0_106_bagaimana_1
Processing: aug-0_106_bagaimana_2
Processing: aug-0_107_bagaimana_1
Processing: aug-0_107_bagaimana_2
Processing: aug-0_108_bagaimana_1
Processing: aug-0_108_bagaimana_2
Processing: aug-0_109_bagaimana_1
Processing: aug-0_109_bagaimana_2
Processing: aug-0_10_bagaimana_1
Processing: aug-0_10_bagaimana_2
Processing: aug-0_110_bagaimana_1
Processing: aug-0_110_bagaimana_2
Processing: aug-0_111_bagaimana_1
Processing: aug-0_111_bagaimana_2
Processing: aug-0_112_bagaimana_1
Processing: aug-0_112_bagaimana_2
Processing: aug-0_113_bagaimana_1
Processing: aug-

# Filter Augmented Data