In [None]:
# Mount Google Drive at /content/drive; every path used later in this
# notebook lives under that mount point.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Download the EgoHands archive into the Drive project folder; -O fixes the
# destination file name.
# NOTE(review): presumably the "Hand Detection" folder must already exist on
# Drive for this to succeed - confirm.
!wget -O "/content/drive/MyDrive/Hand Detection/egohands_data.zip" http://vision.soic.indiana.edu/egohands_files/egohands_data.zip

In [None]:
!ls "/content/drive/MyDrive/Hand Detection/_LABELLED_SAMPLES"

In [None]:
!unzip "/content/drive/MyDrive/Hand Detection/egohands_data.zip" -d "/content/drive/MyDrive/Hand Detection"

In [None]:
# Show the contents of the Drive project folder.
!ls "/content/drive/MyDrive/Hand Detection"

Bounding_Boxes_T1   getMetaBy.m		     output_features.csv
DEMO_1.m	    getSegmentationMask.m    processed_HBB.csv
DEMO_2.m	    hand_bounding_boxes.csv  processed_scaled_HBB.csv
egohands_data.zip   HD_EH.ipynb		     README.txt
getBoundingBoxes.m  _LABELLED_SAMPLES	     showLabelsOnFrame.m
getFramePath.m	    metadata.mat	     specific_hand_features_with_frame_id.csv


In [None]:
import cv2
import os
import numpy as np
import pandas as pd
from skimage.feature import local_binary_pattern
from skimage import exposure
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm
from functools import partial
from sklearn.preprocessing import StandardScaler

def canny_edge_detection(image, low_threshold=50, high_threshold=150):
    """Run OpenCV's Canny edge detector on ``image``.

    Args:
        image: Input image, handed to ``cv2.Canny`` unchanged.
        low_threshold: First threshold passed to ``cv2.Canny``.
        high_threshold: Second threshold passed to ``cv2.Canny``.

    Returns:
        The edge map produced by ``cv2.Canny``.
    """
    edge_map = cv2.Canny(image, low_threshold, high_threshold)
    return edge_map

def sobel_edge_detection(image):
    """Compute the Sobel gradient magnitude of ``image``.

    A 3x3 Sobel kernel is applied once with derivative order (1, 0) and once
    with (0, 1), both with 64-bit float output, and the two responses are
    combined with ``cv2.magnitude``.

    Args:
        image: Input image, handed to ``cv2.Sobel`` unchanged.

    Returns:
        The per-pixel gradient magnitude returned by ``cv2.magnitude``.
    """
    derivative_orders = ((1, 0), (0, 1))  # (dx, dy): x-gradient first, then y
    grad_x, grad_y = (
        cv2.Sobel(image, cv2.CV_64F, dx, dy, ksize=3)
        for dx, dy in derivative_orders
    )
    return cv2.magnitude(grad_x, grad_y)

def find_hand_contours(image):
    """Return the external contours found in a binary/edge image.

    Args:
        image: Single-channel image passed to ``cv2.findContours``.

    Returns:
        The sequence of contours reported by ``cv2.findContours`` using
        ``RETR_EXTERNAL`` retrieval and ``CHAIN_APPROX_SIMPLE`` compression.
    """
    found = cv2.findContours(image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    # OpenCV 3.x returns (image, contours, hierarchy) while 2.x and 4.x return
    # (contours, hierarchy). The contours are the second-to-last element in
    # both layouts, so index from the end instead of unpacking a fixed arity.
    return found[-2]

def convex_hull(contour):
    """Return the convex hull of ``contour``.

    ``cv2.convexHull`` is called with its default options; the result is
    returned as-is.
    """
    return cv2.convexHull(contour)

def convexity_defects(contour, hull):
    """Return the convexity defects of ``contour`` relative to its hull.

    Args:
        contour: Contour as returned by ``cv2.findContours``.
        hull: Convex hull of ``contour``. Either the index form produced by
            ``cv2.convexHull(contour, returnPoints=False)`` or the point form
            produced by ``cv2.convexHull(contour)`` is accepted.

    Returns:
        Whatever ``cv2.convexityDefects`` returns for the contour and the
        index-form hull.
    """
    # cv2.convexityDefects only accepts hull *indices*. A point hull (an
    # (N, 1, 2) array, which is what convex_hull() in this notebook produces)
    # makes OpenCV raise, so recompute the hull in index form in that case.
    is_point_hull = getattr(hull, "ndim", None) == 3 and hull.shape[-1] == 2
    if is_point_hull:
        hull = cv2.convexHull(contour, returnPoints=False)
    return cv2.convexityDefects(contour, hull)

def aspect_ratio_of_hand(contour):
    """Return width / height of the contour's bounding rectangle.

    The rectangle comes from ``cv2.boundingRect``; its x/y origin is ignored.
    """
    _, _, width, height = cv2.boundingRect(contour)
    return float(width) / height

def solidity_of_hand(contour, hull):
    """Return contour area divided by hull area.

    Args:
        contour: Contour whose area is measured with ``cv2.contourArea``.
        hull: Hull whose area is measured with ``cv2.contourArea``.

    Returns:
        ``area(contour) / area(hull)``, or ``0`` when the hull area is zero.
    """
    contour_area = cv2.contourArea(contour)
    hull_area = cv2.contourArea(hull)
    if hull_area == 0:
        # Degenerate hull: avoid dividing by zero.
        return 0
    return float(contour_area) / hull_area

def circularity_of_hand(contour):
    """Return the circularity ``4 * pi * area / perimeter**2`` of a contour.

    The perimeter is measured with ``cv2.arcLength`` treating the contour as
    closed, the area with ``cv2.contourArea``.

    Returns:
        The circularity value, or ``0`` when the perimeter is zero.
    """
    perimeter = cv2.arcLength(contour, True)
    area = cv2.contourArea(contour)
    return 4 * np.pi * area / (perimeter ** 2) if perimeter != 0 else 0

def lbp_features(image, radius=1, n_points=8):
    """Return a normalised histogram of uniform local binary patterns.

    Args:
        image: Image passed to ``local_binary_pattern``.
        radius: Radius argument for ``local_binary_pattern``.
        n_points: Number of sampling points for ``local_binary_pattern``.

    Returns:
        A float array with ``n_points + 2`` bins (integer bin edges
        ``0 .. n_points + 2``), divided by its sum plus ``1e-6`` so an
        all-zero histogram does not divide by zero.
    """
    codes = local_binary_pattern(image, n_points, radius, method="uniform")
    bin_edges = np.arange(0, n_points + 3)
    counts = np.histogram(codes.ravel(), bins=bin_edges, range=(0, n_points + 2))[0]
    histogram = counts.astype('float')
    return histogram / (histogram.sum() + 1e-6)

def skin_color_detection(image):
    """Return a mask of pixels whose HSV value lies in a fixed skin range.

    The image is converted with ``cv2.COLOR_BGR2HSV`` and thresholded with
    ``cv2.inRange`` between ``[0, 20, 70]`` and ``[20, 255, 255]``.

    Args:
        image: Image passed to ``cv2.cvtColor`` (treated as BGR by the
            conversion code used here).

    Returns:
        The mask produced by ``cv2.inRange``.
    """
    hsv_image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    skin_lo, skin_hi = (
        np.array(bound, dtype=np.uint8)
        for bound in ([0, 20, 70], [20, 255, 255])
    )
    return cv2.inRange(hsv_image, skin_lo, skin_hi)

def hog_features(image):
    """Return the HOG (histogram of oriented gradients) descriptor of ``image``.

    Args:
        image: Image passed to ``skimage.feature.hog``.

    Returns:
        The feature descriptor computed with 8x8-pixel cells and 2x2-cell
        blocks.
    """
    # Imported locally: the notebook's import cell only pulls
    # local_binary_pattern from skimage.feature, so a bare `hog` name would
    # raise NameError here.
    from skimage.feature import hog

    # Only the descriptor is returned, so the visualisation image (and its
    # intensity rescaling) is not requested at all.
    return hog(image, pixels_per_cell=(8, 8), cells_per_block=(2, 2))

def detect_fingers(contour, hull, depth_threshold=10000):
    """Count the deep convexity defects of a contour.

    Args:
        contour: Contour as returned by ``cv2.findContours``.
        hull: Convex hull of ``contour``, either in index form
            (``returnPoints=False``) or in point form (``cv2.convexHull``
            default).
        depth_threshold: A defect is counted when its depth value is strictly
            greater than this. OpenCV reports the depth as a fixed-point
            number with 8 fractional bits, so the default of 10000
            corresponds to roughly 39 pixels.

    Returns:
        The number of defects deeper than ``depth_threshold`` as an ``int``;
        ``0`` when OpenCV reports no defects.
    """
    # cv2.convexityDefects only accepts hull indices. A point hull (shape
    # (N, 1, 2), which is what convex_hull() in this notebook returns) would
    # make it raise, so rebuild the hull in index form when needed.
    if getattr(hull, "ndim", None) == 3 and hull.shape[-1] == 2:
        hull = cv2.convexHull(contour, returnPoints=False)

    defects = cv2.convexityDefects(contour, hull)
    if defects is None:
        return 0

    # Each defect row is (start_index, end_index, farthest_index, depth).
    depths = defects[:, 0, 3]
    return int(np.count_nonzero(depths > depth_threshold))

def extract_hand_features(image):
    """Extract shape, texture and skin-colour features for every contour.

    The image is converted to grayscale, Canny edges are computed and the
    external contours of the edge map are described one by one. Each contour
    contributes, in order:

    * aspect ratio, solidity, circularity (3 values),
    * the LBP histogram of the grayscale bounding-box crop,
    * mean and standard deviation of the skin mask inside the bounding box.

    Args:
        image: Image passed to ``cv2.cvtColor`` with ``COLOR_BGR2GRAY`` and to
            ``skin_color_detection``.

    Returns:
        A 1-D array holding the per-contour feature vectors concatenated in
        contour order, so its length grows with the number of contours. When
        no contour is found, a single all-zero vector of 15 values is
        returned instead.
    """
    # Layout of one contour's feature vector. lbp_features() is called with
    # its default n_points=8, which yields n_points + 2 = 10 histogram bins.
    n_shape_features = 3
    n_lbp_bins = 10
    n_skin_features = 2
    features_per_contour = n_shape_features + n_lbp_bins + n_skin_features

    gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    edges = canny_edge_detection(gray_image)
    contours = find_hand_contours(edges)

    if len(contours) == 0:
        # The fallback must not depend on anything computed inside the
        # contour loop: with no contours there is no histogram to measure.
        return np.zeros(features_per_contour)

    # The skin mask depends only on the image, so compute it once instead of
    # once per contour.
    skin_mask = skin_color_detection(image)

    features_list = []
    for contour in contours:
        hull = convex_hull(contour)
        features = [
            aspect_ratio_of_hand(contour),
            solidity_of_hand(contour, hull),
            circularity_of_hand(contour),
        ]

        x, y, w, h = cv2.boundingRect(contour)
        roi = gray_image[y:y+h, x:x+w]
        features.extend(lbp_features(roi))

        skin_mask_roi = skin_mask[y:y+h, x:x+w]
        features.extend([np.mean(skin_mask_roi), np.std(skin_mask_roi)])

        features_list.append(features)

    return np.array(features_list).flatten()


def save_features_to_csv_from_selected_folders(image_dir, output_csv, folder_names, num_threads):
    """Extract features for the chosen folders, standardise them, write a CSV.

    Args:
        image_dir: Directory that contains the folders named in
            ``folder_names``.
        output_csv: Destination path of the CSV file.
        folder_names: Names of the sub-folders of ``image_dir`` to process.
        num_threads: Worker count for the folder-level thread pool; the same
            value is forwarded to ``process_folder``.

    The CSV holds one row per processed image: the feature columns after
    ``StandardScaler`` (fitted on all rows at once), followed by ``label`` and
    ``frame_id``. Nothing is returned.
    """
    folder_paths = [os.path.join(image_dir, name) for name in folder_names]
    folder_worker = partial(process_folder, num_threads=num_threads)

    with ThreadPoolExecutor(max_workers=num_threads) as pool:
        progress = tqdm(
            pool.map(folder_worker, folder_paths),
            total=len(folder_paths),
            desc="proccessing folders",
        )
        per_folder_results = list(progress)

    # Concatenate the per-folder lists in folder order.
    all_features, all_labels, all_frame_ids = [], [], []
    for folder_features, folder_labels, folder_frame_ids in per_folder_results:
        all_features += folder_features
        all_labels += folder_labels
        all_frame_ids += folder_frame_ids

    raw_df = pd.DataFrame(all_features)
    raw_df['label'] = all_labels
    raw_df['frame_id'] = all_frame_ids

    # Everything except the two trailing metadata columns is a feature.
    feature_columns = raw_df.columns[:-2]
    scaled_values = StandardScaler().fit_transform(
        raw_df.drop(columns=['label', 'frame_id'])
    )

    scaled_df = pd.DataFrame(scaled_values, columns=feature_columns)
    scaled_df['label'] = raw_df['label']
    scaled_df['frame_id'] = raw_df['frame_id']

    scaled_df.to_csv(output_csv, index=False)


def process_folder(folder_path, num_threads):
    """Run ``process_image`` on every entry of a folder using a thread pool.

    Args:
        folder_path: Directory whose entries are processed; its base name is
            shown in the progress bar.
        num_threads: Maximum number of worker threads.

    Returns:
        A ``(features_list, labels, frame_ids)`` tuple holding the
        concatenated results of all entries, in sorted file-name order.
    """
    # List the directory exactly once. Repeated listings are slow on a
    # mounted Drive and could disagree with each other if the folder changes
    # in between. Sorting makes the row order reproducible across runs.
    image_names = sorted(os.listdir(folder_path))
    image_worker = partial(process_image, folder_path=folder_path)

    with ThreadPoolExecutor(max_workers=num_threads) as image_executor:
        image_results = list(tqdm(image_executor.map(image_worker, image_names),
                                  total=len(image_names), desc=f"proccessining IM in file: {os.path.basename(folder_path)}"))

    features_list = []
    labels = []
    frame_ids = []
    for features, label, frame_id in image_results:
        features_list.extend(features)
        labels.extend(label)
        frame_ids.extend(frame_id)

    return features_list, labels, frame_ids


def process_image(image_name, folder_path):
    """Extract features for a single image file.

    Args:
        image_name: File name inside ``folder_path``. Names that do not end in
            ``.png``, ``.jpg`` or ``.jpeg`` (case-insensitive) are skipped.
        folder_path: Directory containing the file; its base name is used as
            the label.

    Returns:
        A ``(features_list, labels, frame_ids)`` tuple of lists. Each list has
        one entry when the image was read and processed, and is empty when the
        file was skipped, could not be read, or processing raised (the error
        is printed, not re-raised).
    """
    image_path = os.path.join(folder_path, image_name)
    features_list, labels, frame_ids = [], [], []

    if image_name.lower().endswith(('.png', '.jpg', '.jpeg')):
        # The frame id is the file name without its extension.
        frame_id = os.path.splitext(image_name)[0]
        try:
            image = cv2.imread(image_path)
            if image is not None:
                features_list.append(extract_hand_features(image))
                labels.append(os.path.basename(folder_path))
                frame_ids.append(frame_id)
        except Exception as e:
            # Best effort: report the failure and carry on with other images.
            print(f"error proccess im: {image_name} {e}")

    return features_list, labels, frame_ids

# Run configuration for the feature-extraction pass.
image_dir = '/content/drive/MyDrive/Hand Detection/_LABELLED_SAMPLES'
output_csv = '/content/drive/MyDrive/Hand Detection/output_features.csv'

# Sub-folders of image_dir to process; each folder name becomes the row label.
folder_names = [
    'CARDS_COURTYARD_B_T',
    'CHESS_COURTYARD_H_S',
    'JENGA_COURTYARD_S_T',
    'PUZZLE_COURTYARD_S_B',
    'CARDS_COURTYARD_H_S',
    'CHESS_COURTYARD_S_H',
]

# Used for both the folder-level and the image-level thread pools.
num_threads = 10

save_features_to_csv_from_selected_folders(
    image_dir,
    output_csv,
    folder_names,
    num_threads,
)


In [None]:
import tensorflow as tf
from tensorflow.keras.layers import Input, Conv1D, MaxPooling1D, Flatten, Dense, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import regularizers
from sklearn.model_selection import KFold
import pandas as pd
import numpy as np

# ---------------------------------------------------------------------------
# Cross-validated training of the two-headed hand detector.
# ---------------------------------------------------------------------------
SEED = 42
N_SPLITS = 5
EPOCHS = 50
BATCH_SIZE = 16
KEY_COLUMNS = ['label', 'frame_id']
CLASS_COLUMNS = ['h1_is_hand', 'h2_is_hand']
BOX_COLUMNS = ['h1_x1', 'h1_y1', 'h1_x2', 'h1_y2', 'h2_x1', 'h2_y1', 'h2_x2', 'h2_y2']

# Seed Python, NumPy and TensorFlow so weight init and shuffling are repeatable.
tf.keras.utils.set_random_seed(SEED)

X_data_scaled = pd.read_csv('/content/drive/MyDrive/Hand Detection/output_features.csv').fillna(0)
Y_data = pd.read_csv('/content/drive/MyDrive/Hand Detection/processed_scaled_HBB.csv')

# The join keys are compared as strings on both sides so that numeric-looking
# ids parsed as integers in one file still match the other.
X_data_scaled['label'] = X_data_scaled['label'].astype(str)
X_data_scaled['frame_id'] = X_data_scaled['frame_id'].astype(str)
Y_data['folder_label'] = Y_data['folder_label'].astype(str)
Y_data['video_id'] = Y_data['video_id'].astype(str)

aligned_data = pd.merge(
    X_data_scaled,
    Y_data,
    left_on=['label', 'frame_id'],
    right_on=['folder_label', 'video_id'],
    how='inner'
)
if aligned_data.empty:
    raise ValueError("no rows matched between the feature CSV and the bounding-box CSV")

# Model inputs are ONLY the columns that came from the feature CSV. The merged
# frame also carries every column of the target CSV, so merely dropping the
# key columns would hand the labels and box coordinates to the network as
# inputs (target leakage).
feature_columns = [c for c in X_data_scaled.columns if c not in KEY_COLUMNS]
X = aligned_data[feature_columns].values.astype('float32')
y_class = aligned_data[CLASS_COLUMNS].values.astype('float32')
y_regression = aligned_data[BOX_COLUMNS].values.astype('float32')

kf = KFold(n_splits=N_SPLITS, shuffle=True, random_state=SEED)

classification_acc = []
classification_prec = []
classification_rec = []
regression_rmse = []


def build_model(n_features):
    """Build and compile a fresh, untrained two-headed 1-D CNN.

    Args:
        n_features: Length of the per-frame feature vector; the network input
            has shape ``(n_features, 1)``.

    Returns:
        A compiled Keras model with a 2-unit sigmoid ``classification_output``
        head and an 8-unit sigmoid ``regression_output`` head.
    """
    input_layer = Input(shape=(n_features, 1), name='input_layer')

    conv1 = Conv1D(32, 3, activation='relu', kernel_regularizer=regularizers.l2(1e-3))(input_layer)
    pool1 = MaxPooling1D(2)(conv1)

    flatten = Flatten()(pool1)

    shared = Dense(64, activation='relu', kernel_regularizer=regularizers.l2(1e-3))(flatten)
    shared = Dropout(0.2)(shared)
    shared = Dense(32, activation='relu', kernel_regularizer=regularizers.l2(1e-3))(shared)
    shared = Dropout(0.2)(shared)

    class_output = Dense(2, activation='sigmoid', name='classification_output',
                         kernel_regularizer=regularizers.l2(1e-3))(shared)

    # NOTE(review): a sigmoid regression head can only emit values in (0, 1);
    # presumably the scaled target CSV stores normalised coordinates - confirm.
    reg_output = Dense(8, activation='sigmoid', name='regression_output',
                       kernel_regularizer=regularizers.l2(1e-2))(shared)

    fold_model = Model(inputs=input_layer, outputs=[class_output, reg_output])

    fold_model.compile(
        optimizer=Adam(learning_rate=0.0001),
        loss={
            'classification_output': 'binary_crossentropy',
            'regression_output': 'mean_squared_error'
        },
        loss_weights={
            'classification_output': 1.0,
            'regression_output': 10.0
        },
        # Explicit metric names keep the evaluate() result keys stable no
        # matter how many metric objects were created earlier in the session.
        metrics={
            'classification_output': [
                'accuracy',
                tf.keras.metrics.Precision(name='precision'),
                tf.keras.metrics.Recall(name='recall'),
            ],
            'regression_output': [tf.keras.metrics.RootMeanSquaredError(name='rmse')]
        }
    )
    return fold_model


for fold, (train_index, valid_index) in enumerate(kf.split(X)):
    print(f"Training Fold_{fold + 1}")

    # Every fold gets a brand-new model and optimizer. Reusing one model
    # would let later folds validate on samples its weights had already been
    # trained on, and would carry the reduced learning rate across folds.
    tf.keras.backend.clear_session()
    model = build_model(X.shape[1])

    X_train, X_valid = X[train_index], X[valid_index]
    y_class_train, y_class_valid = y_class[train_index], y_class[valid_index]
    y_reg_train, y_reg_valid = y_regression[train_index], y_regression[valid_index]

    # Conv1D expects (samples, steps, channels); add a single channel axis.
    X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
    X_valid = X_valid.reshape(X_valid.shape[0], X_valid.shape[1], 1)

    train_dataset = tf.data.Dataset.from_tensor_slices((X_train, {"classification_output": y_class_train, "regression_output": y_reg_train}))
    # A buffer as large as the training split gives a full shuffle.
    train_dataset = train_dataset.shuffle(len(X_train)).batch(BATCH_SIZE).prefetch(buffer_size=tf.data.AUTOTUNE)

    valid_dataset = tf.data.Dataset.from_tensor_slices((X_valid, {"classification_output": y_class_valid, "regression_output": y_reg_valid}))
    valid_dataset = valid_dataset.batch(BATCH_SIZE).prefetch(buffer_size=tf.data.AUTOTUNE)

    callbacks = [
        tf.keras.callbacks.ReduceLROnPlateau(factor=0.5, patience=3)
    ]

    model.fit(
        train_dataset,
        validation_data=valid_dataset,
        epochs=EPOCHS,
        callbacks=callbacks,
        verbose=1
    )

    # Look metrics up by name: the position of each value in the list form of
    # evaluate() differs between Keras versions.
    results = model.evaluate(valid_dataset, verbose=0, return_dict=True)

    classification_acc.append(results['classification_output_accuracy'])
    classification_prec.append(results['classification_output_precision'])
    classification_rec.append(results['classification_output_recall'])
    regression_rmse.append(results['regression_output_rmse'])

print("CV-Avg Results:")
print(f"classification accuracy: {np.mean(classification_acc):.4f}")
print(f"classification precision: {np.mean(classification_prec):.4f}")
print(f"classification recall: {np.mean(classification_rec):.4f}")
print(f"regression RMSE: {np.mean(regression_rmse):.4f}")


In [None]:
# Re-evaluate the model left over from the training cell on the validation
# split of the LAST cross-validation fold (X_valid / y_*_valid still hold that
# fold's data). This is not an evaluation on all validation data.
valid_dataset = tf.data.Dataset.from_tensor_slices((X_valid, {"classification_output": y_class_valid, "regression_output": y_reg_valid}))
valid_dataset = valid_dataset.batch(16).prefetch(buffer_size=tf.data.AUTOTUNE)
final_results = model.evaluate(valid_dataset, verbose=1, return_dict=True)


def _metric_by_name(results, output_name, *keywords):
    """Return the first metric of ``output_name`` whose key contains a keyword.

    Looking metrics up by name avoids relying on their position in the list
    form of ``evaluate()``, which is not the same across Keras versions.
    """
    for key, value in results.items():
        if key.startswith(output_name) and any(word in key for word in keywords):
            return value
    raise KeyError(f"no {keywords} metric for {output_name!r} in {sorted(results)}")


print("Model Eval on last-fold Validation Data:")
print(f"classification precision: {_metric_by_name(final_results, 'classification_output', 'precision'):.4f}")
print(f"classification recall: {_metric_by_name(final_results, 'classification_output', 'recall'):.4f}")
print(f"regression RMSE: {_metric_by_name(final_results, 'regression_output', 'root_mean_squared_error', 'rmse'):.4f}")