In [30]:
import csv
import cv2
import matplotlib.animation as animation
import matplotlib.pyplot as plt
import mediapipe as mp
import numpy as np
import os
import pandas as pd
from random import uniform
from utils import drawLandmarks

%matplotlib inline

In [31]:
# Folder that receives one annotated preview frame per processed video.
# exist_ok=True avoids the check-then-create race and raises if "./preview"
# exists but is not a directory, instead of failing later when images are written.
os.makedirs("./preview", exist_ok=True)

### get dataset source videos

In [32]:
# Map each sub-directory name of ./dataset (the label) to the .mp4 files directly inside it.
dataset = {}
alphabet = [entry.path for entry in os.scandir("./dataset") if entry.is_dir()]

for letter_dir in alphabet:
	videos = []
	for entry in os.scandir(letter_dir):
		# Only regular files with a lowercase ".mp4" suffix are picked up.
		if entry.is_file() and entry.name.endswith(".mp4"):
			videos.append(entry.path)
	dataset[os.path.basename(letter_dir)] = videos

In [33]:
# Shortcut to the MediaPipe hand-landmarker task class.
HandLandmarker = mp.tasks.vision.HandLandmarker

# Landmarker configuration: local model file, VIDEO running mode
# (frames are passed together with a timestamp), up to two hands per frame.
model_base_options = mp.tasks.BaseOptions(model_asset_path="./models/hand_landmarker.task")
options = mp.tasks.vision.HandLandmarkerOptions(
	base_options=model_base_options,
	running_mode=mp.tasks.vision.RunningMode.VIDEO,
	num_hands=2,
)

### generating data cache for each video

In [34]:
# Column layout of every per-video cache file: label, handedness index, then x/y/z for 21 landmarks.
_CACHE_HEADER = ["letter", "hand"] + [f"{coord}{n}" for n in range(21) for coord in ["x", "y", "z"]]
# Frame rate assumed when the container does not report a usable one.
_FALLBACK_FPS = 30.0


def _extract_hand_rows(file, letter, landmarker):
	"""Run ``landmarker`` over every frame of ``file`` and collect one row per detected hand.

	Each row is ``[letter, handedness_index, x, y, z, x, y, z, ...]`` built from
	``result.hand_world_landmarks`` (the header written next to these rows assumes
	21 landmarks per hand). As a side effect, an annotated preview of the first
	frame in which a hand is detected is written to ``./preview/<video name>.jpg``.

	Returns an empty list when the video cannot be opened or no hand is ever detected.
	"""
	vid = cv2.VideoCapture(file)
	if not vid.isOpened():
		print(f"Error opening {file}")
		return []

	rows = []
	try:
		fps = vid.get(cv2.CAP_PROP_FPS)
		if not fps > 0:
			# Covers 0, negative and NaN: dividing by it would crash or yield unusable timestamps.
			print(f"Invalid FPS ({fps}) reported for {file}, assuming {_FALLBACK_FPS}")
			fps = _FALLBACK_FPS

		frame_ms = 1000 / fps
		elapsed_ms = 0
		preview_saved = False

		while True:
			ret, frame = vid.read()
			if not ret:
				print(f"Finished {file}")
				break

			# Timestamp of this frame; advanced for every frame read, detected or not.
			timestamp_ms = int(elapsed_ms)
			elapsed_ms += frame_ms

			mp_img = mp.Image(image_format=mp.ImageFormat.SRGB, data=cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
			result = landmarker.detect_for_video(mp_img, timestamp_ms)

			if not result.hand_world_landmarks:
				continue

			for hand, handedness in zip(result.hand_world_landmarks, result.handedness):
				coords = [value for landmark in hand for value in (landmark.x, landmark.y, landmark.z)]
				rows.append([letter, handedness[0].index] + coords)

			if not preview_saved:
				# Previously the preview was only written when the very first frame
				# contained a hand; use the first frame with a detection instead.
				output = drawLandmarks(mp_img.numpy_view(), result)
				cv2.imwrite(f"./preview/{os.path.basename(file)}.jpg", cv2.cvtColor(output, cv2.COLOR_RGB2BGR))
				preview_saved = True
	finally:
		# Release the capture even if detection or drawing raises.
		vid.release()

	return rows


for path in dataset:
	for file in dataset[path]:
		# NOTE(review): guard kept from the original; paths produced by a directory scan
		# should never be the string "nan" - confirm whether it is still needed.
		if file == "nan":
			continue

		cache_file = f"{file}.csv"

		if os.path.exists(cache_file):
			print(f"Cache found, skipping {file}")
			continue

		# A fresh landmarker per video, because the timestamps restart at 0 for each file.
		with HandLandmarker.create_from_options(options) as landmarker:
			print(f"Processing {file}")
			video_data = _extract_hand_rows(file, path, landmarker)

		# Videos without any detection get no cache file and are retried on the next run.
		if video_data:
			with open(cache_file, "w", newline="") as f:
				writer = csv.writer(f)
				writer.writerow(_CACHE_HEADER)
				writer.writerows(video_data)

Cache found, skipping ./dataset/R/IMG_4864.mp4
Cache found, skipping ./dataset/R/IMG_4681.mp4
Cache found, skipping ./dataset/R/IMG_3945.mp4
Cache found, skipping ./dataset/R/IMG_3833.mp4
Cache found, skipping ./dataset/R/IMG_3773.mp4
Cache found, skipping ./dataset/R/IMG_3999.mp4
Cache found, skipping ./dataset/R/IMG_5557.mp4
Cache found, skipping ./dataset/R/IMG_4890.mp4
Cache found, skipping ./dataset/U/IMG_4867.mp4
Cache found, skipping ./dataset/U/IMG_5560.mp4
Cache found, skipping ./dataset/U/IMG_4684.mp4
Cache found, skipping ./dataset/U/IMG_3776.mp4
Cache found, skipping ./dataset/U/IMG_3836.mp4
Cache found, skipping ./dataset/U/IMG_4003.mp4
Cache found, skipping ./dataset/U/IMG_4893.mp4
Cache found, skipping ./dataset/I/IMG_5549.mp4
Cache found, skipping ./dataset/I/IMG_3763.mp4
Cache found, skipping ./dataset/I/IMG_3991.mp4
Cache found, skipping ./dataset/I/IMG_3823.mp4
Cache found, skipping ./dataset/I/IMG_4701.mp4
Cache found, skipping ./dataset/I/IMG_4880.mp4
Cache found, 

### combine all data cache files

In [35]:
# Header of the combined dataset: label, handedness, then x/y/z for landmarks 0..20.
columns = ["letter", "hand"] + [f"{axis}{idx}" for idx in range(21) for axis in "xyz"]

# Keep any previous dataset.csv by renaming it to the first unused dataset.csv.bak<N>.
if os.path.exists("dataset.csv"):
	backup_no = 1
	backup_name = f"dataset.csv.bak{backup_no}"
	while os.path.exists(backup_name):
		backup_no += 1
		backup_name = f"dataset.csv.bak{backup_no}"
	os.rename("dataset.csv", backup_name)

# Start a fresh dataset.csv that contains only the header row.
with open("dataset.csv", "w", newline="") as out:
	writer = csv.writer(out)
	writer.writerow(columns)

In [44]:
def _jitter_row(row, amplitude=0.0001):
	"""Return a copy of ``row`` with uniform noise added to every coordinate cell.

	The first two cells (letter, hand) are kept as they are; every other cell is
	parsed as a float, shifted by ``uniform(-amplitude, amplitude)`` and turned
	back into a string. Raises ``ValueError`` if a cell is not a number.
	"""
	return row[:2] + [str(float(value) + uniform(-amplitude, amplitude)) for value in row[2:]]


# Base names of videos that were skipped because their cache is missing or unreadable.
blacklist = set()

# NOTE: rows are appended to dataset.csv, so running this cell again without first
# re-creating dataset.csv duplicates the data. The jitter is not seeded, so the
# written values differ from run to run.
for path in dataset:
	for file in dataset[path]:
		cache_file = f"{file}.csv"
		video_name = os.path.basename(file)

		if not os.path.exists(cache_file):
			print(f"Cache not found, skipping {file}")
			blacklist.add(video_name)
			continue

		if video_name in blacklist:
			print(f"File in blacklist, skipping {file}")
			continue

		try:
			with open(cache_file, "r", newline="") as cache:
				reader = csv.reader(cache)
				next(reader)  # skip the header row; an empty file raises and gets blacklisted
				# Parse the whole file before writing anything, so a malformed cache
				# cannot leave a partial set of rows in dataset.csv.
				jittered_rows = [_jitter_row(row) for row in reader]

			with open("dataset.csv", "a", newline="") as out:
				csv.writer(out).writerows(jittered_rows)

		except Exception as e:
			print(f"Error processing {file}: {e}")
			# blacklist is a set: .append() would raise AttributeError inside this handler.
			blacklist.add(video_name)
			continue

### split dataset

In [None]:
# Labels that are split; rows whose "letter" is not listed here end up in neither file.
letters = list("ABCDEFGHIKLMNOPQRSTUVWXY#")

print("Reading CSV...")
df = pd.read_csv("dataset.csv")

# Fail fast when dataset.csv does not have exactly the layout letter, hand, x0, y0, z0, ..., z20.
expected_cols = ["letter", "hand"] + [f"{axis}{n}" for n in range(21) for axis in "xyz"]
actual_cols = df.columns.tolist()
if actual_cols != expected_cols:
	raise ValueError(f"Expected columns {expected_cols}, got {actual_cols}")

print("Splitting train/test...")
train_data = []
test_data = []

# Per-letter 80/20 split with a fixed seed, so every label keeps the same ratio.
# NOTE(review): rows are sampled individually; rows that come from the same source
# recording can land on both sides of the split - confirm this is intended.
for letter in letters:
	rows_for_letter = df.loc[df["letter"] == letter]
	n_train = int(len(rows_for_letter) * 0.8)

	train_part = rows_for_letter.sample(n=n_train, random_state=42)
	held_out = rows_for_letter.drop(index=train_part.index)
	# Sampling all remaining rows only shuffles them.
	test_part = held_out.sample(n=len(held_out), random_state=42)

	print(f"Letter {letter}: Train {len(train_part)}, Test {len(test_part)}")

	train_data.append(train_part)
	test_data.append(test_part)

# Merge the per-letter pieces and shuffle so the labels are interleaved.
train_df = pd.concat(train_data, ignore_index=True).sample(frac=1, random_state=42).reset_index(drop=True)
test_df = pd.concat(test_data, ignore_index=True).sample(frac=1, random_state=42).reset_index(drop=True)

print("Saving to train.csv and test.csv...")
train_df.to_csv("train.csv", index=False)
test_df.to_csv("test.csv", index=False)

print(f"Done! Train rows: {len(train_df)}, Test rows: {len(test_df)}")

Reading CSV...
Splitting train/test...
Letter A: Train 2808, Test 703
Letter B: Train 3004, Test 751
Letter C: Train 2704, Test 677
Letter D: Train 1766, Test 442
Letter E: Train 2856, Test 715
Letter F: Train 2577, Test 645
Letter G: Train 2858, Test 715
Letter H: Train 3218, Test 805
Letter I: Train 2423, Test 606
Letter K: Train 2428, Test 608
Letter L: Train 2024, Test 506
Letter M: Train 2786, Test 697
Letter N: Train 2355, Test 589
Letter O: Train 3006, Test 752
Letter P: Train 2963, Test 741
Letter Q: Train 2722, Test 681
Letter R: Train 2292, Test 574
Letter S: Train 2228, Test 557
Letter T: Train 2493, Test 624
Letter U: Train 1989, Test 498
Letter V: Train 2226, Test 557
Letter W: Train 2302, Test 576
Letter X: Train 2764, Test 692
Letter Y: Train 2252, Test 563
Letter #: Train 201, Test 51
Saving to train.csv and test.csv...
Done! Train rows: 61245, Test rows: 15325


: 

### visualize

In [None]:
# Extract the world landmarks from the first frame of one sample video.
# `landmarks` ends up as a flat Series (x, y, z per landmark) for the last hand
# reported in that frame, and stays empty when no hand is detected.
landmarks = pd.Series(dtype=float)
with HandLandmarker.create_from_options(options) as landmarker:
	file = "./dataset/V/IMG_3837.mp4"
	print(file)
	vid = cv2.VideoCapture(file)

	try:
		if not vid.isOpened():
			# A bare `exit` does nothing; raise so the cell actually stops here.
			raise RuntimeError(f"Error opening {file}")

		ret, frame = vid.read()
		if not ret:
			raise RuntimeError(f"Could not read a frame from {file}")
	finally:
		# Only the first frame is needed, so the capture can be closed right away.
		vid.release()

	mp_img = mp.Image(image_format=mp.ImageFormat.SRGB, data=cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

	result = landmarker.detect_for_video(mp_img, 0)
	landmarks_ls = result.hand_world_landmarks

	if landmarks_ls:
		# With several hands in the frame, the last one wins (same as overwriting in a loop).
		landmarks = pd.Series(np.array([[landmark.x, landmark.y, landmark.z] for landmark in landmarks_ls[-1]]).flatten())
	else:
		print(f"No hand detected in the first frame of {file}")

In [None]:
# 3D axes for the hand skeleton. Landmark z is drawn on the plot's y axis and the
# negated landmark y on the plot's vertical axis, hence the swapped axis labels.
fig = plt.figure()
ax = fig.add_subplot(111, projection="3d")
ax.view_init(elev=10, azim=255)
for set_limits in (ax.set_xlim, ax.set_ylim, ax.set_zlim):
	set_limits(-0.1, 0.1)
ax.set_xlabel("X")
ax.set_ylabel("Z")
ax.set_zlabel("Y")

frames = 360
fps = 30


def update(frame):
	"""Move the camera for animation step ``frame``.

	The azimuth covers one full turn over ``frames`` steps, while the elevation
	swings between 0 and 20 degrees, completing two cycles per turn.
	"""
	azimuth = 360 / frames * frame
	elevation = 10 + np.sin(np.radians(azimuth * 2)) * 10
	ax.view_init(elev=elevation, azim=azimuth)


# One colour per group of four landmarks (1-4, 5-8, ...), then black for the two extra outlines.
colors = ["cyan", "orchid", "deepskyblue", "lime", "orange", "k", "k"]

# View the flat (x, y, z, x, y, z, ...) series as one row per landmark.
points = landmarks.to_numpy().reshape(-1, 3)

lines = [points[start : start + 4] for start in range(1, 21, 4)]  # five chains of four landmarks
lines.append(points[[0, 1]])  # landmark 0 to landmark 1
lines.append(points[[0, 5, 9, 13, 17, 0]])  # closed outline through the first landmark of chains 2-5
for color, line in zip(colors, lines):
	# NOTE(review): the vertical coordinate is stretched by 1.25; the reason is not evident here.
	ax.plot(line[:, 0], line[:, 2], -line[:, 1] * 1.25, color=color)

for k in range(21):
	x, y, z = points[k]
	ax.scatter(x, z, -y * 1.25, color="k" if k == 0 else colors[(k - 1) // 4])


def _report_progress(i, n):
	"""Print how many animation frames have been written so far."""
	print(f"Progress: {i}/{n}")


ani = animation.FuncAnimation(fig, update, frames=frames, interval=1000 / fps)  # Rotate 360°
ani.save("hand.gif", writer="ffmpeg", fps=fps, progress_callback=_report_progress)

plt.show()