In [None]:
# Mount Google Drive; the dataset paths used later live under /content/drive/MyDrive.
from google.colab import drive

drive.mount(mountpoint='/content/drive')

ValueError: Mountpoint must not already contain files

In [None]:
!pip install mediapipe opencv-python openpyxl

Collecting mediapipe
  Downloading mediapipe-0.10.21-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (9.7 kB)
Collecting numpy<2 (from mediapipe)
  Downloading numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.0/61.0 kB[0m [31m1.5 MB/s[0m eta [36m0:00:00[0m
Collecting protobuf<5,>=4.25.3 (from mediapipe)
  Downloading protobuf-4.25.7-cp37-abi3-manylinux2014_x86_64.whl.metadata (541 bytes)
Collecting sounddevice>=0.4.4 (from mediapipe)
  Downloading sounddevice-0.5.1-py3-none-any.whl.metadata (1.4 kB)
Downloading mediapipe-0.10.21-cp311-cp311-manylinux_2_28_x86_64.whl (35.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m35.6/35.6 MB[0m [31m11.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (18.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m18.3/18.3 MB[0m [31m38.

In [None]:
!pip uninstall -y numpy pandas mediapipe
!pip install numpy==1.24.4 pandas==1.5.3 mediapipe==0.10.21

Found existing installation: numpy 1.26.4
Uninstalling numpy-1.26.4:
  Successfully uninstalled numpy-1.26.4
Found existing installation: pandas 2.2.2
Uninstalling pandas-2.2.2:
  Successfully uninstalled pandas-2.2.2
Found existing installation: mediapipe 0.10.21
Uninstalling mediapipe-0.10.21:
  Successfully uninstalled mediapipe-0.10.21
Collecting numpy==1.24.4
  Downloading numpy-1.24.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.6 kB)
Collecting pandas==1.5.3
  Downloading pandas-1.5.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (11 kB)
Collecting mediapipe==0.10.21
  Using cached mediapipe-0.10.21-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (9.7 kB)
INFO: pip is looking at multiple versions of jax to determine which version is compatible with other requirements. This could take a while.
Collecting jax (from mediapipe==0.10.21)
  Downloading jax-0.6.0-py3-none-any.whl.metadata (22 kB)
Collecting jaxlib (from mediapipe==0.10.21)

In [None]:
import os
import cv2
import numpy as np
import pandas as pd
import mediapipe as mp
from tqdm import tqdm
from urllib.request import urlretrieve

In [1]:
# Path configuration
video_dir = "/content/drive/MyDrive/WLASL-2000/videos"
train_excel = "/content/drive/MyDrive/WLASL-2000/top50_train.xlsx"
test_excel = "/content/drive/MyDrive/WLASL-2000/top50_test.xlsx"
train_save_dir = "/content/drive/MyDrive/WLASL-2000/MP_Data_Train"
test_save_dir = "/content/drive/MyDrive/WLASL-2000/MP_Data_Test"

# Fail fast when the source videos are unreachable (typically because Google
# Drive is not mounted). Otherwise os.makedirs below would silently create the
# output tree on the local disk under /content/drive, and drive.mount() rejects
# a mount point that already contains files.
if not os.path.isdir(video_dir):
    raise FileNotFoundError(
        f"영상 폴더를 찾을 수 없습니다 (Google Drive 마운트 여부 확인): {video_dir}"
    )

os.makedirs(train_save_dir, exist_ok=True)
os.makedirs(test_save_dir, exist_ok=True)

# MediaPipe initialisation
mp_holistic = mp.solutions.holistic

# Collects the file names of videos that could not be processed
failed_videos = []

# Landmark extraction helper
def extract(lms, n):
    """Convert a landmark container into a list of ``[x, y, z]`` rows.

    Args:
        lms: Object exposing a ``landmark`` iterable whose items have ``x``,
            ``y`` and ``z`` attributes, or a falsy value (e.g. ``None``) when
            nothing was detected.
        n: Number of zero-filled rows to return when ``lms`` is falsy.

    Returns:
        One ``[x, y, z]`` list per landmark, or ``n`` independent
        ``[0.0, 0.0, 0.0]`` rows when ``lms`` is falsy.
    """
    if not lms:
        # Build every row separately: ``[[0.0, 0.0, 0.0]] * n`` would repeat a
        # reference to one shared inner list, so editing one row edits all.
        return [[0.0, 0.0, 0.0] for _ in range(n)]
    return [[p.x, p.y, p.z] for p in lms.landmark]

# Run the full keypoint extraction for every row of an Excel sheet
def process_excel(excel_path, save_root):
    """Extract per-frame holistic keypoints for every video listed in a sheet.

    Reads ``excel_path`` with pandas and, for each row, opens the video named
    in the "파일명" column (looked up under the module-level ``video_dir``),
    runs MediaPipe Holistic on every frame and saves one ``.npy`` file per
    frame to ``save_root/<어휘명>/<video id>/<frame index>.npy``.

    Each saved array concatenates face, pose, left-hand and right-hand rows in
    that order; a part that was not detected is replaced by 468, 33, 21 and 21
    zero rows respectively (see ``extract``).

    Videos that are missing, cannot be opened, or raise while being processed
    are reported with ``print`` and appended to the module-level
    ``failed_videos`` list; processing then continues with the next row.

    Args:
        excel_path: Path of the Excel file; it must contain the columns
            "어휘명" and "파일명".
        save_root: Directory under which the keypoint files are written.
    """
    # Output is only allowed below this folder; anything else is treated as a
    # per-video failure.
    allowed_prefix = "/content/drive/MyDrive/WLASL-2000/"

    df = pd.read_excel(excel_path)

    for _, row in tqdm(df.iterrows(), total=len(df)):
        gloss = row["어휘명"]
        filename = row["파일명"]
        video_id = os.path.splitext(filename)[0]
        video_path = os.path.join(video_dir, filename)

        # Skip rows whose video file is not on disk
        if not os.path.exists(video_path):
            print(f"영상 없음: {filename}")
            failed_videos.append(filename)
            continue

        save_dir = os.path.join(save_root, gloss, video_id)

        cap = cv2.VideoCapture(video_path)
        try:
            if not cap.isOpened():
                print(f"영상 열기 실패: {filename}")
                failed_videos.append(filename)
                continue

            # Validate the destination once per video, before anything is
            # created on disk. The trailing separator makes this equivalent to
            # checking every "<save_dir>/<frame>.npy" path individually.
            if not os.path.join(save_dir, "").startswith(allowed_prefix):
                raise ValueError(f"잘못된 경로 감지됨: {save_dir}")

            # Only create the output folder for videos that actually opened
            os.makedirs(save_dir, exist_ok=True)

            frame_idx = 0
            with mp_holistic.Holistic(static_image_mode=False) as holistic:
                while cap.isOpened():
                    success, frame = cap.read()
                    if not success:
                        break

                    # OpenCV decodes frames as BGR; convert before processing
                    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                    results = holistic.process(frame)

                    keypoints = []
                    keypoints += extract(results.face_landmarks, 468)
                    keypoints += extract(results.pose_landmarks, 33)
                    keypoints += extract(results.left_hand_landmarks, 21)
                    keypoints += extract(results.right_hand_landmarks, 21)
                    keypoints = np.array(keypoints)

                    np.save(os.path.join(save_dir, f"{frame_idx}.npy"), keypoints)
                    frame_idx += 1

        except Exception as e:
            # Best effort: record the failure and move on to the next video
            print(f"처리 실패: {filename} | 이유: {str(e)}")
            failed_videos.append(filename)
        finally:
            # Always free the capture handle, including on the open-failure
            # path and when the loop is interrupted.
            cap.release()

try:
    # Process the training split
    print("학습 데이터 처리 중...")
    process_excel(train_excel, train_save_dir)

    # Process the test split
    print("테스트 데이터 처리 중...")
    process_excel(test_excel, test_save_dir)
finally:
    # Persist the failure list even when a split aborts part-way, so the
    # failures collected so far are not lost.
    with open("/content/drive/MyDrive/WLASL-2000/failed_videos.txt", "w", encoding="utf-8") as f:
        f.writelines(f"{name}\n" for name in failed_videos)

# Only reached when both splits finished
print(f"전체 처리 완료, 실패한 비디오 수: {len(failed_videos)}개")


NameError: name 'os' is not defined