In [1]:
%pip install tensorflow mediapipe pygame pillow

Note: you may need to restart the kernel to use updated packages.


In [2]:
import os
import numpy as np
import pandas as pd
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# 데이터 경로 설정
train_dir = 'project2/archive/Train'
test_dir = 'project2/archive/Test'

# 데이터 전처리
datagen = ImageDataGenerator(rescale=1.0 / 255)
train_generator = datagen.flow_from_directory(
    train_dir,
    target_size=(28, 28),
    color_mode="grayscale",
    batch_size=32,
    class_mode="sparse"
)
test_generator = datagen.flow_from_directory(
    test_dir,
    target_size=(28, 28),
    color_mode="grayscale",
    batch_size=32,
    class_mode="sparse"
)

# 모델 정의
model = Sequential([
    Conv2D(32, (3, 3), activation="relu", input_shape=(28, 28, 1)),
    MaxPooling2D((2, 2)),
    BatchNormalization(),
    Dropout(0.25),
    Conv2D(64, (3, 3), activation="relu"),
    MaxPooling2D((2, 2)),
    BatchNormalization(),
    Dropout(0.25),
    Flatten(),
    Dense(128, activation="relu"),
    Dropout(0.5),
    Dense(len(train_generator.class_indices), activation="softmax")
])

# 모델 컴파일
model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"])

# 모델 학습
model.fit(
    train_generator,
    validation_data=test_generator,
    epochs=10
)

# 모델 저장
model.save('gesture_model22.h5')
print("모델이 저장되었습니다.")


Found 27455 images belonging to 24 classes.
Found 7172 images belonging to 24 classes.


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  self._warn_if_super_not_called()


Epoch 1/10
[1m858/858[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 47ms/step - accuracy: 0.5770 - loss: 1.4813 - val_accuracy: 0.8790 - val_loss: 0.3619
Epoch 2/10
[1m858/858[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 37ms/step - accuracy: 0.9473 - loss: 0.1580 - val_accuracy: 0.9247 - val_loss: 0.3043
Epoch 3/10
[1m858/858[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 37ms/step - accuracy: 0.9704 - loss: 0.0844 - val_accuracy: 0.9023 - val_loss: 0.3839
Epoch 4/10
[1m858/858[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 41ms/step - accuracy: 0.9767 - loss: 0.0691 - val_accuracy: 0.9191 - val_loss: 0.3798
Epoch 5/10
[1m858/858[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 40ms/step - accuracy: 0.9800 - loss: 0.0595 - val_accuracy: 0.9357 - val_loss: 0.2309
Epoch 6/10
[1m858/858[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 36ms/step - accuracy: 0.9817 - loss: 0.0543 - val_accuracy: 0.9374 - val_loss: 0.2875
Epoch 7/10
[1m8



모델이 저장되었습니다.


In [12]:
import os
import cv2
import pygame
import numpy as np
from time import sleep
from tensorflow.keras.models import load_model
import mediapipe as mp

# Pygame 초기화
pygame.init()

# 화면 크기 설정
screen = pygame.display.set_mode((800, 600))
pygame.display.set_caption("양방향 수화 변환")

# 색상 정의
WHITE = (255, 255, 255)

# MediaPipe Hands 초기화
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
hands = mp_hands.Hands(
    static_image_mode=False,
    max_num_hands=1,
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5
)

# 학습된 모델 로드
model = load_model('gesture_model22.h5')  # 수화 텍스트 변환 모델
classes = list("ABCDEFGHIJKLMNOPQRSTUVWXYZ")  # 클래스 이름

# 수화 이미지 로드
SIGN_IMAGE_DIR = "project2/archive/sign_images"
sign_images = {char: pygame.image.load(os.path.join(SIGN_IMAGE_DIR, f"{char}.png")) for char in classes}

def landmarks_to_image(landmarks):
    """
    손 랜드마크 데이터를 28x28 이미지로 변환.
    """
    image = np.zeros((28, 28), dtype=np.float32)
    for lm in landmarks:
        x = int(lm[0] * 27)
        y = int(lm[1] * 27)
        image[y, x] = 1.0
    return image

def predict_gesture(landmarks):
    """
    손 랜드마크를 모델 입력 형식으로 변환 후 예측.
    """
    image = landmarks_to_image(landmarks).reshape(1, 28, 28, 1)
    prediction = model.predict(image)
    class_id = np.argmax(prediction)
    return classes[class_id]

def display_sign(text):
    """
    입력된 텍스트에 따라 수화 이미지를 화면에 표시.
    """
    for char in text.upper():
        if char in sign_images:
            screen.fill(WHITE)
            image = pygame.transform.scale(sign_images[char], (400, 400))
            screen.blit(image, (200, 100))
            pygame.display.update()
            sleep(1)

def main():
    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        print("카메라를 열 수 없습니다.")
        return

    print("Q 키를 눌러 종료하세요.")
    detected_text = ""  # 수화를 통해 변환된 텍스트 저장

    while True:
        ret, frame = cap.read()
        if not ret:
            print("프레임을 가져올 수 없습니다.")
            break

        frame = cv2.flip(frame, 1)
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        result = hands.process(rgb_frame)

        if result.multi_hand_landmarks:
            for hand_landmarks in result.multi_hand_landmarks:
                landmarks = [[lm.x, lm.y] for lm in hand_landmarks.landmark]
                detected_char = predict_gesture(landmarks)
                detected_text += detected_char
                mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

        # 텍스트 표시
        cv2.putText(frame, f"Detected Text: {detected_text}", (10, 50),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)

        cv2.imshow("Sign to Text", frame)

        # 텍스트를 수화로 변환
        display_sign(detected_text)

        # Q 키로 종료
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()
    hands.close()
    pygame.quit()

if __name__ == "__main__":
    main()




Q 키를 눌러 종료하세요.
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 172ms/step


In [18]:
import os
import cv2
import pygame
import numpy as np
from time import sleep
from tensorflow.keras.models import load_model
import mediapipe as mp

# Pygame 초기화
pygame.init()

# 화면 크기 설정
screen = pygame.display.set_mode((800, 600))
pygame.display.set_caption("양방향 수화 변환")

# 색상 정의
WHITE = (255, 255, 255)

# MediaPipe Hands 초기화
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
hands = mp_hands.Hands(
    static_image_mode=False,
    max_num_hands=1,
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5
)

# 학습된 모델 로드
model = load_model('gesture_model22.h5')  # 수화 텍스트 변환 모델
classes = list("ABCDEFGHIJKLMNOPQRSTUVWXYZ")  # 클래스 이름

# 수화 이미지 로드
SIGN_IMAGE_DIR = "project2/archive/sign_images"
sign_images = {char: pygame.image.load(os.path.join(SIGN_IMAGE_DIR, f"{char}.png")) for char in classes}

def landmarks_to_image(landmarks):
    """
    손 랜드마크 데이터를 28x28 이미지로 변환.
    """
    image = np.zeros((28, 28), dtype=np.float32)
    for lm in landmarks:
        x = int(lm[0] * 27)
        y = int(lm[1] * 27)
        image[y, x] = 1.0
    return image

def predict_gesture(landmarks):
    """
    손 랜드마크를 모델 입력 형식으로 변환 후 예측.
    """
    image = landmarks_to_image(landmarks).reshape(1, 28, 28, 1)
    prediction = model.predict(image)
    confidence = np.max(prediction)
    if confidence < 0.7:  # 신뢰도 임계값 설정
        return None
    return classes[np.argmax(prediction)]

def display_sign(text):
    """
    입력된 텍스트에 따라 수화 이미지를 화면에 표시.
    """
    screen.fill(WHITE)
    if text:
        char = text[-1]  # 마지막 문자를 표시
        if char in sign_images:
            image = pygame.transform.scale(sign_images[char], (400, 400))
            screen.blit(image, (200, 100))
    pygame.display.update()

def main():
    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        print("카메라를 열 수 없습니다.")
        return

    print("Q 키를 눌러 종료하세요.")
    detected_text = ""  # 수화를 통해 변환된 텍스트 저장
    max_text_length = 20  # 텍스트 길이 제한
    clock = pygame.time.Clock()  # FPS 제한용

    while True:
        ret, frame = cap.read()
        if not ret:
            print("프레임을 가져올 수 없습니다.")
            break

        frame = cv2.flip(frame, 1)
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        result = hands.process(rgb_frame)

        if result.multi_hand_landmarks:
            for hand_landmarks in result.multi_hand_landmarks:
                landmarks = [[lm.x, lm.y] for lm in hand_landmarks.landmark]
                detected_char = predict_gesture(landmarks)
                if detected_char and (len(detected_text) < max_text_length):
                    detected_text += detected_char
                mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

        # 텍스트 표시 (OpenCV 창)
        cv2.putText(frame, f"Detected Text: {detected_text}", (10, 50),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)
        cv2.imshow("Sign to Text", frame)

        # 수화 출력 (Pygame 창)
        display_sign(detected_text)

        # Q 키로 종료
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

        clock.tick(30)  # FPS 제한

    cap.release()
    cv2.destroyAllWindows()
    hands.close()
    pygame.quit()

if __name__ == "__main__":
    main()




Q 키를 눌러 종료하세요.
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 124ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[

In [20]:
import os
import cv2
import pygame
import numpy as np
from tensorflow.keras.models import load_model
import mediapipe as mp

# Pygame 초기화
pygame.init()

# 화면 크기 설정
screen = pygame.display.set_mode((800, 600))
pygame.display.set_caption("양방향 수화 변환")

# 색상 정의
WHITE = (255, 255, 255)

# MediaPipe Hands 초기화
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
hands = mp_hands.Hands(
    static_image_mode=False,
    max_num_hands=1,
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5
)

# 학습된 모델 로드
model = load_model('gesture_model22.h5')  # 수화 텍스트 변환 모델
classes = list("ABCDEFGHIJKLMNOPQRSTUVWXYZ")  # 클래스 이름 (모델 학습 레이블 순서와 일치해야 함)

# 수화 이미지 로드
SIGN_IMAGE_DIR = "project2/archive/sign_images"
sign_images = {char: pygame.image.load(os.path.join(SIGN_IMAGE_DIR, f"{char}.png")) for char in classes}

# 마지막으로 인식된 문자 저장
last_detected_char = None

def landmarks_to_image(landmarks):
    """
    손 랜드마크 데이터를 28x28 이미지로 변환.
    """
    image = np.zeros((28, 28), dtype=np.float32)
    for lm in landmarks:
        x = int(lm[0] * 27)
        y = int(lm[1] * 27)
        image[y, x] = 1.0
    return image

def predict_gesture(landmarks):
    """
    손 랜드마크를 모델 입력 형식으로 변환 후 예측.
    """
    image = landmarks_to_image(landmarks).reshape(1, 28, 28, 1)
    prediction = model.predict(image)
    confidence = np.max(prediction)
    if confidence < 0.7:  # 신뢰도 임계값 설정
        return None
    return classes[np.argmax(prediction)]

def display_sign(text):
    """
    입력된 텍스트에 따라 수화 이미지를 화면에 표시.
    """
    screen.fill(WHITE)
    if text:
        char = text[-1]  # 마지막 문자를 표시
        if char in sign_images:
            image = pygame.transform.scale(sign_images[char], (400, 400))
            screen.blit(image, (200, 100))
    pygame.display.update()

def main():
    global last_detected_char

    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        print("카메라를 열 수 없습니다.")
        return

    print("Q 키를 눌러 종료하세요.")
    detected_text = ""  # 수화를 통해 변환된 텍스트 저장
    max_text_length = 20  # 텍스트 길이 제한
    clock = pygame.time.Clock()  # FPS 제한용

    while True:
        ret, frame = cap.read()
        if not ret:
            print("프레임을 가져올 수 없습니다.")
            break

        frame = cv2.flip(frame, 1)
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        result = hands.process(rgb_frame)

        if result.multi_hand_landmarks:
            for hand_landmarks in result.multi_hand_landmarks:
                landmarks = [[lm.x, lm.y] for lm in hand_landmarks.landmark]
                detected_char = predict_gesture(landmarks)

                # 동일한 결과 반복 방지
                if detected_char and detected_char != last_detected_char:
                    last_detected_char = detected_char
                    if len(detected_text) < max_text_length:
                        detected_text += detected_char

                mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

        # 텍스트 표시 (OpenCV 창)
        cv2.putText(frame, f"Detected Text: {detected_text}", (10, 50),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)
        cv2.imshow("Sign to Text", frame)

        # 수화 출력 (Pygame 창)
        display_sign(detected_text)

        # Q 키로 종료
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

        clock.tick(30)  # FPS 제한

    cap.release()
    cv2.destroyAllWindows()
    hands.close()
    pygame.quit()

if __name__ == "__main__":
    main()




Q 키를 눌러 종료하세요.
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 117ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 78ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[

In [21]:
%pip install ultralytics


Note: you may need to restart the kernel to use updated packages.


In [25]:
%pip install ultralytics opencv-python pygame


Note: you may need to restart the kernel to use updated packages.


In [5]:
import cv2
import pygame
from ultralytics import YOLO
import os
from time import sleep

# YOLO 모델 로드
model = YOLO("project2/archive/asl_yolov8_model.pt")  # 학습된 모델 파일 경로

# Pygame 초기화
pygame.init()

# 화면 크기 설정
screen = pygame.display.set_mode((800, 600))
pygame.display.set_caption("양방향 수화 변환")

# 색상 정의
WHITE = (255, 255, 255)

# 알파벳 클래스
classes = list("ABCDEFGHIJKLMNOPQRSTUVWXYZ")

# 수화 이미지 경로
SIGN_IMAGE_DIR = "project2/archive/sign_images"
sign_images = {char: pygame.image.load(os.path.join(SIGN_IMAGE_DIR, f"{char}.png")) for char in classes}

# 텍스트를 수화 이미지로 출력
def display_sign(text):
    """
    입력된 텍스트에 따라 수화 이미지를 화면에 표시.
    """
    screen.fill(WHITE)
    if text:
        char = text[-1]  # 마지막 문자를 표시
        if char in sign_images:
            image = pygame.transform.scale(sign_images[char], (400, 400))
            screen.blit(image, (200, 100))
    pygame.display.update()

def main():
    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        print("카메라를 열 수 없습니다.")
        return

    print("Q 키를 눌러 종료하세요.")
    detected_text = ""  # 수화를 통해 변환된 텍스트 저장
    max_text_length = 20  # 텍스트 길이 제한
    last_detected_char = None  # 마지막으로 인식된 문자
    clock = pygame.time.Clock()  # FPS 제한용

    while True:
        ret, frame = cap.read()
        if not ret:
            print("프레임을 가져올 수 없습니다.")
            break

        # YOLO로 추론
        results = model(frame)
        for result in results:
            for box in result.boxes:
                # 클래스 ID와 신뢰도
                class_id = int(box.cls[0])
                confidence = box.conf[0]

                # 신뢰도 임계값
                if confidence > 0.7:
                    detected_char = classes[class_id]

                    # 동일한 결과 반복 방지
                    if detected_char != last_detected_char:
                        last_detected_char = detected_char
                        if len(detected_text) < max_text_length:
                            detected_text += detected_char

                    # 바운딩 박스 그리기
                    x1, y1, x2, y2 = map(int, box.xyxy[0])
                    cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                    cv2.putText(frame, f"{detected_char} ({confidence:.2f})", (x1, y1 - 10),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)

        # 텍스트 표시
        cv2.putText(frame, f"Detected Text: {detected_text}", (10, 50),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)
        cv2.imshow("Sign to Text", frame)

        # 수화 출력 (Pygame 창)
        display_sign(detected_text)

        # Q 키로 종료
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

        clock.tick(30)  # FPS 제한

    cap.release()
    cv2.destroyAllWindows()
    pygame.quit()

if __name__ == "__main__":
    main()


Q 키를 눌러 종료하세요.

0: 480x640 2 Ks, 28.6ms
Speed: 9.5ms preprocess, 28.6ms inference, 9.9ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 2 Ks, 16.4ms
Speed: 7.0ms preprocess, 16.4ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 K, 16.6ms
Speed: 4.8ms preprocess, 16.6ms inference, 6.3ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 (no detections), 19.2ms
Speed: 4.4ms preprocess, 19.2ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 K, 18.0ms
Speed: 2.2ms preprocess, 18.0ms inference, 2.1ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 K, 15.9ms
Speed: 2.0ms preprocess, 15.9ms inference, 2.4ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 2 Ks, 17.3ms
Speed: 4.6ms preprocess, 17.3ms inference, 13.4ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 K, 15.2ms
Speed: 3.2ms preprocess, 15.2ms inference, 2.8ms postprocess per image at shape (1, 3, 480, 640)



In [12]:
import cv2
import pygame
from ultralytics import YOLO
import os
from time import sleep

# YOLO 모델 로드
model = YOLO("project2/archive/asl_yolov8_model.pt")  # 학습된 모델 파일 경로

# Pygame 초기화
pygame.init()

# 화면 크기 설정
screen = pygame.display.set_mode((800, 600))
pygame.display.set_caption("양방향 수화 변환")

# 색상 정의
WHITE = (255, 255, 255)

# 알파벳 클래스
classes = list("ABCDEFGHIJKLMNOPQRSTUVWXYZ")

# 수화 이미지 경로
SIGN_IMAGE_DIR = "project2/archive/sign_images"
sign_images = {char: pygame.image.load(os.path.join(SIGN_IMAGE_DIR, f"{char}.png")) for char in classes}

# 텍스트를 수화 이미지로 출력
def display_sign(text):
    """
    입력된 텍스트에 따라 수화 이미지를 화면에 표시.
    """
    screen.fill(WHITE)
    if text:
        char = text[-1]  # 마지막 문자를 표시
        if char in sign_images:
            image = pygame.transform.scale(sign_images[char], (400, 400))
            screen.blit(image, (200, 100))
    pygame.display.update()

def display_text_to_sign(input_text):
    """
    텍스트를 수화 이미지로 출력.
    """
    screen.fill(WHITE)
    for char in input_text.upper():
        if char in sign_images:
            screen.fill(WHITE)
            image = pygame.transform.scale(sign_images[char], (400, 400))
            screen.blit(image, (200, 100))
            pygame.display.update()
            sleep(1)  # 1초 동안 이미지 표시
    pygame.display.update()

def main():
    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        print("카메라를 열 수 없습니다.")
        return

    print("Q 키를 눌러 종료하세요.")
    print("텍스트를 입력하려면 `T`를 누르세요.")
    detected_text = ""  # 수화를 통해 변환된 텍스트 저장
    max_text_length = 20  # 텍스트 길이 제한
    last_detected_char = None  # 마지막으로 인식된 문자
    clock = pygame.time.Clock()  # FPS 제한용

    while True:
        ret, frame = cap.read()
        if not ret:
            print("프레임을 가져올 수 없습니다.")
            break

        # YOLO로 추론
        results = model(frame)
        for result in results:
            for box in result.boxes:
                # 클래스 ID와 신뢰도
                class_id = int(box.cls[0])
                confidence = box.conf[0]

                # 신뢰도 임계값
                if confidence > 0.7:
                    detected_char = classes[class_id]

                    # 동일한 결과 반복 방지
                    if detected_char != last_detected_char:
                        last_detected_char = detected_char
                        if len(detected_text) < max_text_length:
                            detected_text += detected_char

                    # 바운딩 박스 그리기
                    x1, y1, x2, y2 = map(int, box.xyxy[0])
                    cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                    cv2.putText(frame, f"{detected_char} ({confidence:.2f})", (x1, y1 - 10),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)

        # 텍스트 표시
        cv2.putText(frame, f"Detected Text: {detected_text}", (10, 50),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)
        cv2.imshow("Sign to Text", frame)

        # 수화 출력 (Pygame 창)
        display_sign(detected_text)

        # 키 입력 처리
        key = cv2.waitKey(1) & 0xFF
        if key == ord('q'):  # Q 키로 종료
            break
        elif key == ord('t'):  # T 키로 텍스트 입력 모드
            cap.release()
            cv2.destroyAllWindows()
            input_text = input("Enter a text to convert to sign language: ")
            display_text_to_sign(input_text)
            print("텍스트 수화 변환 완료. 프로그램으로 돌아갑니다.")
            cap = cv2.VideoCapture(0)  # 카메라 다시 열기

        clock.tick(30)  # FPS 제한

    cap.release()
    cv2.destroyAllWindows()
    pygame.quit()

if __name__ == "__main__":
    main()


Q 키를 눌러 종료하세요.
텍스트를 입력하려면 `T`를 누르세요.

0: 480x640 (no detections), 20.8ms
Speed: 3.6ms preprocess, 20.8ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 (no detections), 23.1ms
Speed: 4.2ms preprocess, 23.1ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 (no detections), 19.3ms
Speed: 3.4ms preprocess, 19.3ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 (no detections), 16.4ms
Speed: 6.6ms preprocess, 16.4ms inference, 1.3ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 (no detections), 22.0ms
Speed: 2.0ms preprocess, 22.0ms inference, 3.6ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 (no detections), 19.1ms
Speed: 8.2ms preprocess, 19.1ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 C, 18.1ms
Speed: 2.5ms preprocess, 18.1ms inference, 2.5ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 C, 17.4ms
Speed: 3.3ms preproc