In [None]:
import cv2
import os
import time

# --- Configuration ---
# How many frames are recorded for every letter
NUM_IMAGES = 30

# Root folder of the dataset; it is created up front when it does not exist yet
DATA_DIR = 'my_amharic_dataset'
os.makedirs(DATA_DIR, exist_ok=True)

# --- Data Capture Script ---
print("Starting data capture tool...")

# Every letter that will be captured, written as one string per family.
# Each string holds the seven forms of that family in capture order, so the
# 33 rows below expand to 231 single-character labels.
_FAMILY_ROWS = (
    'ሀሁሂሃሄህሆ',  # Ha family
    'ለሉሊላሌልሎ',  # La family
    'ሐሑሒሓሔሕሖ',  # Hha family
    'መሙሚማሜምሞ',  # Ma family
    'ሠሡሢሣሤሥሦ',  # Sza family
    'ረሩሪራሬርሮ',  # Ra family
    'ሰሱሲሳሴስሶ',  # Sa family
    'ሸሹሺሻሼሽሾ',  # Sha family
    'ቀቁቂቃቄቅቆ',  # Qa family
    'በቡቢባቤብቦ',  # Ba family
    'ተቱቲታቴትቶ',  # Ta family
    'ቸቹቺቻቼችቾ',  # Cha family
    'ኀኁኂኃኄኅኆ',  # Xa family
    'ነኑኒናኔንኖ',  # Na family
    'ኘኙኚኛኜኝኞ',  # Nya family
    'አኡኢኣኤእኦ',  # Glottal A family
    'ከኩኪካኬክኮ',  # Ka family
    'ኸኹኺኻኼኽኾ',  # KxA family
    'ወዉዊዋዌውዎ',  # Wa family
    'ዐዑዒዓዔዕዖ',  # Pharyngeal A family
    'ዘዙዚዛዜዝዞ',  # Za family
    'ዠዡዢዣዤዥዦ',  # Zha family
    'የዩዪያዬይዮ',  # Ya family
    'ደዱዲዳዴድዶ',  # Da family
    'ጀጁጂጃጄጅጆ',  # Ja family
    'ገጉጊጋጌግጎ',  # Ga family
    'ጠጡጢጣጤጥጦ',  # Tha family
    'ጨጩጪጫጬጭጮ',  # Cha (ejective) family
    'ጰጱጲጳጴጵጶ',  # Pha (ejective) family
    'ጸጹጺጻጼጽጾ',  # Tsa (ejective) family
    'ፀፁፂፃፄፅፆ',  # Tza (ejective) family
    'ፈፉፊፋፌፍፎ',  # Fa family
    'ፐፑፒፓፔፕፖ',  # Pa family
)

# Flatten the rows into the list of one-character labels used by the capture loop
letters = list(''.join(_FAMILY_ROWS))
# Initialize webcam (device index 0)
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    print("Error: Cannot open webcam.")
    # Give the capture handle back before bailing out.
    cap.release()
    # The bare `exit()` helper is injected by the `site` module for interactive
    # use and, called without arguments, reports success. Raise SystemExit with
    # a non-zero status so callers and shells can see that the run failed.
    raise SystemExit(1)

def _save_frame(path, frame):
    """Write *frame* to *path* as a JPEG and return True on success.

    The image is encoded in memory and written through Python's own file API.
    cv2.imwrite is known to fail silently on non-ASCII paths on some platforms
    (notably Windows), and every path here contains an Amharic letter.
    """
    encoded_ok, encoded = cv2.imencode('.jpg', frame)
    if not encoded_ok:
        return False
    try:
        with open(path, 'wb') as out_file:
            out_file.write(encoded.tobytes())
    except OSError as err:
        print(f"Error: Could not write {path}: {err}")
        return False
    return True


def _preview(frame, text, color):
    """Show *frame* with *text* drawn on a copy and return the pressed key code.

    Drawing on a copy keeps the overlay out of the frame that gets saved.
    Returns 255 when no key was pressed (cv2.waitKey gives -1, masked to 0xFF).
    """
    shown = frame.copy()
    cv2.putText(shown, text, (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, color, 2)
    cv2.imshow('Data Capture', shown)
    return cv2.waitKey(1) & 0xFF


def _collect_letter(position, letter):
    """Capture NUM_IMAGES frames for one letter.

    position is the 1-based index of *letter* within `letters`; it is used in
    the on-screen prompt because the Hershey fonts cannot draw Amharic glyphs
    (the glyph itself is printed to the console instead).

    Returns True when all frames were saved, or False when the session should
    stop (the user pressed 'q', a camera read failed, or a file could not be
    written).
    """
    # Create a subfolder for the current letter
    letter_dir = os.path.join(DATA_DIR, letter)
    os.makedirs(letter_dir, exist_ok=True)

    print(f"\nGet ready to capture sign for the letter: '{letter}'")

    # Wait for the user to be ready; accept 's' or 'S' so Caps Lock is harmless
    prompt = f"Ready for letter {position}/{len(letters)}? Press 'S' to start."
    while True:
        ret, frame = cap.read()
        if not ret:
            print("Error: Failed to capture frame.")
            return False
        key = _preview(frame, prompt, (0, 255, 0))
        if key == ord('q'):
            return False
        if key in (ord('s'), ord('S')):
            break

    # Countdown before starting capture. Frames keep being read and shown so
    # the window stays responsive and no stale frames are left buffered.
    for remaining in range(3, 0, -1):
        print(f"Starting in {remaining}...")
        tick_end = time.monotonic() + 1
        while time.monotonic() < tick_end:
            ret, frame = cap.read()
            if not ret:
                print("Error: Failed to capture frame.")
                return False
            if _preview(frame, f"Starting in {remaining}...", (0, 255, 255)) == ord('q'):
                return False

    print(f"Capturing {NUM_IMAGES} images for '{letter}'...")
    for img_num in range(1, NUM_IMAGES + 1):
        ret, frame = cap.read()
        if not ret:
            print("Error: Failed to capture frame.")
            return False

        # Save the clean frame first, before any overlay is drawn for display
        img_name = os.path.join(letter_dir, f"{letter}_{img_num}.jpg")
        if not _save_frame(img_name, frame):
            print(f"Error: Failed to save image {img_name}")
            return False

        # Display feedback on the screen and allow early exit with 'q'
        key = _preview(frame, f"Capturing image {img_num}/{NUM_IMAGES}", (0, 0, 255))
        if key == ord('q'):
            return False

        # Wait for a short moment between captures
        time.sleep(0.3)

    return True


# Run the capture session over every letter. The camera and the preview window
# are released even if the session is aborted or an error is raised.
finished = True
try:
    for position, letter in enumerate(letters, start=1):
        if not _collect_letter(position, letter):
            finished = False
            break
finally:
    cap.release()
    cv2.destroyAllWindows()

if finished:
    print("Data collection complete!")
else:
    print("Data collection stopped before all letters were captured.")