# 0. LOAD virtual environment

In [2]:
# terminal command - create the .venv virtual environment folder with Python 3.10
python3.10 -m venv .venv 

In [3]:
# terminal command - activate .venv
source .venv/bin/activate

In [None]:
# terminal command - install the packages listed in requirements.txt
pip install -r requirements.txt

# 1. CREATE images

In [1]:
import cv2
import os
import time 
import uuid # naming image files

In [2]:
# folder the webcam images are written to (relative path, resolved against the current working directory)
IMAGES_PATH = "Tensorflow/workspace/images/collectedimages"

**- UPDATE labels = sign language glosses -**

In [3]:
# create the labels = sign language glosses the model will be trained on
labels = [
    # sample images already existing
    # "hello", "thanks", "yes", "no", "iloveyou", 
    # test sentence glosses PHOENIX WEATHER 1May_2010_Saturday_tagesschau_default-11 1 signer05 0.0 1.79769e+308
    "montag", "auch", "mehr", "wolke", "als", "sonne", "ueberwiegend", "regen", "gewitter",
    ]

number_imgs = 15 # 15 images are taken per gloss -> later transformations increase the training data size

In [30]:
# collect images via webcam
# For every label: open the webcam, wait 5 s so the signer can get into position, then store
# `number_imgs` frames (one every ~2 s) as "<label>.<uuid>.jpg" inside IMAGES_PATH.
# Pressing "q" in the preview window stops the collection for the current label only.

# cv2.imwrite does not create missing folders, so make sure the target exists (no-op if it already does)
os.makedirs(IMAGES_PATH, exist_ok=True)

for label in labels:
    cap = cv2.VideoCapture(0) # device number might vary depending on hardware, 0 works for macos
    try:
        print("collecting images for {}".format(label))
        time.sleep(5) # 5s time lapse before starting image collection 
        for imgnum in range (number_imgs):
            ret, frame = cap.read()
            if not ret or frame is None:
                # a failed grab yields no frame; writing/showing it would raise, so skip this shot
                print("could not read frame {} for {} - skipping".format(imgnum, label))
                continue
            imagename = os.path.join(IMAGES_PATH, f"{label}.{uuid.uuid1()}.jpg")
            cv2.imwrite(imagename, frame)
            cv2.imshow("frame", frame)
            time.sleep(2)

            if cv2.waitKey(1) & 0xFF == ord("q"):
                break
    finally:
        # always free the camera and close the preview, even if an error interrupts the collection
        cap.release()
        cv2.destroyAllWindows()  # Close all windows created by cv2.imshow()

collecting images for gewitter


**- UPDATE canvas.py before running labelimg -**

In [5]:
import os

def update_canvas_file(file_path):
    """Patch three drawing calls in labelImg's ``canvas.py`` so they pass ints instead of floats.

    For each of the hard-coded line numbers (526, 530, 531) the function checks that the
    line still contains the expected original statement. If so, the original line is
    commented out and the ``int(...)``-wrapped replacement is inserted directly below it
    with the same indentation. Lines that do not match are left untouched.

    The patch is safe to re-run: a line that is already commented out is never patched a
    second time, and files shorter than the expected line numbers are left unchanged.

    Args:
        file_path: Path to the ``canvas.py`` file to modify in place.

    Raises:
        OSError: If the file cannot be read or written.
    """
    # Lines to be commented out and their replacements
    original_lines = {
        526: "p.drawRect(left_top.x(), left_top.y(), rect_width, rect_height)",
        530: "p.drawLine(self.prev_point.x(), 0, self.prev_point.x(), self.pixmap.height())",
        531: "p.drawLine(0, self.prev_point.y(), self.pixmap.width(), self.prev_point.y())"
    }
    replacement_lines = {
        526: "p.drawRect(int(left_top.x()), int(left_top.y()), int(rect_width), int(rect_height))",
        530: "p.drawLine(int(self.prev_point.x()), 0, int(self.prev_point.x()), self.pixmap.height())",
        531: "p.drawLine(0, int(self.prev_point.y()), self.pixmap.width(), int(self.prev_point.y()))"
    }

    # Read the file
    with open(file_path, 'r', encoding='utf-8') as file:
        lines = file.readlines()

    # Update the lines bottom-up so that an insertion never shifts a line that is still to be checked
    for line_num in sorted(original_lines, reverse=True):
        index = line_num - 1
        if index >= len(lines):
            # file is shorter than expected (different labelImg version?) - nothing to patch here
            continue

        current_line = lines[index]
        stripped = current_line.strip()
        # An already commented-out line still *contains* the original statement; skipping it
        # keeps a second run from stacking another comment marker and a duplicate replacement.
        if stripped.startswith("#") or original_lines[line_num] not in stripped:
            continue

        # Reuse the exact leading whitespace so tab-indented files stay consistent
        indent = current_line[:len(current_line) - len(current_line.lstrip())]
        commented_line = f"# {current_line}"
        if not commented_line.endswith("\n"):
            # last line of a file without trailing newline: keep the replacement on its own line
            commented_line += "\n"
        lines[index] = commented_line
        lines.insert(line_num, f"{indent}{replacement_lines[line_num]}\n")

    # Write the updated lines back to the file
    with open(file_path, 'w', encoding='utf-8') as file:
        file.writelines(lines)

# Path to the canvas.py file
# NOTE(review): hard-codes the python3.10 site-packages layout of the local .venv - adjust if the environment differs
file_path = '.venv/lib/python3.10/site-packages/libs/canvas.py'

# Update the canvas.py file
update_canvas_file(file_path)
# This message is printed whenever the call above returns without raising,
# including the case where none of the expected lines matched and nothing was changed.
print(f"Updated {file_path} successfully.")


Updated .venv/lib/python3.10/site-packages/libs/canvas.py successfully.


In [23]:
# install & launch labelimg to label images - create boxes around hand signs in images
!labelimg

2025-02-03 13:00:49.088 Python[54202:882368] +[IMKClient subclass]: chose IMKClient_Modern
Traceback (most recent call last):
  File "/Users/maximilianscheel/neuefische/capstone_project/capstone_sl_txt_voice/sl_rtod/.venv/lib/python3.10/site-packages/libs/canvas.py", line 530, in paintEvent
    p.drawLine(self.prev_point.x(), 0, self.prev_point.x(), self.pixmap.height())
TypeError: arguments did not match any overloaded call:
  drawLine(self, l: QLineF): argument 1 has unexpected type 'float'
  drawLine(self, line: QLine): argument 1 has unexpected type 'float'
  drawLine(self, x1: int, y1: int, x2: int, y2: int): argument 1 has unexpected type 'float'
  drawLine(self, p1: QPoint, p2: QPoint): argument 1 has unexpected type 'float'
  drawLine(self, p1: Union[QPointF, QPoint], p2: Union[QPointF, QPoint]): argument 1 has unexpected type 'float'


# 2. TRANSFORM dataset (did not improve model)

The code snippets below create additional training material from the collected images (e.g. flipping, cropping, removing the background, etc.).

## flip_horizontal (mirror)

In [15]:
import os
import cv2
import xml.etree.ElementTree as ET

# Define the paths where original images are stored and where flipped images will be saved.
# "Tensorflow" is capitalised exactly like the IMAGES_PATH used when collecting and splitting the
# images; a lowercase spelling only points to the same folder on case-insensitive file systems.
ORIGINAL_IMAGES_PATH = "Tensorflow/workspace/images/collectedimages"
FLIPPED_IMAGES_PATH = "Tensorflow/workspace/images/transformed_images/flip_horizontal"

# Ensure the output directory exists
os.makedirs(FLIPPED_IMAGES_PATH, exist_ok=True)

# Function to flip image horizontally and save flipped image with updated XML
def flip_image_and_xml(image_path, xml_path, label):
    """Mirror an image horizontally and save it together with a matching XML annotation.

    The flipped image is written to FLIPPED_IMAGES_PATH as "<label>.flip.<rest of name>".
    The annotation is copied with updated ``filename``/``path`` entries and with every
    bounding box mirrored on the x axis (``xmin``/``xmax`` are swapped around the image width).

    Args:
        image_path: Path of the source ``.jpg`` image.
        xml_path: Path of the XML annotation that belongs to the image.
        label: Label prefix of the file name; ``.flip`` is appended to its first occurrence.

    Returns:
        None. Unreadable images are reported and skipped.
    """
    # Load image - cv2.imread returns None (instead of raising) when the file cannot be decoded
    image = cv2.imread(image_path)
    if image is None:
        print(f"skipping {image_path}: image could not be read")
        return
    flipped_image = cv2.flip(image, 1)  # Horizontal flip

    # Save flipped image; count=1 tags only the leading label, not a later occurrence of the same text
    base_filename = os.path.basename(image_path)
    flipped_image_name = base_filename.replace(label, f"{label}.flip", 1)
    flipped_image_path = os.path.join(FLIPPED_IMAGES_PATH, flipped_image_name)
    cv2.imwrite(flipped_image_path, flipped_image)

    # Parse XML
    tree = ET.parse(xml_path)
    root = tree.getroot()

    # Update XML filename and path (either node may be absent, e.g. <path> in hand-edited files)
    for tag, value in (('filename', flipped_image_name), ('path', flipped_image_path)):
        node = root.find(tag)
        if node is not None:
            node.text = value

    # Get image width as recorded in the annotation
    width = int(root.find('size/width').text)

    # Adjust bounding boxes
    for obj in root.findall('object'):
        bndbox = obj.find('bndbox')
        xmin = int(bndbox.find('xmin').text)
        xmax = int(bndbox.find('xmax').text)

        # Flip bounding box horizontally: the old right edge becomes the new left edge and vice versa
        bndbox.find('xmin').text = str(width - xmax)
        bndbox.find('xmax').text = str(width - xmin)

    # Save updated XML next to the flipped image (only the extension is swapped)
    flipped_xml_name = os.path.splitext(flipped_image_name)[0] + '.xml'
    tree.write(os.path.join(FLIPPED_IMAGES_PATH, flipped_xml_name))

# Loop through original images and XML files
for filename in os.listdir(ORIGINAL_IMAGES_PATH):
    # only .jpg files are candidates
    if not filename.endswith(".jpg"):
        continue

    label = filename.split('.')[0]  # text before the first dot
    image_path = os.path.join(ORIGINAL_IMAGES_PATH, filename)
    xml_path = image_path.replace(".jpg", ".xml")

    # images without an annotation file are left out
    if not os.path.exists(xml_path):
        continue
    flip_image_and_xml(image_path, xml_path, label)


## random_rotate (not used, as the position/rotation of the hand might distort the meaning)

In [6]:
import os
import cv2
import math
import xml.etree.ElementTree as ET
import random

# Define the path where original images are stored and where rotated images will be saved.
# "Tensorflow" is capitalised exactly like the IMAGES_PATH used when collecting and splitting the
# images; a lowercase spelling only points to the same folder on case-insensitive file systems.
ORIGINAL_IMAGES_PATH = "Tensorflow/workspace/images/collectedimages"
ROTATED_IMAGES_PATH = "Tensorflow/workspace/images/transformed_images/random_rotate"

# Ensure the output directory exists
os.makedirs(ROTATED_IMAGES_PATH, exist_ok=True)

# Function to rotate image and save rotated image with updated XML
def rotate_image_and_xml(image_path, xml_path, label):
    """Rotate an image by a random angle and save it together with a matching XML annotation.

    The angle is drawn uniformly from [-30, 30] degrees. The output canvas is enlarged so
    the whole rotated image fits. The rotated image is written to ROTATED_IMAGES_PATH as
    "<label>.rotate.<rest of name>". In the copied annotation the ``filename``/``path``
    entries and the recorded image size are updated, and every bounding box is replaced
    by the axis-aligned box around its four rotated corners, clipped to the new canvas.

    Args:
        image_path: Path of the source ``.jpg`` image.
        xml_path: Path of the XML annotation that belongs to the image.
        label: Label prefix of the file name; ``.rotate`` is appended to its first occurrence.

    Returns:
        None. Unreadable images are reported and skipped.
    """
    # Load image - cv2.imread returns None (instead of raising) when the file cannot be decoded
    image = cv2.imread(image_path)
    if image is None:
        print(f"skipping {image_path}: image could not be read")
        return
    height, width = image.shape[:2]

    # Generate a random angle between -30 and 30 degrees
    angle = random.uniform(-30, 30)
    center = (width / 2, height / 2)
    M = cv2.getRotationMatrix2D(center, angle, 1.0)

    # Compute the size of the rotated image
    cos = abs(M[0, 0])
    sin = abs(M[0, 1])
    new_width = int((height * sin) + (width * cos))
    new_height = int((height * cos) + (width * sin))

    # Adjust the rotation matrix to take into account translation
    M[0, 2] += (new_width / 2) - center[0]
    M[1, 2] += (new_height / 2) - center[1]

    # Perform the rotation
    rotated_image = cv2.warpAffine(image, M, (new_width, new_height))

    # Save rotated image; count=1 tags only the leading label, not a later occurrence of the same text
    base_filename = os.path.basename(image_path)
    rotated_image_name = base_filename.replace(label, f"{label}.rotate", 1)
    rotated_image_path = os.path.join(ROTATED_IMAGES_PATH, rotated_image_name)
    cv2.imwrite(rotated_image_path, rotated_image)

    # Parse XML
    tree = ET.parse(xml_path)
    root = tree.getroot()

    # Update XML filename and path (either node may be absent, e.g. <path> in hand-edited files)
    for tag, value in (('filename', rotated_image_name), ('path', rotated_image_path)):
        node = root.find(tag)
        if node is not None:
            node.text = value

    # The canvas grew with the rotation, so the recorded image size has to follow;
    # otherwise the boxes below would be interpreted against the old dimensions.
    for tag, value in (('size/width', new_width), ('size/height', new_height)):
        node = root.find(tag)
        if node is not None:
            node.text = str(value)

    # Adjust bounding boxes
    for obj in root.findall('object'):
        bndbox = obj.find('bndbox')
        xmin = int(bndbox.find('xmin').text)
        ymin = int(bndbox.find('ymin').text)
        xmax = int(bndbox.find('xmax').text)
        ymax = int(bndbox.find('ymax').text)

        # Rotate the four corners of the bounding box with the same affine matrix as the image
        rotated_corners = []
        for x, y in ((xmin, ymin), (xmax, ymin), (xmax, ymax), (xmin, ymax)):
            new_x = M[0, 0] * x + M[0, 1] * y + M[0, 2]
            new_y = M[1, 0] * x + M[1, 1] * y + M[1, 2]
            rotated_corners.append((new_x, new_y))

        xs = [corner[0] for corner in rotated_corners]
        ys = [corner[1] for corner in rotated_corners]

        # New axis-aligned box around the rotated corners, kept within the image boundaries
        new_xmin = max(0, min(xs))
        new_ymin = max(0, min(ys))
        new_xmax = min(new_width, max(xs))
        new_ymax = min(new_height, max(ys))

        bndbox.find('xmin').text = str(int(new_xmin))
        bndbox.find('ymin').text = str(int(new_ymin))
        bndbox.find('xmax').text = str(int(new_xmax))
        bndbox.find('ymax').text = str(int(new_ymax))

    # Save updated XML next to the rotated image (only the extension is swapped)
    rotated_xml_name = os.path.splitext(rotated_image_name)[0] + '.xml'
    tree.write(os.path.join(ROTATED_IMAGES_PATH, rotated_xml_name))

# Loop through original images and XML files
for filename in os.listdir(ORIGINAL_IMAGES_PATH):
    # anything that is not a .jpg is ignored
    if not filename.endswith(".jpg"):
        continue

    label = filename.split('.')[0]  # leading part of the name up to the first dot
    image_path = os.path.join(ORIGINAL_IMAGES_PATH, filename)
    xml_path = image_path.replace(".jpg", ".xml")

    # rotate only images that come with an annotation file
    if not os.path.exists(xml_path):
        continue
    rotate_image_and_xml(image_path, xml_path, label)


## crop_random

In [16]:
import os
import cv2
import xml.etree.ElementTree as ET
import random

# Define the path where original images are stored and where cropped images will be saved.
# "Tensorflow" is capitalised exactly like the IMAGES_PATH used when collecting and splitting the
# images; a lowercase spelling only points to the same folder on case-insensitive file systems.
ORIGINAL_IMAGES_PATH = "Tensorflow/workspace/images/collectedimages"
CROPPED_IMAGES_PATH = "Tensorflow/workspace/images/transformed_images/crop_random"

# Ensure the output directory exists
os.makedirs(CROPPED_IMAGES_PATH, exist_ok=True)

# Function to randomly crop image and save cropped image with updated XML
def crop_image_and_xml(image_path, xml_path, label):
    """Randomly crop an image around its annotated boxes and save it with a matching XML.

    The crop window is chosen at random but always contains every annotated bounding box.
    The cropped image is written to CROPPED_IMAGES_PATH as "<label>.crop.<rest of name>".
    In the copied annotation the ``filename``/``path`` entries and the recorded image size
    are updated and all bounding boxes are shifted into the coordinate system of the crop.

    Args:
        image_path: Path of the source ``.jpg`` image.
        xml_path: Path of the XML annotation that belongs to the image.
        label: Label prefix of the file name; ``.crop`` is appended to its first occurrence.

    Returns:
        None. Images that cannot be read, have no annotated object, or whose boxes lie
        completely outside the image are reported and skipped.
    """
    # Load image - cv2.imread returns None (instead of raising) when the file cannot be decoded
    image = cv2.imread(image_path)
    if image is None:
        print(f"skipping {image_path}: image could not be read")
        return
    height, width = image.shape[:2]

    # Parse XML
    tree = ET.parse(xml_path)
    root = tree.getroot()

    # Collect the bounding boxes of all annotated objects, not just the first one
    boxes = []
    for bndbox in root.findall('object/bndbox'):
        boxes.append((
            bndbox,
            int(bndbox.find('xmin').text),
            int(bndbox.find('ymin').text),
            int(bndbox.find('xmax').text),
            int(bndbox.find('ymax').text),
        ))
    if not boxes:
        print(f"skipping {image_path}: annotation contains no bounding box")
        return

    # Region that must survive the crop: union of all boxes, clipped to the image so that the
    # random ranges below can never become negative
    xmin = max(0, min(box[1] for box in boxes))
    ymin = max(0, min(box[2] for box in boxes))
    xmax = min(width, max(box[3] for box in boxes))
    ymax = min(height, max(box[4] for box in boxes))
    if xmin >= xmax or ymin >= ymax:
        print(f"skipping {image_path}: bounding boxes lie outside the image")
        return

    # Calculate the range for random cropping ensuring the bounding boxes stay within the cropped image
    crop_x_min = xmin - random.randint(0, xmin)
    crop_y_min = ymin - random.randint(0, ymin)
    crop_x_max = xmax + random.randint(0, width - xmax)
    crop_y_max = ymax + random.randint(0, height - ymax)

    cropped_image = image[crop_y_min:crop_y_max, crop_x_min:crop_x_max]
    cropped_height, cropped_width = cropped_image.shape[:2]

    # Save cropped image; count=1 tags only the leading label, not a later occurrence of the same text
    base_filename = os.path.basename(image_path)
    cropped_image_name = base_filename.replace(label, f"{label}.crop", 1)
    cropped_image_path = os.path.join(CROPPED_IMAGES_PATH, cropped_image_name)
    cv2.imwrite(cropped_image_path, cropped_image)

    # Update XML filename and path (either node may be absent, e.g. <path> in hand-edited files)
    for tag, value in (('filename', cropped_image_name), ('path', cropped_image_path)):
        node = root.find(tag)
        if node is not None:
            node.text = value

    # The image got smaller, so the recorded size has to follow; otherwise the shifted boxes
    # would be interpreted against the old dimensions.
    for tag, value in (('size/width', cropped_width), ('size/height', cropped_height)):
        node = root.find(tag)
        if node is not None:
            node.text = str(value)

    # Shift every bounding box into the cropped image and keep it inside the new borders
    for bndbox, box_xmin, box_ymin, box_xmax, box_ymax in boxes:
        bndbox.find('xmin').text = str(max(0, box_xmin - crop_x_min))
        bndbox.find('ymin').text = str(max(0, box_ymin - crop_y_min))
        bndbox.find('xmax').text = str(min(cropped_width, box_xmax - crop_x_min))
        bndbox.find('ymax').text = str(min(cropped_height, box_ymax - crop_y_min))

    # Save updated XML next to the cropped image (only the extension is swapped)
    cropped_xml_name = os.path.splitext(cropped_image_name)[0] + '.xml'
    tree.write(os.path.join(CROPPED_IMAGES_PATH, cropped_xml_name))

# Loop through original images and XML files
for filename in os.listdir(ORIGINAL_IMAGES_PATH):
    # non-.jpg entries are not processed
    if not filename.endswith(".jpg"):
        continue

    label = filename.split('.')[0]  # name part in front of the first dot
    image_path = os.path.join(ORIGINAL_IMAGES_PATH, filename)
    xml_path = image_path.replace(".jpg", ".xml")

    # crop only when the annotation file is present
    if not os.path.exists(xml_path):
        continue
    crop_image_and_xml(image_path, xml_path, label)


## remove_background

In [17]:
import os
import cv2
import numpy as np
import xml.etree.ElementTree as ET

# Define the path where original images are stored and where the transformed images will be saved.
# "Tensorflow" is capitalised exactly like the IMAGES_PATH used when collecting and splitting the
# images; a lowercase spelling only points to the same folder on case-insensitive file systems.
ORIGINAL_IMAGES_PATH = "Tensorflow/workspace/images/collectedimages"
BACKGROUND_REMOVED_IMAGES_PATH = "Tensorflow/workspace/images/transformed_images/remove_background"

# Ensure the output directory exists
os.makedirs(BACKGROUND_REMOVED_IMAGES_PATH, exist_ok=True)

# Function to remove background and save image with updated XML
def remove_background_and_xml(image_path, xml_path, label):
    """Black out everything outside the annotated boxes and save the image with a matching XML.

    Pixels inside any annotated bounding box are kept; all other pixels become black. The
    result is written to BACKGROUND_REMOVED_IMAGES_PATH as "<label>.bg_removed.<rest of name>".
    The annotation is copied with updated ``filename``/``path`` entries; the bounding boxes
    are unchanged because the image geometry stays the same.

    Args:
        image_path: Path of the source ``.jpg`` image.
        xml_path: Path of the XML annotation that belongs to the image.
        label: Label prefix of the file name; ``.bg_removed`` is appended to its first occurrence.

    Returns:
        None. Images that cannot be read or have no annotated object are reported and skipped.
    """
    # Load image - cv2.imread returns None (instead of raising) when the file cannot be decoded
    image = cv2.imread(image_path)
    if image is None:
        print(f"skipping {image_path}: image could not be read")
        return
    height, width = image.shape[:2]

    # Parse XML
    tree = ET.parse(xml_path)
    root = tree.getroot()

    # Get the bounding boxes of all annotated objects
    object_boxes = root.findall('object/bndbox')
    if not object_boxes:
        print(f"skipping {image_path}: annotation contains no bounding box")
        return

    # Create a mask that is white inside every bounding box; masking only the first box would
    # black out the remaining objects although they are still listed in the annotation
    mask = np.zeros((height, width), dtype=np.uint8)
    for bndbox in object_boxes:
        # clamp at 0: a negative start index would be interpreted as counting from the far edge
        xmin = max(0, int(bndbox.find('xmin').text))
        ymin = max(0, int(bndbox.find('ymin').text))
        xmax = max(0, int(bndbox.find('xmax').text))
        ymax = max(0, int(bndbox.find('ymax').text))
        mask[ymin:ymax, xmin:xmax] = 255

    # Apply the mask to the image
    result = cv2.bitwise_and(image, image, mask=mask)

    # Save the result image; count=1 tags only the leading label, not a later occurrence of the same text
    base_filename = os.path.basename(image_path)
    background_removed_image_name = base_filename.replace(label, f"{label}.bg_removed", 1)
    background_removed_image_path = os.path.join(BACKGROUND_REMOVED_IMAGES_PATH, background_removed_image_name)
    cv2.imwrite(background_removed_image_path, result)

    # Update XML filename and path (either node may be absent, e.g. <path> in hand-edited files)
    for tag, value in (('filename', background_removed_image_name), ('path', background_removed_image_path)):
        node = root.find(tag)
        if node is not None:
            node.text = value

    # Save updated XML next to the result image (only the extension is swapped)
    background_removed_xml_name = os.path.splitext(background_removed_image_name)[0] + '.xml'
    tree.write(os.path.join(BACKGROUND_REMOVED_IMAGES_PATH, background_removed_xml_name))

# Loop through original images and XML files
for filename in os.listdir(ORIGINAL_IMAGES_PATH):
    # skip everything except .jpg files
    if not filename.endswith(".jpg"):
        continue

    label = filename.split('.')[0]  # file name up to the first dot
    image_path = os.path.join(ORIGINAL_IMAGES_PATH, filename)
    xml_path = image_path.replace(".jpg", ".xml")

    # the background is removed only for images that have an annotation file
    if not os.path.exists(xml_path):
        continue
    remove_background_and_xml(image_path, xml_path, label)


# 3. SPLIT collectedimages dataset into train 80% & test 20%

## 3.1 COPY transformed images to collectedimages

In [18]:
# copy all images from images/transformed_images subfolders to images/collectedimages
import os
import shutil

# Define the source and destination directories.
# "Tensorflow" is capitalised exactly like the IMAGES_PATH used when collecting and splitting the
# images; a lowercase spelling only points to the same folder on case-insensitive file systems.
SOURCE_DIR = "Tensorflow/workspace/images/transformed_images"
DESTINATION_DIR = "Tensorflow/workspace/images/collectedimages"

# Ensure the destination directory exists
os.makedirs(DESTINATION_DIR, exist_ok=True)

# Function to copy all files from source subfolders to the destination directory
def copy_files_to_collectedimages(source_dir, dest_dir):
    """Copy every file found anywhere below ``source_dir`` directly into ``dest_dir``.

    The folder structure is flattened: only the file name is kept, so a file that shares
    its name with an earlier one overwrites it in ``dest_dir``.
    """
    for folder, _subfolders, filenames in os.walk(source_dir):
        for name in filenames:
            shutil.copy2(os.path.join(folder, name), os.path.join(dest_dir, name))

# Copy all files from the transformed_images subfolders to collectedimages
# NOTE(review): afterwards the augmented files sit in the same folder the transform cells read from,
# so re-running those cells would also transform the already-augmented images
copy_files_to_collectedimages(SOURCE_DIR, DESTINATION_DIR)


## 3.2 SPLITTING data set

In [4]:
# split images/collectedimages into train 80% vs test 20%

import os
import random
import shutil
import math

# Define paths: source folder with all labelled images and the two target folders of the split
IMAGES_PATH = "Tensorflow/workspace/images/collectedimages"
TRAIN_PATH = "Tensorflow/workspace/images/train"
TEST_PATH = "Tensorflow/workspace/images/test"

# Ensure train and test directories exist
# NOTE(review): existing folders are reused as they are (exist_ok=True) and nothing is deleted, so
# files from an earlier run stay in place and can end up in both sets if the split changes
os.makedirs(TRAIN_PATH, exist_ok=True)
os.makedirs(TEST_PATH, exist_ok=True)

# Get list of all image files that have an XML annotation next to them.
# Images without annotation are left out here because copying them later would fail on the
# missing XML after part of the data set has already been copied.
# Sorting makes the split independent of the order in which the file system lists the folder.
files = sorted(
    f for f in os.listdir(IMAGES_PATH)
    if f.endswith('.jpg')
    and os.path.exists(os.path.join(IMAGES_PATH, os.path.splitext(f)[0] + '.xml'))
)

# Group files by label (the part of the file name before the first dot)
label_dict = {}
for file in files:
    label = file.split('.')[0]
    label_dict.setdefault(label, []).append(file)

# Shuffle and split files by label so every label contributes ~80% to train and the rest to test.
# A dedicated, seeded generator keeps the split reproducible without touching the global random state.
SPLIT_SEED = 42
split_rng = random.Random(SPLIT_SEED)

train_files = []
test_files = []

for label, file_list in label_dict.items():
    split_rng.shuffle(file_list)
    # floor() rounds the train share down, so a label with a single image ends up in the test set only
    num_train = math.floor(len(file_list) * 0.8)
    train_files.extend(file_list[:num_train])
    test_files.extend(file_list[num_train:])

# Function to copy files to their respective directories
def copy_files(file_list, destination_path):
    """Copy each image named in ``file_list``, plus its same-named XML, from IMAGES_PATH into ``destination_path``."""
    for image_name in file_list:
        stem, _extension = os.path.splitext(image_name)
        # image first, then the annotation that shares its base name
        for name in (image_name, stem + '.xml'):
            shutil.copy(os.path.join(IMAGES_PATH, name), destination_path)

# Copy train files (each image together with its XML annotation)
copy_files(train_files, TRAIN_PATH)

# Copy test files (each image together with its XML annotation)
copy_files(test_files, TEST_PATH)

# Summary of the split; the counts refer to images, the XML files are not counted
print(f"Train and test datasets created. {len(train_files)} images in the train set and {len(test_files)} images in the test set.")


Train and test datasets created. 108 images in the train set and 27 images in the test set.
