In [1]:
!pip install opencv-python tensorflow scikit-learn



In [3]:
import cv2
import os
import numpy as np

# Original class frequencies
class_frequencies = {
    "answer": 0.1935, "ask": 0.0059, "asl": 0.0092, "aunt": 0.0003, "away": 0.0004,
    "bad": 0.0015, "brother": 0.0003, "bye": 0.0225, "come": 0.0026, "cook": 0.2532,
    "eat": 0.2408, "father": 0.0003, "friend": 0.0002, "hello": 0.0003, "how": 0.0005,
    "me": 0.0240, "mother": 0.0076, "name": 0.0020, "no": 0.0365, "please": 0.0436,
    "sister": 0.0086, "thankyou": 0.0002, "uncle": 0.0182, "welcome": 0.0113,
    "what": 0.0031, "yes": 0.0005, "you": 0.1131
}

# Invert frequencies
inverted_freqs = {cls: 1 / freq for cls, freq in class_frequencies.items()}
total_inverse = sum(inverted_freqs.values())

# Config
TOTAL_FRAMES = 5000
video_dir = r"D:\asl_words\vidoes"
output_base_dir = r"D:\asl_words\frames"
os.makedirs(output_base_dir, exist_ok=True)


# Calculate frames to extract per class (inversely weighted)
frame_counts = {}
for cls, inv in inverted_freqs.items():
    share = inv / total_inverse
    count = max(1, round(share * TOTAL_FRAMES))
    frame_counts[cls] = count

# Extract frames
for cls, frame_count in frame_counts.items():
    video_path = os.path.join(video_dir, f"{cls}.mp4")
    output_dir = os.path.join(output_base_dir, cls)
    os.makedirs(output_dir, exist_ok=True)

    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print(f"❌ Could not open video for class: {cls}")
        continue

    total_video_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    if total_video_frames == 0:
        print(f"⚠️ No frames found in video for class: {cls}")
        cap.release()
        continue

    frame_indices = np.linspace(0, total_video_frames - 1, num=min(frame_count, total_video_frames), dtype=int)

    frame_id = 0
    saved = 0
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        if frame_id in frame_indices:
            save_path = os.path.join(output_dir, f"{cls}_{saved+1}.jpg")
            cv2.imwrite(save_path, frame)
            saved += 1
            if saved >= len(frame_indices):
                break
        frame_id += 1

    cap.release()
    print(f"✅ {cls}: saved {saved} frames (target: {frame_count})")

print("🎉 Frame extraction (inverse frequency) completed.")


✅ answer: saved 1 frames (target: 1)
✅ ask: saved 26 frames (target: 26)
✅ asl: saved 17 frames (target: 17)
✅ aunt: saved 75 frames (target: 512)
✅ away: saved 79 frames (target: 384)
✅ bad: saved 71 frames (target: 102)
✅ brother: saved 81 frames (target: 512)
✅ bye: saved 7 frames (target: 7)
✅ come: saved 59 frames (target: 59)
✅ cook: saved 1 frames (target: 1)
✅ eat: saved 1 frames (target: 1)
✅ father: saved 83 frames (target: 512)
✅ friend: saved 80 frames (target: 768)
✅ hello: saved 69 frames (target: 512)
✅ how: saved 72 frames (target: 307)
✅ me: saved 6 frames (target: 6)
✅ mother: saved 20 frames (target: 20)
✅ name: saved 75 frames (target: 77)
✅ no: saved 4 frames (target: 4)
✅ please: saved 4 frames (target: 4)
✅ sister: saved 18 frames (target: 18)
✅ thankyou: saved 75 frames (target: 768)
✅ uncle: saved 8 frames (target: 8)
✅ welcome: saved 14 frames (target: 14)
✅ what: saved 50 frames (target: 50)
✅ yes: saved 63 frames (target: 307)
✅ you: saved 1 frames (target: 

In [5]:
import os
from collections import defaultdict

# Path to the folder containing class subfolders (where images are saved)
base_dir = r"D:\asl_words\frames"

# Dictionary to hold class frequencies
class_frequencies = defaultdict(int)

# Loop through each class folder
for class_name in os.listdir(base_dir):
    class_path = os.path.join(base_dir, class_name)
    if os.path.isdir(class_path):
        # Count number of image files in the folder
        image_count = len([
            file for file in os.listdir(class_path)
            if file.lower().endswith(('.jpg', '.jpeg', '.png'))
        ])
        class_frequencies[class_name] = image_count

# Display results
print("📊 Frame count per class:")
total = sum(class_frequencies.values())
for cls, count in sorted(class_frequencies.items()):
    freq = count / total if total > 0 else 0
    print(f"{cls}: {count} frames ({freq:.4f} frequency)")

print(f"\nTotal frames: {total}")


📊 Frame count per class:
answer: 1 frames (0.0009 frequency)
ask: 26 frames (0.0245 frequency)
asl: 17 frames (0.0160 frequency)
aunt: 75 frames (0.0708 frequency)
away: 79 frames (0.0745 frequency)
bad: 71 frames (0.0670 frequency)
brother: 81 frames (0.0764 frequency)
bye: 7 frames (0.0066 frequency)
come: 59 frames (0.0557 frequency)
cook: 1 frames (0.0009 frequency)
eat: 1 frames (0.0009 frequency)
father: 83 frames (0.0783 frequency)
friend: 80 frames (0.0755 frequency)
hello: 69 frames (0.0651 frequency)
how: 72 frames (0.0679 frequency)
me: 6 frames (0.0057 frequency)
mother: 20 frames (0.0189 frequency)
name: 75 frames (0.0708 frequency)
no: 4 frames (0.0038 frequency)
please: 4 frames (0.0038 frequency)
sister: 18 frames (0.0170 frequency)
thankyou: 75 frames (0.0708 frequency)
uncle: 8 frames (0.0075 frequency)
welcome: 14 frames (0.0132 frequency)
what: 50 frames (0.0472 frequency)
yes: 63 frames (0.0594 frequency)
you: 1 frames (0.0009 frequency)

Total frames: 1060


In [6]:
!pip install imgaug opencv-python imageio

Collecting imgaug
  Downloading imgaug-0.4.0-py2.py3-none-any.whl.metadata (1.8 kB)
Collecting Shapely (from imgaug)
  Downloading shapely-2.1.1-cp310-cp310-win_amd64.whl.metadata (7.0 kB)
Downloading imgaug-0.4.0-py2.py3-none-any.whl (948 kB)
   ---------------------------------------- 0.0/948.0 kB ? eta -:--:--
   ---- ----------------------------------- 112.6/948.0 kB 2.2 MB/s eta 0:00:01
   ------------- -------------------------- 317.4/948.0 kB 3.3 MB/s eta 0:00:01
   ------------------- -------------------- 471.0/948.0 kB 2.9 MB/s eta 0:00:01
   --------------------------- ------------ 645.1/948.0 kB 3.1 MB/s eta 0:00:01
   ---------------------------------- ----- 809.0/948.0 kB 3.2 MB/s eta 0:00:01
   ---------------------------------------- 948.0/948.0 kB 3.3 MB/s eta 0:00:00
Downloading shapely-2.1.1-cp310-cp310-win_amd64.whl (1.7 MB)
   ---------------------------------------- 0.0/1.7 MB ? eta -:--:--
   --------- ------------------------------ 0.4/1.7 MB 8.1 MB/s eta 0:00:01

In [7]:
import os
import imageio
import numpy as np
from imgaug import augmenters as iaa
import cv2

# Base folder where class folders are located
base_dir = r"D:\asl_words\frames"

# Classes you want to augment
low_classes = ["answer", "you", "eat"]

# Number of augmented images per original image
AUG_PER_IMAGE = 10

# Define augmentation pipeline
augmenter = iaa.Sequential([
    iaa.Fliplr(0.5),  # horizontal flip
    iaa.Affine(rotate=(-15, 15), scale=(0.9, 1.1)),
    iaa.AdditiveGaussianNoise(scale=(5, 15)),
    iaa.Multiply((0.8, 1.2)),  # brightness
    iaa.LinearContrast((0.8, 1.2)),
    iaa.Crop(percent=(0, 0.1))  # random crop
])

for cls in low_classes:
    cls_path = os.path.join(base_dir, cls)
    if not os.path.exists(cls_path):
        print(f"❌ Folder not found: {cls_path}")
        continue

    images = [
        f for f in os.listdir(cls_path)
        if f.lower().endswith((".jpg", ".jpeg", ".png"))
    ]

    print(f"\n🔁 Augmenting class: {cls} ({len(images)} original images)")

    for img_name in images:
        img_path = os.path.join(cls_path, img_name)
        image = imageio.imread(img_path)

        for i in range(AUG_PER_IMAGE):
            aug_img = augmenter(image=image)
            aug_filename = f"{os.path.splitext(img_name)[0]}_aug{i+1}.jpg"
            aug_path = os.path.join(cls_path, aug_filename)
            imageio.imwrite(aug_path, aug_img)

    print(f"✅ {cls}: added {len(images) * AUG_PER_IMAGE} augmented images")

print("\n🎉 Augmentation complete.")



🔁 Augmenting class: answer (1 original images)


  image = imageio.imread(img_path)


✅ answer: added 10 augmented images

🔁 Augmenting class: you (1 original images)
✅ you: added 10 augmented images

🔁 Augmenting class: eat (1 original images)
✅ eat: added 10 augmented images

🎉 Augmentation complete.


In [11]:
import os
import imageio
import numpy as np
from imgaug import augmenters as iaa
import random
import shutil

# Base directory containing class folders
base_dir = r"D:\asl_words\frames"

# Target number of images per class
TARGET_COUNT = 80  # You can set this to max class size or any fixed value

# Define augmentation sequence
augmenter = iaa.Sequential([
    iaa.Fliplr(0.5),
    iaa.Affine(rotate=(-15, 15), scale=(0.9, 1.1)),
    iaa.AdditiveGaussianNoise(scale=(5, 15)),
    iaa.Multiply((0.8, 1.2)),
    iaa.LinearContrast((0.8, 1.2)),
    iaa.Crop(percent=(0, 0.1))
])

# Loop through each class folder
for cls_name in os.listdir(base_dir):
    cls_path = os.path.join(base_dir, cls_name)
    if not os.path.isdir(cls_path):
        continue

    images = [
        f for f in os.listdir(cls_path)
        if f.lower().endswith((".jpg", ".jpeg", ".png"))
    ]

    count = len(images)
    print(f"\n🔎 {cls_name}: {count} images")

    if count == 0:
        print(f"⚠️ Skipping {cls_name}: No images found.")
        continue

    # Case 1: Class has more than target → randomly downsample
    if count > TARGET_COUNT:
        print(f"✂️ Trimming {cls_name} to {TARGET_COUNT} images")
        keep_images = random.sample(images, TARGET_COUNT)
        for img in images:
            if img not in keep_images:
                os.remove(os.path.join(cls_path, img))

    # Case 2: Class has fewer than target → augment
    elif count < TARGET_COUNT:
        num_needed = TARGET_COUNT - count
        print(f"🔁 Augmenting {cls_name} with {num_needed} images")
        images = [
            f for f in os.listdir(cls_path)
            if f.lower().endswith((".jpg", ".jpeg", ".png"))
        ]
        img_idx = 0
        while num_needed > 0:
            img_file = images[img_idx % len(images)]
            img_path = os.path.join(cls_path, img_file)
            image = imageio.imread(img_path)

            aug_img = augmenter(image=image)
            new_filename = f"{os.path.splitext(img_file)[0]}_aug{num_needed}.jpg"
            save_path = os.path.join(cls_path, new_filename)
            imageio.imwrite(save_path, aug_img)

            img_idx += 1
            num_needed -= 1

        print(f"✅ {cls_name} now has {TARGET_COUNT} images")

    else:
        print(f"✔️ {cls_name} already balanced")

print("\n🎉 Dataset balancing complete!")



🔎 answer: 111 images
✂️ Trimming answer to 80 images

🔎 ask: 26 images
🔁 Augmenting ask with 54 images


  image = imageio.imread(img_path)


✅ ask now has 80 images

🔎 asl: 17 images
🔁 Augmenting asl with 63 images
✅ asl now has 80 images

🔎 aunt: 75 images
🔁 Augmenting aunt with 5 images
✅ aunt now has 80 images

🔎 away: 79 images
🔁 Augmenting away with 1 images
✅ away now has 80 images

🔎 bad: 71 images
🔁 Augmenting bad with 9 images
✅ bad now has 80 images

🔎 brother: 81 images
✂️ Trimming brother to 80 images

🔎 bye: 7 images
🔁 Augmenting bye with 73 images
✅ bye now has 80 images

🔎 come: 59 images
🔁 Augmenting come with 21 images
✅ come now has 80 images

🔎 cook: 1 images
🔁 Augmenting cook with 79 images
✅ cook now has 80 images

🔎 eat: 111 images
✂️ Trimming eat to 80 images

🔎 father: 83 images
✂️ Trimming father to 80 images

🔎 friend: 80 images
✔️ friend already balanced

🔎 hello: 69 images
🔁 Augmenting hello with 11 images
✅ hello now has 80 images

🔎 how: 72 images
🔁 Augmenting how with 8 images
✅ how now has 80 images

🔎 me: 6 images
🔁 Augmenting me with 74 images
✅ me now has 80 images

🔎 mother: 20 images
🔁 A

In [13]:
import os
from collections import defaultdict

# Path to the folder containing class subfolders (where images are saved)
base_dir = r"D:\asl_words\frames"

# Dictionary to hold class frequencies
class_frequencies = defaultdict(int)

# Loop through each class folder
for class_name in os.listdir(base_dir):
    class_path = os.path.join(base_dir, class_name)
    if os.path.isdir(class_path):
        # Count number of image files in the folder
        image_count = len([
            file for file in os.listdir(class_path)
            if file.lower().endswith(('.jpg', '.jpeg', '.png'))
        ])
        class_frequencies[class_name] = image_count

# Display results
print("📊 Frame count per class:")
total = sum(class_frequencies.values())
for cls, count in sorted(class_frequencies.items()):
    freq = count / total if total > 0 else 0
    print(f"{cls}: {count} frames ({freq:.4f} frequency)")

print(f"\nTotal frames: {total}")


📊 Frame count per class:
answer: 80 frames (0.0370 frequency)
ask: 80 frames (0.0370 frequency)
asl: 80 frames (0.0370 frequency)
aunt: 80 frames (0.0370 frequency)
away: 80 frames (0.0370 frequency)
bad: 80 frames (0.0370 frequency)
brother: 80 frames (0.0370 frequency)
bye: 80 frames (0.0370 frequency)
come: 80 frames (0.0370 frequency)
cook: 80 frames (0.0370 frequency)
eat: 80 frames (0.0370 frequency)
father: 80 frames (0.0370 frequency)
friend: 80 frames (0.0370 frequency)
hello: 80 frames (0.0370 frequency)
how: 80 frames (0.0370 frequency)
me: 80 frames (0.0370 frequency)
mother: 80 frames (0.0370 frequency)
name: 80 frames (0.0370 frequency)
no: 80 frames (0.0370 frequency)
please: 80 frames (0.0370 frequency)
sister: 80 frames (0.0370 frequency)
thankyou: 80 frames (0.0370 frequency)
uncle: 80 frames (0.0370 frequency)
welcome: 80 frames (0.0370 frequency)
what: 80 frames (0.0370 frequency)
yes: 80 frames (0.0370 frequency)
you: 80 frames (0.0370 frequency)

Total frames: 216