In [1]:
import os
import pandas as pd
import cv2
import numpy as np
from tqdm import tqdm 

In [2]:
# Raw dataset roots, grouped as (train, val) pairs per dataset variant.
# The trailing slash is part of each path string and is kept as-is.
asl_dataset_thresholded_train, asl_dataset_thresholded_val = (
    '../data/raw/asl-dataset/asl-dataset/train/',
    '../data/raw/asl-dataset/asl-dataset/val/',
)

asl_dataset_grayscaled_train, asl_dataset_grayscaled_val = (
    '../data/raw/asl-dataset-gray/asl-dataset-gray/train/',
    '../data/raw/asl-dataset-gray/asl-dataset-gray/val/',
)


In [3]:
def get_data(path):
    """Index a ``<path>/<label>/<file>`` directory tree into a DataFrame.

    Each immediate subdirectory of ``path`` is treated as a class label, and
    each regular file inside it becomes one row.

    Parameters
    ----------
    path : str
        Root directory whose subdirectories are named after the labels.

    Returns
    -------
    pandas.DataFrame
        Columns ``image_path`` (path built from ``path``, the label folder and
        the file name) and ``label`` (the subdirectory name). Rows are ordered
        by label, then by file name. Empty (but with both columns) when no
        files are found.

    Raises
    ------
    OSError
        Propagated from ``os.listdir`` if ``path`` cannot be listed.
    """
    image_paths = []
    labels = []

    # os.listdir returns entries in arbitrary order; sorting makes the row
    # order (and everything derived from it) reproducible across runs.
    for label in sorted(os.listdir(path)):
        label_dir = os.path.join(path, label)
        if not os.path.isdir(label_dir):
            # Stray files next to the label folders are not classes.
            continue

        for img_file in sorted(os.listdir(label_dir)):
            img_path = os.path.join(label_dir, img_file)
            # Only regular files can be images; nested directories would
            # otherwise be recorded as samples that can never be loaded.
            if os.path.isfile(img_path):
                image_paths.append(img_path)
                labels.append(label)

    return pd.DataFrame({"image_path": image_paths, "label": labels})

In [4]:
# Build the (image_path, label) index for the train and val split of each
# dataset variant; calls run in the same order as listing them one by one.
thresholded_train_df, thresholded_val_df = (
    get_data(root)
    for root in (asl_dataset_thresholded_train, asl_dataset_thresholded_val)
)

grayscaled_train_df, grayscaled_val_df = (
    get_data(root)
    for root in (asl_dataset_grayscaled_train, asl_dataset_grayscaled_val)
)

In [5]:

def process_images(df, img_size=32):
    """Load images as grayscale, resize them to a square and flatten to pixels.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide an ``image_path`` column (paths passed to ``cv2.imread``)
        and a ``label`` column.
    img_size : int, default 32
        Side length, in pixels, of the resized square image.

    Returns
    -------
    pandas.DataFrame
        One row per image that could be read: uint8 columns ``p0`` ...
        ``p{img_size * img_size - 1}`` holding the flattened resized image,
        followed by a ``label`` column. When every image loads, the result is
        row-for-row aligned with ``df``.

    Warns
    -----
    UserWarning
        If any image could not be read. Those rows are dropped instead of
        being kept as all-zero (black) images carrying a real label, which
        would silently corrupt the dataset.
    """
    import warnings  # local import: only needed to report unreadable files

    image_paths = df["image_path"].values
    labels = df["label"].values

    num_images = len(image_paths)
    num_pixels = img_size * img_size
    image_data = np.zeros((num_images, num_pixels), dtype=np.uint8)
    # Tracks which rows of image_data were actually filled from disk.
    loaded = np.zeros(num_images, dtype=bool)

    for i, path in tqdm(enumerate(image_paths), total=num_images, desc="Processing Images"):
        img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        if img is None:
            # cv2.imread signals an unreadable file by returning None
            # rather than raising.
            continue
        image_data[i, :] = cv2.resize(img, (img_size, img_size)).flatten()
        loaded[i] = True

    num_failed = num_images - int(loaded.sum())
    if num_failed:
        examples = ", ".join(str(p) for p in image_paths[~loaded][:5])
        warnings.warn(
            f"{num_failed} of {num_images} images could not be read and were "
            f"dropped (first few: {examples})"
        )
        # Only copy the pixel matrix when something actually has to go.
        image_data = image_data[loaded]
        labels = labels[loaded]

    df_pixels = pd.DataFrame(image_data, columns=[f"p{i}" for i in range(num_pixels)])
    df_pixels["label"] = labels

    return df_pixels


**Image size 32**

In [17]:
# Flatten every split to 32x32 pixel tables (train first, then val, per variant).
thresholded_train_size32, thresholded_val_size32 = (
    process_images(split_df, img_size=32)
    for split_df in (thresholded_train_df, thresholded_val_df)
)

grayscaled_train_size32, grayscaled_val_size32 = (
    process_images(split_df, img_size=32)
    for split_df in (grayscaled_train_df, grayscaled_val_df)
)


Processing Images: 100%|██████████| 30050/30050 [00:06<00:00, 4547.28it/s]
Processing Images: 100%|██████████| 7523/7523 [00:43<00:00, 172.76it/s]
Processing Images: 100%|██████████| 22880/22880 [02:06<00:00, 181.02it/s]
Processing Images: 100%|██████████| 4053/4053 [00:24<00:00, 166.52it/s]


In [29]:
# Quick look at the first five rows of the 32x32 thresholded validation table.
thresholded_val_size32.head(n=5)

Unnamed: 0,p0,p1,p2,p3,p4,p5,p6,p7,p8,p9,...,p1015,p1016,p1017,p1018,p1019,p1020,p1021,p1022,p1023,label
0,255,255,255,255,255,255,255,255,255,255,...,253,254,79,255,246,255,255,255,255,A
1,255,255,255,255,255,255,255,255,255,255,...,255,254,254,167,152,255,255,255,255,A
2,255,255,255,255,255,255,255,255,255,255,...,142,241,255,218,255,255,255,255,255,A
3,255,255,255,255,255,255,255,255,255,255,...,0,0,255,36,255,255,255,255,255,A
4,255,255,255,255,255,255,255,255,255,255,...,0,167,254,167,255,255,255,255,255,A


In [None]:
# to_pickle does not create missing parent directories, so make sure the
# output folder exists before writing (no-op when it is already there).
os.makedirs("../data/processed", exist_ok=True)

# Persist the 32x32 pixel tables, one pickle per split and dataset variant.
thresholded_train_size32.to_pickle("../data/processed/thresholded_train_size32.pkl")
thresholded_val_size32.to_pickle("../data/processed/thresholded_val_size32.pkl")

grayscaled_train_size32.to_pickle("../data/processed/grayscaled_train_size32.pkl")
grayscaled_val_size32.to_pickle("../data/processed/grayscaled_val_size32.pkl")


**Image size 64**

In [6]:
# Flatten every split to 64x64 pixel tables (train first, then val, per variant).
thresholded_train_size64, thresholded_val_size64 = (
    process_images(split_df, img_size=64)
    for split_df in (thresholded_train_df, thresholded_val_df)
)

grayscaled_train_size64, grayscaled_val_size64 = (
    process_images(split_df, img_size=64)
    for split_df in (grayscaled_train_df, grayscaled_val_df)
)


Processing Images: 100%|██████████| 30050/30050 [03:12<00:00, 156.14it/s]
Processing Images: 100%|██████████| 7523/7523 [00:45<00:00, 166.73it/s]
Processing Images: 100%|██████████| 22880/22880 [02:13<00:00, 170.98it/s]
Processing Images: 100%|██████████| 4053/4053 [00:25<00:00, 159.21it/s]


In [7]:
# to_pickle does not create missing parent directories, so make sure the
# output folder exists before writing (no-op when it is already there).
os.makedirs("../data/processed", exist_ok=True)

# Persist the 64x64 pixel tables, one pickle per split and dataset variant.
thresholded_train_size64.to_pickle("../data/processed/thresholded_train_size64.pkl")
thresholded_val_size64.to_pickle("../data/processed/thresholded_val_size64.pkl")

grayscaled_train_size64.to_pickle("../data/processed/grayscaled_train_size64.pkl")
grayscaled_val_size64.to_pickle("../data/processed/grayscaled_val_size64.pkl")