### Load Dependencies

In [None]:
# Install opendatasets, which is imported below and used to download the dataset.
# %pip (rather than !pip) installs into the running kernel's environment.
# NOTE(review): the version is unpinned — consider pinning it for reproducibility.
%pip install opendatasets

### Imports

In [None]:
import numpy as np
import opendatasets as od
import os
from PIL import Image
import random as rand
from utils import ProgressBar

### Load Dataset

In [None]:
# Dataset locations, relative to the notebook's working directory.
# The duplicated trailing folder name is part of each path as written.

# Directory passed to load_train_dataset for the training images
TRAIN_DATA_DIR = 'asl-alphabet/asl_alphabet_train/asl_alphabet_train'
# Directory holding the test images
TEST_DATA_DIR = 'asl-alphabet/asl_alphabet_test/asl_alphabet_test'

In [None]:
# Download Kaggle dataset (Kaggle username and key is required).
# SECURITY: never record credentials in the notebook. An API key that was
# previously pasted in this comment has been removed and should be revoked.
# Supply the username/key at run time instead (e.g. when prompted, or from a
# local kaggle.json kept out of version control — TODO confirm against the
# opendatasets documentation).
od.download('https://www.kaggle.com/datasets/grassknoted/asl-alphabet')

In [None]:
def load_train_dataset(data_dir, num_samples, seed=None):
    """Load a random sample of images from every class directory.

    Each sub-directory of ``data_dir`` is treated as one class. Its name is
    mapped to an integer label through ``label_dict`` below, and
    ``num_samples`` files are drawn from it without replacement.

    Args:
        data_dir: Directory containing one sub-directory per class.
        num_samples: Number of images to draw from each class directory.
        seed: Optional seed. When not ``None`` the global ``random``
            generator is re-seeded so the selection is reproducible.

    Returns:
        A tuple ``(images, labels)`` of two parallel lists: the sampled
        images as RGB ``PIL.Image`` objects and their integer class labels.

    Raises:
        KeyError: If a sampled class directory name is not in ``label_dict``.
        ValueError: If a class directory holds fewer than ``num_samples``
            files (raised by ``random.sample``).
    """
    # Seed random number generator
    if seed is not None:
        rand.seed(seed)

    # Label dictionary
    label_dict = {
        'A': 0,  'B': 1,  'C': 2,  'D': 3,  'E': 4,  'F': 5,  'G': 6,  'H': 7,  'I': 8,  'J': 9,
        'K': 10, 'L': 11, 'M': 12, 'N': 13, 'O': 14, 'P': 15, 'Q': 16, 'R': 17, 'S': 18, 'T': 19,
        'U': 20, 'V': 21, 'W': 22, 'X': 23, 'Y': 24, 'Z': 25, 'del': 26, 'nothing': 27, 'space': 28
    }

    # Initialize image and label lists
    images, labels = [], []

    # os.listdir returns entries in arbitrary order, so sort them: otherwise
    # the same seed could select different files on different machines.
    # Stray non-directory entries (e.g. .DS_Store) are skipped because they
    # cannot be listed as a class directory.
    class_dirs = sorted(
        entry for entry in os.listdir(data_dir)
        if os.path.isdir(os.path.join(data_dir, entry))
    )
    progress = ProgressBar(len(class_dirs))

    # Load data
    print('Loading training dataset:')
    for class_dir in class_dirs:
        # Get files in directory (sorted for reproducibility) and draw
        # the sample indices without replacement
        files = sorted(os.listdir(os.path.join(data_dir, class_dir)))
        samples = rand.sample(range(len(files)), num_samples)

        for i in samples:
            filepath = os.path.join(data_dir, class_dir, files[i])

            # Image.open is lazy and keeps the file handle open; converting
            # inside the context manager reads the pixel data into a new RGB
            # image and lets the handle be closed right away.
            with Image.open(filepath) as image_file:
                image = image_file.convert('RGB')

            # Store image and label
            images.append(image)
            labels.append(label_dict[class_dir])

        # Advance the progress bar once per class directory
        progress.step()

    # Return images and labels
    return images, labels

In [None]:
# Number of samples per class
num_samples = 30

# Draw the training sample with a fixed seed (0) so reruns seed the
# random generator identically
x, y = load_train_dataset(
    data_dir=TRAIN_DATA_DIR,
    num_samples=num_samples,
    seed=0,
)