In [None]:
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
import os
import pandas as pd
from concurrent.futures import ThreadPoolExecutor

In [None]:
def convert_to_greyscale_28x28(image_path):
    """Load an image and return a 28x28 greyscale copy of it.

    Args:
        image_path: Location of the image; passed straight to ``Image.open``.

    Returns:
        A new PIL image in mode ``"L"`` (8-bit greyscale) resized to 28x28.
        The returned image is an in-memory copy and does not depend on the
        source file staying open.
    """
    # Image.open only reads the header and keeps the file handle open. The
    # context manager releases it deterministically, even if convert/resize
    # raises, which matters when many files are processed by worker threads.
    with Image.open(image_path) as img:
        grey_img = img.convert("L")
        return grey_img.resize((28, 28))

In [None]:
def process_images_in_parallel(image_paths):
    """Convert every image in ``image_paths`` using a thread pool.

    Each path is handed to ``convert_to_greyscale_28x28``. The returned list
    keeps the same order as ``image_paths`` because ``Executor.map`` yields
    results in input order.
    """
    with ThreadPoolExecutor() as pool:
        # Materialise the lazy map iterator into a list before returning.
        converted = [image for image in pool.map(convert_to_greyscale_28x28, image_paths)]
    return converted

In [None]:
def store_features(file_path, output_df):
    """Write ``output_df`` to ``file_path`` as CSV, appending if it exists.

    When ``file_path`` is already a regular file the rows are appended with
    no header line; otherwise the file is created and the header is written.
    The DataFrame index is never written.
    """
    already_exists = os.path.isfile(file_path)
    write_mode = 'a' if already_exists else 'w'
    # Only a freshly created file needs the column header.
    output_df.to_csv(file_path, mode=write_mode, header=not already_exists, index=False)

In [None]:
# Build a table with one row per image: a 'label' column followed by the 784
# flattened pixel values (pixel1..pixel784) of the 28x28 greyscale image.
input_path = 'dataset/asl_dataset'
output_path = 'dataset/train.csv'
pixel_columns = [f'pixel{i}' for i in range(1, 785)]
columns = ['label'] + pixel_columns
df = pd.DataFrame(columns=columns)

# Rows are collected in a plain list and turned into a DataFrame once.
# DataFrame.append was removed in pandas 2.0, and growing a frame row by row
# copies the whole frame on every iteration.
records = []

if os.path.isfile(input_path):
    # Single image: preview it and build a one-row frame (nothing is written).
    label = 'a'
    transformed_image = convert_to_greyscale_28x28(input_path)
    plt.imshow(transformed_image, cmap='gray', vmin=0, vmax=255)
    features = np.array(transformed_image).flatten()
    records.append({'label': label, **dict(zip(pixel_columns, features))})
    df = pd.DataFrame(records, columns=columns)
elif os.path.isdir(input_path):
    # Directory: each sub-directory name is used as the label of its images.
    # Entries are sorted so the row order is reproducible across runs.
    for label in sorted(os.listdir(input_path)):
        label_path = os.path.join(input_path, label)
        if not os.path.isdir(label_path):
            # Stray files next to the label folders would make the nested
            # os.listdir call below raise; skip them.
            continue
        image_paths = [
            os.path.join(label_path, filename)
            for filename in sorted(os.listdir(label_path))
        ]
        processed_images = process_images_in_parallel(image_paths)
        for image in processed_images:
            features = np.array(image).flatten()
            records.append({'label': label, **dict(zip(pixel_columns, features))})
    df = pd.DataFrame(records, columns=columns)
    # NOTE: store_features appends when output_path already exists, so
    # re-running this cell adds the same rows to the CSV again.
    store_features(output_path, df)
else:
    print("The path is not valid")