In [2]:
import os
import shutil
from sklearn.model_selection import train_test_split
from pathlib import Path
from PIL import Image
import cv2
import numpy as np
import matplotlib.pyplot as plt
from concurrent.futures import ThreadPoolExecutor

#Notebook para preprocesar los datasets y dividirlos en TRAIN y TEST

In [3]:
# Dataset locations, relative to the notebook's working directory.
# The paths are assembled with the `/` operator instead of hard-coded
# backslashes so that they resolve to the same folders on Windows, Linux
# and macOS (a literal "..\\datasets\\X" is a single file name on POSIX).
DATASETS_DIR = Path("..") / "datasets"
dataset_path = DATASETS_DIR / "ASL_Comp"    # source images, one sub-folder per class
train_dataset = DATASETS_DIR / "ASL_train"  # destination of the training split
test_dataset = DATASETS_DIR / "ASL_test"    # destination of the test split

In [4]:
# Create the two output roots. exist_ok=True makes re-running this cell a
# no-op and avoids the separate "exists?" check before creating.
os.makedirs(train_dataset, exist_ok=True)
os.makedirs(test_dataset, exist_ok=True)

# Example - splitting the files of a single class into train and test:
#   archivos = os.listdir(os.path.join(dataset_path, "A_ASL"))
#   train_files, test_files = train_test_split(archivos, test_size=0.2, random_state=42)

# Side length, in pixels, of the square images produced by preprocessing_image
# (used there as both the target width and the target height).
RESIZE = 224

In [5]:
def preprocessing_image(image):
    """Load an image from disk and resize it to RESIZE x RESIZE pixels.

    Parameters
    ----------
    image : str or path-like
        Location of the image file; it is converted with ``str()`` before
        being handed to ``cv2.imread``.

    Returns
    -------
    numpy.ndarray
        The resized image with its channel axis reversed. OpenCV loads colour
        images in BGR order, so the reversed view is in the RGB order that
        PIL expects.

    Raises
    ------
    ValueError
        If OpenCV cannot read the file (``cv2.imread`` returned ``None``).
    """
    img = cv2.imread(str(image), cv2.IMREAD_COLOR)
    if img is None:
        # cv2.imread signals failure with None instead of raising; fail here
        # with a message that names the offending file.
        raise ValueError(f"No se pudo leer la imagen: {image}")

    # ndarray.shape is (rows, cols, ...), i.e. height comes first.
    height, width = img.shape[:2]

    # INTER_AREA when the image has to shrink along either axis,
    # INTER_CUBIC when it is only enlarged (or already at target size).
    shrinking = width > RESIZE or height > RESIZE
    inter_type = cv2.INTER_AREA if shrinking else cv2.INTER_CUBIC

    # The third positional parameter of cv2.resize is `dst`, not the
    # interpolation flag, so the flag must be passed by keyword.
    resized = cv2.resize(img, (RESIZE, RESIZE), interpolation=inter_type)
    return resized[:, :, ::-1]

#Ejemplo: 

In [6]:
def convert(file_array,c_path,n_path):
    """Preprocess every listed image and store it as a ``.jpg`` file.

    Parameters
    ----------
    file_array : iterable of str
        File names (not full paths) located inside ``c_path``.
    c_path : str or path-like
        Directory the source images are read from.
    n_path : str or path-like
        Directory the converted images are written to.

    Each file is handled independently: a failure is printed and the loop
    moves on to the next file, so one bad image does not stop the batch.
    """
    for file in file_array:
        f_path = os.path.join(c_path, file)
        try:
            imagen = Image.fromarray(preprocessing_image(f_path))
            # splitext strips the real extension whatever its length
            # ("x.jpeg" -> "x", "noext" -> "noext"); slicing off a fixed four
            # characters produced "x..jpg" and truncated extension-less names.
            stem, _ = os.path.splitext(file)
            imagen.save(os.path.join(n_path, stem + ".jpg"))
        except Exception as e:
            print(f"Hubo un error en el momento de preprocesarse. Error: {e}")

In [7]:
def convert_wrapper(files, src_path, dest_path):
    """Call ``convert`` and report, rather than propagate, any exception.

    Parameters
    ----------
    files : iterable of str
        File names forwarded to ``convert``.
    src_path : str or path-like
        Source directory forwarded to ``convert``.
    dest_path : str or path-like
        Destination directory forwarded to ``convert``.

    Returns ``None`` in every case; an exception escaping ``convert`` is
    printed together with both directories instead of being re-raised.
    """
    failure = None
    try:
        convert(files, src_path, dest_path)
    except Exception as exc:
        failure = exc

    if failure is not None:
        print(f"Error al convertir de {src_path} a {dest_path}: {failure}")

In [8]:
def process_class(class_name):
    """Split one class folder 80/20 into train/test and convert both halves.

    Parameters
    ----------
    class_name : str
        Name of a sub-folder of ``dataset_path``; folders with the same name
        are created under ``train_dataset`` and ``test_dataset``.

    Any exception raised while preparing or splitting the class is printed
    and swallowed, so the function always returns ``None``.
    """
    try:
        c_path = os.path.join(dataset_path, class_name)
        train_path = os.path.join(train_dataset, class_name)
        test_path = os.path.join(test_dataset, class_name)

        os.makedirs(train_path, exist_ok=True)
        os.makedirs(test_path, exist_ok=True)

        # os.listdir returns entries in arbitrary order. Sorting first makes
        # the seeded split pick the same files on every machine and every run.
        files = sorted(os.listdir(c_path))
        train_files, test_files = train_test_split(files, test_size=0.2, random_state=42)

        # Convert the train and test subsets concurrently, one thread each.
        with ThreadPoolExecutor(max_workers=2) as executor:
            futures = [
                executor.submit(convert_wrapper, train_files, c_path, train_path),
                executor.submit(convert_wrapper, test_files, c_path, test_path),
            ]

            # Block until both conversions are done. convert_wrapper reports
            # its own errors, so result() is used here only to wait.
            for future in futures:
                future.result()

    except Exception as e:
        print(f"Error processing class {class_name}: {e}")

In [9]:
# Each sub-directory of the source dataset is one class. Plain files sitting in
# the dataset root are skipped so they are not mistaken for classes, and the
# names are sorted so classes are always submitted in the same order.
class_names = sorted(
    name
    for name in os.listdir(dataset_path)
    if os.path.isdir(os.path.join(dataset_path, name))
)
# Number of classes processed at the same time (size of the outer thread pool);
# process_class additionally starts its own two-thread pool per class.
num_workers = 2

In [10]:
# Hand every class to the thread pool, then block until each one has finished.
# NOTE: process_class prints and swallows its own errors, so the closing
# message below is shown whether or not individual classes failed.
with ThreadPoolExecutor(max_workers=num_workers) as executor:
    futures = []
    for class_name in class_names:
        futures.append(executor.submit(process_class, class_name))

    for pending in futures:
        pending.result()

print("Todo procesado con exito")

Todo procesado con exito


In [4]:
# Disabled sequential version of the per-class split-and-convert loop: it
# performs the same steps as process_class, but one class at a time and
# without threads. It is wrapped in a string literal so none of it executes;
# because the literal is the last expression in the cell, Jupyter echoes it
# back as the cell's output.
'''for class_name in os.listdir(dataset_path):
    c_path = os.path.join(dataset_path,class_name)
    train_path = os.path.join(train_dataset,class_name)
    test_path = os.path.join(test_dataset,class_name)
    #Crear path de los directorios train y test si no existen
    if not os.path.exists(train_path):
        os.makedirs(train_path)
    if not os.path.exists(test_path):
        os.makedirs(test_path)
    files = os.listdir(c_path)
    train_files,test_files = train_test_split(files,test_size = 0.2, random_state = 42)
    #Convertir imagen y guardarlas en sus respectivas rutas
    convert(train_files,c_path,train_path)
    convert(test_files,c_path,test_path)'''
        

'for class_name in os.listdir(dataset_path):\n    c_path = os.path.join(dataset_path,class_name)\n    train_path = os.path.join(train_dataset,class_name)\n    test_path = os.path.join(test_dataset,class_name)\n    #Crear path de los directorios train y test si no existen\n    if not os.path.exists(train_path):\n        os.makedirs(train_path)\n    if not os.path.exists(test_path):\n        os.makedirs(test_path)\n    files = os.listdir(c_path)\n    train_files,test_files = train_test_split(files,test_size = 0.2, random_state = 42)\n    #Convertir imagen y guardarlas en sus respectivas rutas\n    convert(train_files,c_path,train_path)\n    convert(test_files,c_path,test_path)'