### Numpy files for Deep Learning

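- Converts the test-set images into a single NumPy array: each image is center-cropped to 30″ × 30″, resized to 64 × 64 pixels, and scaled to the [0, 1] range.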

In [17]:
import numpy as np
import os
from PIL import Image
from tqdm import tqdm
import concurrent.futures
import gc

In [18]:
# Directory configuration
# Images are read from images_directory; the generated .npy files are
# written to output_directory.
images_directory = '../Datasets_DeepShadows/Jpeg_data/Test/'
output_directory = '../Datasets_DeepShadows/array_images/'
# Create the output directory if it does not exist yet (no error if it does).
os.makedirs(output_directory, exist_ok=True)

In [19]:
# Key parameters (based on the image download settings)
ORIGINAL_PIXSCALE = 0.262  # Arcsec/pixel used when the images were downloaded
DEEP_SHADOWS_ANGULAR_SIZE = 30  # Arcsec (target angular size)

In [20]:
# 1. Compute the crop needed to emulate DeepShadows
def calculate_crop_size(img_width, pixscale=None, target_arcsec=None):
    """
    Return the margin, in pixels, to trim from each end of an image side so
    that the remaining span covers ``target_arcsec`` arcseconds.

    Args:
        img_width: Length of the image side in pixels.
        pixscale: Pixel scale in arcsec/pixel. Defaults to ORIGINAL_PIXSCALE.
        target_arcsec: Desired angular size in arcsec. Defaults to
            DEEP_SHADOWS_ANGULAR_SIZE.

    Returns:
        int: Non-negative number of pixels to remove from each end of the
        side. 0 when the side already spans ``target_arcsec`` or less.
    """
    if pixscale is None:
        pixscale = ORIGINAL_PIXSCALE
    if target_arcsec is None:
        target_arcsec = DEEP_SHADOWS_ANGULAR_SIZE

    total_arcsec = img_width * pixscale
    crop_pixels = int((total_arcsec - target_arcsec) / pixscale / 2)
    # A negative margin would make PIL's crop() enlarge the image and fill
    # the new border with empty pixels, so never return less than zero.
    return max(crop_pixels, 0)

In [21]:
# 2. Per-image processing
def process_image(filename):
    """
    Load one image from ``images_directory``, center-crop it to the target
    angular size, resize it to 64x64 and scale it to the [0, 1] range.

    Args:
        filename: Name of the image file inside ``images_directory``.

    Returns:
        np.ndarray: float32 array of shape (64, 64, 3), or None if the image
        could not be processed (the error is printed, not raised).
    """
    try:
        image_path = os.path.join(images_directory, filename)
        # The context manager closes the file handle even if a later step
        # fails; otherwise handles pile up while looping over many files.
        with Image.open(image_path) as source:
            width, height = source.size

            # Step 1: crop to 30"x30". Margins are computed per axis so that
            # non-square images are cropped correctly as well.
            crop_x = calculate_crop_size(width)
            crop_y = calculate_crop_size(height)
            # Force 3 channels so every array has the same shape and the
            # batch can be stacked (grayscale/RGBA inputs would differ).
            cropped = source.convert('RGB').crop((
                crop_x,
                crop_y,
                width - crop_x,
                height - crop_y
            ))

            # Step 2: resize to 64x64 (as in DeepShadows)
            resized = cropped.resize((64, 64), Image.LANCZOS)

            # Convert to array and normalize 8-bit values to [0, 1]
            img_array = np.array(resized, dtype=np.float32) / 255.0

        return img_array
    except Exception as e:
        # Best effort: report the failure and let the caller skip this image.
        print(f"Error procesando {filename}: {str(e)}")
        return None

In [22]:
# 3. Batch processing (keeps memory usage bounded)
def process_image_batch(batch_files):
    """
    Run ``process_image`` on every file name in ``batch_files`` and stack the
    successful results along a new leading axis.

    Returns:
        The stacked array, or None when no image in the batch was processed
        successfully (including an empty batch).
    """
    results = (process_image(name) for name in batch_files)
    # Images that failed come back as None and are skipped.
    valid = [array for array in results if array is not None]
    if not valid:
        return None
    return np.stack(valid)

In [23]:
# 4. Main processing
# NOTE(review): os.listdir() returns entries in arbitrary order. If labels are
# matched to these images by position, confirm the ordering is the same on
# both sides -- TODO confirm.
image_files = [f for f in os.listdir(images_directory) 
              if f.lower().endswith(('.jpg', '.jpeg', '.png'))]

print(f"Procesando {len(image_files)} imágenes...")
print(f"Parámetros: ORIGINAL_PIXSCALE={ORIGINAL_PIXSCALE}, DEEP_SHADOWS_ANGULAR_SIZE={DEEP_SHADOWS_ANGULAR_SIZE}")

# Remove temporary batches left behind by an earlier or interrupted run;
# otherwise the combine step would merge them into the final array.
for leftover in os.listdir(output_directory):
    if leftover.startswith('temp_batch_') and leftover.endswith('.npy'):
        os.remove(os.path.join(output_directory, leftover))

# Process in batches of 500 images
batch_size = 500
full_array = None

for i in tqdm(range(0, len(image_files), batch_size), 
             desc="Procesando imágenes", 
             unit="batch"):
    
    batch_files = image_files[i:i+batch_size]
    batch_array = process_image_batch(batch_files)
    
    # A batch in which every image failed yields None and writes no file.
    if batch_array is not None:
        # Save the batch to a temporary file named after its batch index
        batch_path = os.path.join(output_directory, f'temp_batch_{i//batch_size}.npy')
        np.save(batch_path, batch_array)
        
        # Free memory before the next batch
        del batch_array
        gc.collect()

Procesando 4983 imágenes...
Parámetros: ORIGINAL_PIXSCALE=0.262, DEEP_SHADOWS_ANGULAR_SIZE=30


Procesando imágenes: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [03:17<00:00, 19.75s/batch]


In [24]:
# 5. Combine all batches
print("Combinando lotes...")

# Only accept files named temp_batch_<integer>.npy, so that an unrelated file
# with the same prefix cannot break the numeric sort below.
batch_files = [
    f for f in os.listdir(output_directory)
    if f.startswith('temp_batch_') and f.endswith('.npy')
    and f[len('temp_batch_'):-len('.npy')].isdecimal()
]
# Sort by batch index (numeric, so batch 10 comes after batch 9).
batch_files.sort(key=lambda x: int(x[len('temp_batch_'):-len('.npy')]))

if not batch_files:
    # Fail with an explicit message instead of np.concatenate's generic error.
    raise FileNotFoundError(
        f"No se encontraron lotes temporales en {output_directory}; "
        "ejecuta primero la celda de procesamiento"
    )

all_arrays = []
batch_paths = []
for batch_file in tqdm(batch_files, desc="Cargando lotes"):
    batch_path = os.path.join(output_directory, batch_file)
    all_arrays.append(np.load(batch_path))
    batch_paths.append(batch_path)

full_array = np.concatenate(all_arrays, axis=0)
np.save(os.path.join(output_directory, 'X_test.npy'), full_array)

# Delete the temporaries only after the final array is safely on disk, so a
# failure while concatenating or saving does not lose the processed batches.
for batch_path in batch_paths:
    os.remove(batch_path)

print(f"Array final guardado. Dimensiones: {full_array.shape}")

Combinando lotes...


Cargando lotes: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 150.69it/s]


Array final guardado. Dimensiones: (4983, 64, 64, 3)
