# 🧠 Clasificación de la Enfermedad de Alzheimer
Integrantes
- Diego Alexander Hernández Silvestre - 21270
- Linda Inés Jiménez Vides - 21169
- Mario Antonio Guerra Morales - 21008
- Kristopher Javier Alvarado López - 21188

In [1]:
import os
from PIL import Image
from torchvision import transforms
from torch.utils.data import DataLoader, Dataset
import random
import shutil

#### 📊 Balanceo de Data

In [7]:
# Augmentation pipeline applied to each image when generating extra samples.
augmentations = transforms.Compose([
    transforms.RandomRotation(degrees=10),  # random rotation within ±10 degrees
    transforms.RandomAffine(degrees=0, translate=(0.02, 0.02)),  # shift up to 2% horizontally/vertically
    # Random crop resized to 224x224. `scale` is expressed as a fraction of the
    # source image's area, so its upper bound must not exceed 1.0: a crop
    # cannot be larger than the image it is taken from.
    transforms.RandomResizedCrop(size=(224, 224), scale=(0.92, 1.0)),
    transforms.ToTensor(),  # PIL image -> tensor
])

# Custom dataset that loads the images stored directly inside one folder
class DementiaDataset(Dataset):
    """Map-style dataset over the image files found directly in ``image_dir``.

    Each item is opened with PIL, converted to RGB and, when a transform was
    supplied, passed through it. Items carry no label: one instance is meant
    to wrap the folder of a single class.

    Args:
        image_dir: Folder that contains the image files.
        transform: Optional callable applied to every RGB PIL image.
    """

    # Extensions accepted as images; every other directory entry is ignored.
    IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.bmp', '.gif', '.tif', '.tiff', '.webp')

    def __init__(self, image_dir, transform=None):
        self.image_dir = image_dir
        # Keep only regular files with an image extension so that stray
        # entries (OS metadata files, notes, sub-directories) never reach
        # Image.open. Sorting makes the index -> file mapping deterministic,
        # which os.listdir alone does not guarantee.
        self.image_list = sorted(
            name
            for name in os.listdir(image_dir)
            if name.lower().endswith(self.IMAGE_EXTENSIONS)
            and os.path.isfile(os.path.join(image_dir, name))
        )
        self.transform = transform

    def __len__(self):
        """Return the number of image files kept from the folder."""
        return len(self.image_list)

    def __getitem__(self, idx):
        """Load the image at position ``idx`` as RGB and apply the transform."""
        img_path = os.path.join(self.image_dir, self.image_list[idx])
        # convert() returns a new, fully loaded image, so the source file can
        # be closed as soon as the conversion is done.
        with Image.open(img_path) as source:
            image = source.convert('RGB')
        if self.transform:
            image = self.transform(image)
        return image

# Training folder of every class, keyed by class label (insertion order kept)
folders = {
    label: f"data/train/{label}"
    for label in (
        "MildDemented",
        "ModerateDemented",
        "NonDemented",
        "VeryMildDemented",
    )
}

# Number of images every class folder should end up with
target_size = 3200

# Generate augmented images and save them until the class reaches its target size
def augment_and_save(dataset, save_dir, current_size, target_size):
    """Write augmented images to ``save_dir`` until ``target_size`` is reached.

    Samples are drawn from ``dataset`` in shuffled order, cycling through it as
    many times as needed. Each sample is converted back to a PIL image and
    saved as ``augmented_<n>.jpg``, with ``n`` starting at ``current_size``.

    Args:
        dataset: Sized dataset whose items are image tensors (assumes the
            dataset already applies the augmentation transform and ends with
            a tensor conversion — confirm against the caller).
        save_dir: Folder that receives the generated JPEG files.
        current_size: Number of images the class currently has.
        target_size: Number of images the class should end up with.

    Raises:
        ValueError: If images are still needed but ``dataset`` is empty, since
            there would be nothing to augment.

    NOTE(review): if a train/validation split is made after this step on the
    same folder, variants of one source image may land in both subsets.
    """
    if current_size >= target_size:
        return  # already balanced, nothing to generate
    if len(dataset) == 0:
        raise ValueError(
            f"Cannot augment from an empty dataset (need {target_size - current_size} more images)."
        )

    loader = DataLoader(dataset, batch_size=1, shuffle=True)
    to_pil = transforms.ToPILImage()  # built once instead of once per image

    counter = current_size     # images the class has so far
    file_index = current_size  # numeric suffix of the next output file
    while counter < target_size:
        for batch in loader:
            out_path = os.path.join(save_dir, f'augmented_{file_index}.jpg')
            # Never overwrite an earlier augmented file: an overwrite would
            # bump the counter without adding an image to the folder.
            while os.path.exists(out_path):
                file_index += 1
                out_path = os.path.join(save_dir, f'augmented_{file_index}.jpg')

            # batch_size=1, so batch[0] is the single sample of the batch
            to_pil(batch[0]).save(out_path)
            file_index += 1
            counter += 1
            if counter >= target_size:
                break

# Top up every class folder that is still below the target size
for label, folder in folders.items():
    current_images = os.listdir(folder)
    current_size = len(current_images)

    # Guard clause: classes that already hold enough images are only reported
    if current_size >= target_size:
        print(f"No se necesita augmentaci√≥n en {label}. Tama√±o actual: {current_size}.")
        continue

    print(f"Aplicando augmentaci√≥n en {label}. Tama√±o actual: {current_size}.")

    # Wrap the class folder in a dataset that applies the augmentation
    # pipeline, then write the generated images back into that same folder
    dataset = DementiaDataset(image_dir=folder, transform=augmentations)
    augment_and_save(dataset, folder, current_size, target_size)


Aplicando augmentaci√≥n en MildDemented. Tama√±o actual: 717.
Aplicando augmentaci√≥n en ModerateDemented. Tama√±o actual: 52.
Aplicando augmentaci√≥n en NonDemented. Tama√±o actual: 2560.
Aplicando augmentaci√≥n en VeryMildDemented. Tama√±o actual: 1792.


In [8]:
# Report how many entries each class folder currently holds
for label in folders:
    folder = folders[label]
    current_images = os.listdir(folder)
    current_size = len(current_images)
    print(f"Clase: {label}. Tama√±o actual: {current_size}.")

Clase: MildDemented. Tama√±o actual: 3200.
Clase: ModerateDemented. Tama√±o actual: 3200.
Clase: NonDemented. Tama√±o actual: 3200.
Clase: VeryMildDemented. Tama√±o actual: 3200.


#### 🏋🏽‍♀️ División entrenamiento y validación

In [11]:
# Source folder holding one sub-folder per class
train_folder = "data/train/"

# Destination roots for the training and validation subsets
train_dir, val_dir = "new_data/train/", "new_data/validation/"

# Make sure both destination roots exist (no error if they already do)
os.makedirs(train_dir, exist_ok=True)
os.makedirs(val_dir, exist_ok=True)

# Running totals of the images assigned to each subset
total_train_images = total_val_images = 0

# Split the images of one class into train/validation and copy them over
def split_train_validation(train_folder, label, train_dir, val_dir, split_ratio=0.7, seed=None):
    """Randomly split one class folder and copy its files to train/validation.

    The files in ``train_folder/label`` are shuffled; the first
    ``int(n * split_ratio)`` go to ``train_dir/label`` and the rest to
    ``val_dir/label``. The module-level counters ``total_train_images`` and
    ``total_val_images`` are increased by the size of each subset.

    Args:
        train_folder: Folder that contains one sub-folder per class.
        label: Name of the class sub-folder to split.
        train_dir: Destination root for the training subset.
        val_dir: Destination root for the validation subset.
        split_ratio: Fraction of the files assigned to training, in [0, 1].
        seed: Optional seed for a reproducible shuffle. When ``None`` the
            global ``random`` state is used, as before.

    Raises:
        ValueError: If ``split_ratio`` is outside [0, 1].

    NOTE(review): destination folders are not cleared, so re-running with a
    different shuffle can leave the same file in both subsets.
    NOTE(review): if the source folder holds augmented variants of one image,
    a purely random split may place those variants in both subsets.
    """
    global total_train_images, total_val_images

    if not 0 <= split_ratio <= 1:
        raise ValueError(f"split_ratio must be between 0 and 1, got {split_ratio!r}")

    class_dir = os.path.join(train_folder, label)
    # Regular files only (shutil.copy cannot copy a directory); sorted so the
    # result depends only on the shuffle, not on the OS listing order.
    images = sorted(
        name
        for name in os.listdir(class_dir)
        if os.path.isfile(os.path.join(class_dir, name))
    )
    if seed is None:
        random.shuffle(images)
    else:
        # Private generator: reproducible without touching the global RNG
        random.Random(seed).shuffle(images)

    # First part goes to train, the remainder to validation
    split_index = int(len(images) * split_ratio)
    train_images = images[:split_index]
    val_images = images[split_index:]

    # Update the module-level totals
    total_train_images += len(train_images)
    total_val_images += len(val_images)

    # Create the class folder inside each destination and copy its subset
    for subset_root, subset_images in ((train_dir, train_images), (val_dir, val_images)):
        subset_dir = os.path.join(subset_root, label)
        os.makedirs(subset_dir, exist_ok=True)
        for img in subset_images:
            shutil.copy(os.path.join(class_dir, img), os.path.join(subset_dir, img))

# Split every class folder found inside the source training directory.
# Only visible sub-directories are treated as classes: a stray file or hidden
# entry (e.g. OS metadata) would otherwise be passed on as a class name and
# fail when its contents are listed. Sorting fixes the processing order.
for label in sorted(os.listdir(train_folder)):
    if label.startswith('.') or not os.path.isdir(os.path.join(train_folder, label)):
        continue
    split_train_validation(train_folder, label, train_dir, val_dir, split_ratio=0.7)

# Print the total number of images assigned to train and validation
print(f"Total de im√°genes en train: {total_train_images}")
print(f"Total de im√°genes en validation: {total_val_images}")
print("Divisi√≥n de im√°genes de train en train/validation completada.")

Total de im√°genes en train: 8960
Total de im√°genes en validation: 3840
Divisi√≥n de im√°genes de train en train/validation completada.
