# Derma-Challenge

Para comenzar, importaremos las bibliotecas necesarias.

In [1]:
import os
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
import torchvision
import torchvision.transforms as transforms
import torch 
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

## Data Exploration

- Cargar y visualizar algunas imágenes de muestra
- Analizar la distribución de clases
- Calcular estadísticas básicas de las imágenes

In [2]:
# Folder with the raw images (the same location the dataset-splitting cell
# further down reads as `input_folder`).
data_path = "images/images"

# Root folder under which the next cell creates the train/test/val folders.
out_data_path = "images/data"

In [None]:
# Create the output root and its train/test/val folders.
# The previous version only ran when the folder ALREADY existed, so it either
# raised FileExistsError or created nothing. makedirs(exist_ok=True) creates
# missing parents and is a no-op for folders that are already there, which
# keeps the cell safe to re-run.
for split_name in ("train", "test", "val"):
    os.makedirs(os.path.join(out_data_path, split_name), exist_ok=True)

## Dataset splitting

In [None]:
import os
import random
import shutil

# Source tree (its entries are treated as class names below) and the
# destination root of the train/val/test split.
input_folder = 'images/images'
output_folder = 'data/'

# Make sure the destination root and its three split folders exist.
os.makedirs(output_folder, exist_ok=True)
for split_name in ('train', 'val', 'test'):
    os.makedirs(os.path.join(output_folder, split_name), exist_ok=True)

# Function to create class folders
def create_class_folders(folder_path, source_folder=None):
    """Mirror the class sub-folders of the source tree inside ``folder_path``.

    Args:
        folder_path: Destination directory that receives one (possibly empty)
            sub-folder per class. It is created if missing.
        source_folder: Directory whose sub-directories name the classes.
            Defaults to the module-level ``input_folder``.

    Only directories are treated as classes: stray files sitting next to the
    class folders (e.g. ``.DS_Store``) are skipped instead of being turned
    into bogus class folders. Existing folders are left untouched.
    """
    if source_folder is None:
        source_folder = input_folder
    for class_name in os.listdir(source_folder):
        if not os.path.isdir(os.path.join(source_folder, class_name)):
            continue
        os.makedirs(os.path.join(folder_path, class_name), exist_ok=True)

# Count the .png images of every class (one class per sub-folder of input_folder).
# Class names are sorted so the report order is the same on every run
# (os.listdir returns entries in arbitrary order).
class_images = {}
for class_name in sorted(os.listdir(input_folder)):
    class_dir = os.path.join(input_folder, class_name)
    # Stray files next to the class folders are not classes; listing them
    # would raise NotADirectoryError.
    if not os.path.isdir(class_dir):
        continue
    class_images[class_name] = sum(1 for f in os.listdir(class_dir) if f.endswith('.png'))
    print(f"Class {class_name}: {class_images[class_name]} images")

# Calculate number of images for each split per class.
# Target is 80% train / 10% val / remainder test, but every class that passes
# the `count < 3` guard keeps at least one validation and one test image.
# Previously classes with 3-9 images got 0 validation images and were
# silently dropped at copy time. For 10 or more images the numbers are the
# same as before.
class_splits = {}
for class_name, count in class_images.items():
    if count < 3:
        print(f"Not enough images for class {class_name}. Skipping this class.")
        continue

    n_val = max(1, int(count * 0.1))
    # Leave room for the validation images plus at least one test image.
    n_train = min(int(count * 0.8), count - n_val - 1)
    class_splits[class_name] = {
        'train': n_train,
        'val': n_val,
        'test': count - n_train - n_val,
    }

# Split images into train, val, and test sets for each class.
# The split must be reproducible: the copy step never deletes earlier output,
# so a different shuffle on a second run would leave the same image in more
# than one of train/val/test.
SPLIT_SEED = 42
class_file_lists = {}
for class_name, split_sizes in class_splits.items():
    class_dir = os.path.join(input_folder, class_name)
    # Sort first: os.listdir order is arbitrary, so shuffling it with a fixed
    # seed would still not be deterministic.
    class_files = sorted(f for f in os.listdir(class_dir) if f.endswith('.png'))
    # A dedicated generator per class keeps each class's split independent of
    # the order in which classes are processed and leaves the global RNG alone.
    random.Random(f"{SPLIT_SEED}:{class_name}").shuffle(class_files)

    train_end = split_sizes['train']
    val_end = train_end + split_sizes['val']
    class_file_lists[class_name] = {
        'train': class_files[:train_end],
        'val': class_files[train_end:val_end],
        'test': class_files[val_end:],  # everything that is left
    }

# Copy images to the appropriate folders
split_names = ('train', 'val', 'test')

# Create the class folders once per split. This used to run once per copied
# file, re-listing the input folder and re-issuing makedirs for every class
# each time. Doing it up front also means the folders exist even when no
# class ends up being copied.
for split in split_names:
    create_class_folders(os.path.join(output_folder, split))

for class_name, file_list in class_file_lists.items():
    # A class is only usable if each of the three splits received a file.
    if not all(file_list[split] for split in split_names):
        print(f"Not enough images for class {class_name}. Skipping this class.")
        continue

    for split in split_names:
        for filename in file_list[split]:
            src_path = os.path.join(input_folder, class_name, filename)
            dst_path = os.path.join(output_folder, split, class_name, filename)
            # copy2 copies the file contents together with its metadata.
            shutil.copy2(src_path, dst_path)
            print(f"Copied {src_path} to {dst_path}")

print("\nData copying completed.")

In [None]:
import os

# Check that the dataset was split correctly: print, for every class, the
# number of .png files in each split next to the number in the source folder.

train_folder = 'data/train/'
eval_folder = 'data/test/'
validation_folder = 'data/val/'
input_folder = 'images/images'


def _count_pngs(folder):
    """Return how many ``.png`` files sit directly in ``folder`` (0 if it is not a directory)."""
    if not os.path.isdir(folder):
        return 0
    return sum(1 for f in os.listdir(folder) if f.endswith('.png'))


for class_name in os.listdir(input_folder):
    # Only sub-folders are classes; skip stray files instead of crashing on them.
    if not os.path.isdir(os.path.join(input_folder, class_name)):
        continue
    train_images = _count_pngs(os.path.join(train_folder, class_name))
    eval_images = _count_pngs(os.path.join(eval_folder, class_name))
    validation_images = _count_pngs(os.path.join(validation_folder, class_name))
    total_images = _count_pngs(os.path.join(input_folder, class_name))
    print(f'class: {class_name}; train {train_images}; eval {eval_images}; val {validation_images}; total {total_images}')


## Data Preprocessing

- Aplicar transformaciones a las imágenes (redimensionamiento, normalización, etc.)
- Dividir el conjunto de datos en entrenamiento, validación y prueba
- Crear los DataLoaders de PyTorch

### Transformaciones

In [None]:
# Preprocessing pipeline applied to an image before it is fed to the model.
preprocessing_steps = [
    # Resize with a single int scales the image so its shorter side is 256 px
    # (aspect ratio preserved); the result is not necessarily 256x256.
    transforms.Resize(256),
    # Keep the central 224x224 region.
    transforms.CenterCrop(224),
    # Convert the PIL image to a tensor.
    transforms.ToTensor(),
    # Per-channel normalisation with a fixed mean / standard deviation.
    transforms.Normalize(std=[0.229, 0.224, 0.225], mean=[0.485, 0.456, 0.406]),
]
transform = transforms.Compose(preprocessing_steps)

In [None]:
# Load a sample image.
# Image.open is lazy and keeps the file open; converting inside the `with`
# block loads the pixel data and lets the handle be released afterwards.
with Image.open('imagen_ejemplo.jpg') as example_file:
    # Force three channels: the Normalize step of `transform` is configured
    # with 3-element mean/std, so grayscale or CMYK inputs would not match.
    img = example_file.convert('RGB')
img

In [None]:
# Apply the transformations: run the sample image through the `transform`
# pipeline defined in an earlier cell.
img_transformada = transform(img)
# Bare expression as the last line of the cell so the notebook displays the result.
img_transformada

In [None]:
# Show the transformed image.
# The tensor was normalised by `transform`, so its values fall outside the
# [0, 1] range matplotlib expects for float RGB data and would be clipped.
# Undo the normalisation first; mean/std must match the ones passed to
# transforms.Normalize in the transform definition.
norm_mean = torch.tensor([0.485, 0.456, 0.406]).view(3, 1, 1)
norm_std = torch.tensor([0.229, 0.224, 0.225]).view(3, 1, 1)
img_display = (img_transformada * norm_std + norm_mean).clamp(0, 1)
plt.imshow(img_display.permute(1, 2, 0))  # channels-first -> channels-last for matplotlib
plt.show()

### DataLoaders

In [None]:
# Dataset class that loads the images and derives their labels from the file names
class ImageDataset(Dataset):
    """Flat-folder image dataset whose labels are encoded in the file names.

    Every regular file directly inside ``root_dir`` is one sample. Its label
    is the integer that follows the last underscore in the part of the file
    name before the first dot, e.g. ``lesion_3.png`` -> ``3``.

    Sub-directories of ``root_dir`` are ignored, because they cannot be opened
    as images.
    NOTE(review): a class-per-folder tree such as ``data/train/<class>/``
    therefore yields an empty dataset here; confirm which layout is intended.

    Args:
        root_dir: Directory containing the image files.
        transform: Optional callable applied to the loaded RGB PIL image.
    """

    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        # Sorted so that index -> sample is the same on every run
        # (os.listdir returns entries in arbitrary order).
        self.images = sorted(
            name
            for name in os.listdir(root_dir)
            if os.path.isfile(os.path.join(root_dir, name))
        )

    def __len__(self):
        """Return the number of samples."""
        return len(self.images)

    @staticmethod
    def _label_from_name(img_name):
        """Parse the integer label out of ``img_name``.

        Raises:
            ValueError: If the file name does not end its stem with ``_<int>``.
        """
        token = img_name.split('.')[0].split('_')[-1]
        try:
            return int(token)
        except ValueError as err:
            raise ValueError(
                f"Cannot read an integer label from file name {img_name!r}; "
                "expected something like 'name_<label>.<ext>'"
            ) from err

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the sample at position ``idx``."""
        img_name = self.images[idx]
        img_path = os.path.join(self.root_dir, img_name)
        # Image.open is lazy; converting inside the `with` block loads the
        # pixels and lets the file handle be closed right away. RGB conversion
        # gives every sample the same number of channels (PNG files may be
        # grayscale, palette-based or carry an alpha channel).
        with Image.open(img_path) as img_file:
            image = img_file.convert('RGB')

        if self.transform is not None:
            image = self.transform(image)

        return image, self._label_from_name(img_name)



In [None]:
# Create the dataset, applying the `transform` pipeline to every loaded image.
# NOTE(review): 'path/to/dataset' looks like a placeholder — confirm and point
# it at the real image folder before running.
dataset = ImageDataset('path/to/dataset', transform=transform)
# Bare expression as the last line of the cell so the notebook displays it.
dataset

In [None]:

# Build the DataLoader: shuffled batches of `batch_size` samples from `dataset`.
batch_size = 32
dataloader = DataLoader(dataset, shuffle=True, batch_size=batch_size)

# Smoke-test it: fetch only the first batch (if there is one) and print the
# shapes of its image and label tensors.
batch = next(iter(dataloader), None)
if batch is not None:
    images, labels = batch
    print(images.shape, labels.shape)

## Model Training 

- Definir la arquitectura del modelo
- Configurar el optimizador y la función de pérdida
- Entrenar el modelo y monitorear el progreso
- Guardar el modelo entrenado

## Model Evaluation

- Cargar el modelo entrenado
- Evaluar el modelo en el conjunto de prueba
- Calcular métricas de rendimiento
- Visualizar ejemplos de predicciones correctas e incorrectas