# Data augmentation to balance classes

In [21]:
import os
import numpy as np
from PIL import Image
import pandas as pd

In [22]:
def load_image_labels(csv_path):
    """Build a lookup from image identifier to label out of a CSV file.

    Args:
        csv_path: Location of a CSV file that has an ``isic_id`` column
            (image identifiers) and a ``target`` column (labels).

    Returns:
        dict: One entry per distinct ``isic_id`` mapping to its ``target``
        value. If an identifier is repeated, the last row wins.
    """
    table = pd.read_csv(csv_path)

    identifiers = table['isic_id']
    targets = table['target']

    # Pair the two columns row by row.
    return {image_id: label for image_id, label in zip(identifiers, targets)}

In [None]:
def crop_image(image, num_crops=4):
    """Cut an image into quadrants and scale each one back to full size.

    Quadrants are produced in the order top-left, top-right, bottom-left,
    bottom-right, and only the first ``num_crops`` of them are used
    (list-slice semantics, so values above 4 still give 4 crops).

    Args:
        image: Image object providing ``size``, ``crop`` and ``resize``
            (a PIL image in this notebook).
        num_crops: Number of leading quadrants to return.

    Returns:
        list: The selected crops, each resized to the original
        ``(width, height)`` with Lanczos resampling.
    """
    width, height = image.size
    mid_x = width // 2
    mid_y = height // 2

    # (left, upper, right, lower) boxes for the four quadrants.
    quadrant_boxes = [
        (0, 0, mid_x, mid_y),           # top left
        (mid_x, 0, width, mid_y),       # top right
        (0, mid_y, mid_x, height),      # bottom left
        (mid_x, mid_y, width, height),  # bottom right
    ]

    def _extract(box):
        # Cut out the region, then upscale it unless it already has the
        # dimensions of the source image.
        piece = image.crop(box)
        if piece.size != (width, height):
            piece = piece.resize((width, height), Image.LANCZOS)
        return piece

    return [_extract(box) for box in quadrant_boxes[:num_crops]]

In [24]:
def augment_positive_class(images_dir, csv_path, output_dir):
    """Write cropped augmentations of every positively labelled image.

    Each file in ``images_dir`` whose name (without extension) is labelled
    ``1`` in the CSV is passed to ``crop_image``; every resulting crop is
    saved to ``output_dir`` as ``augmented_<counter>_<original filename>``.
    Files that cannot be opened or decoded are reported and skipped.

    Args:
        images_dir: Directory containing the source images.
        csv_path: CSV file read by ``load_image_labels``.
        output_dir: Directory for the augmented images; created if missing.

    Returns:
        None. A summary line with the number of images written is printed.
    """
    # Create output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)

    # Load image labels
    image_labels = load_image_labels(csv_path)

    # Counter for naming augmented images
    aug_counter = 0

    # os.listdir returns entries in arbitrary order; sorting keeps the counter
    # embedded in the output file names reproducible from run to run.
    for filename in sorted(os.listdir(images_dir)):
        # Skip anything that is unlabelled or not a positive example
        base_name = os.path.splitext(filename)[0]
        if image_labels.get(base_name) != 1:
            continue

        img_path = os.path.join(images_dir, filename)
        try:
            # Image.open is lazy: pixel data is only decoded once the image is
            # actually used, so the cropping has to sit inside the same guard
            # for corrupt files to be caught. The context manager releases the
            # underlying file as soon as the crops have been produced.
            with Image.open(img_path) as image:
                cropped_images = crop_image(image)
        except Exception as e:
            print(f"Could not read image: {img_path}. Error: {e}")
            continue

        # Save cropped images (write errors are deliberately not swallowed)
        for crop in cropped_images:
            aug_filename = f"augmented_{aug_counter}_{filename}"
            aug_path = os.path.join(output_dir, aug_filename)
            crop.save(aug_path)
            aug_counter += 1

    print(f"Augmentation complete. Generated {aug_counter} augmented images.")


In [None]:
# Dataset locations, relative to the notebook's working directory:
# the source images, their label table, and where augmented crops are written.
images_dir = "../DATA/cancer/im"
csv_path = "../DATA/cancer/df_datos.csv"
output_dir = "../DATA/cancer_cropped"

In [26]:
# Run the augmentation over the configured dataset paths.
augment_positive_class(
    images_dir=images_dir,
    csv_path=csv_path,
    output_dir=output_dir,
)

Augmentation complete. Generated 1572 augmented images.
