In [1]:
import os
import numpy as np
import matplotlib.pyplot as plt
import cv2
import random
import pandas as pd
from tensorflow.keras.preprocessing.image import ImageDataGenerator

2024-02-27 15:33:23.308196: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-02-27 15:33:23.308243: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-02-27 15:33:23.309734: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-02-27 15:33:23.320414: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
def create_image_matrix(directory_path):
    """
    Create a DataFrame containing image paths from the specified directory.

    Parameters:
    - directory_path (str): Path to the directory containing subdirectories for different diseases.

    Returns:
    - DataFrame: DataFrame containing image paths, where columns represent different diseases.
    """

    # Get the list of folders (diseases)
    disease_folders = os.listdir(directory_path)

    # List of lists to store image paths
    all_image_paths = []

    # Iterate through each disease folder and add image paths to the list
    for disease in disease_folders:
        disease_path = os.path.join(directory_path, disease)
        # Check if it's a directory
        if os.path.isdir(disease_path):
            # List of image paths in the current disease folder
            image_paths = [os.path.join(disease_path, img) for img in os.listdir(disease_path)]
            # Add the list of image paths to the main list
            all_image_paths.append(image_paths)

    # Create a DataFrame from the list of lists
    df = pd.DataFrame(all_image_paths).transpose()

    # Rename columns with disease names
    df.columns = disease_folders

    # Fill empty entries with NaN to obtain a square matrix
    df = df.applymap(lambda x: x if pd.notna(x) else [np.nan] * len(df.columns))

    return df

In [3]:

train_directory = './data/archive/train'

In [None]:
def data_augmentation(image_matrix, num_augmentations=1, percentage_low=0.1, percentage_high=0.01):
    """
    Perform data augmentation on the input image matrix.

    Parameters:
    - image_matrix (DataFrame): DataFrame containing image paths, where columns represent different diseases.
    - num_augmentations (int): Number of augmentations to perform for each image.
    - percentage_low (float): Low threshold for determining augmentation percentage based on image count.
    - percentage_high (float): High threshold for determining augmentation percentage based on image count.

    Returns:
    - DataFrame: DataFrame containing the augmented images, where columns represent diseases.
    """

    # Determine the percentage of data augmentation based on the current image count
    image_count_dict = {}
    for disease in image_matrix.columns:
        image_count = image_matrix[disease].count()
        # Add the count to the dictionary
        image_count_dict[disease] = image_count

    augmented_images_list = []

    for disease in image_matrix.columns:
        current_images = image_count_dict[disease]
        if current_images < 1000:
            percentage = percentage_low
        else:
            percentage = percentage_high

        # Calculate the number of new images to generate
        num_new_images = int(percentage * current_images)

        # Perform data augmentation only if needed (more than 0 new images)
        if num_new_images > 0:
            # Get the list of image paths for the current disease
            image_paths = image_matrix[disease].dropna().tolist()

            # Create an image generator with data augmentation settings
            datagen = ImageDataGenerator(
                rescale=(1./255),
                shear_range=0.2,
                zoom_range=0.3,
                width_shift_range=0.2,
                height_shift_range=0.2,
                brightness_range=[0.2, 1.2],
                rotation_range=20,
                horizontal_flip=True,
            )

            for _ in range(num_new_images):
                # Filter out NaN entries
                valid_image_paths = [path for path in image_paths if isinstance(path, str)]
                if not valid_image_paths:
                    continue  # Skip if there are no valid image paths
                # Select a random image from the existing ones
                random_image_path = np.random.choice(valid_image_paths)
                img_array = np.expand_dims(cv2.imread(random_image_path), axis=0)

                # Perform data augmentation and add the new image to the list
                augmented_img = next(datagen.flow(img_array))[0]
                augmented_images_list.append({
                    'Disease': disease,
                    'Augmented_Image': augmented_img
                })

    # Create a DataFrame with the generated images
    augmented_images_df = pd.DataFrame(augmented_images_list)
    
    # Pivot the DataFrame to have diseases as columns
    final_df = augmented_images_df.pivot_table(index=augmented_images_df.index, columns='Disease', values='Augmented_Image', aggfunc='first')

    return final_df


In [None]:
new_images = data_augmentation(image_matrix)

In [None]:
new_images.head()

Disease,Acne and Rosacea Photos,Bullous Disease Photos,Eczema Photos,Melanoma Skin Cancer Nevi and Moles,Poison Ivy Photos and other Contact Dermatitis,Psoriasis pictures Lichen Planus and related diseases,Seborrheic Keratoses and other Benign Tumors,Tinea Ringworm Candidiasis and other Fungal Infections,Vascular Tumors
0,,,,,,,"[[[0.10196079, 0.1254902, 0.16862746], [0.1019...",,
1,,,,,,,"[[[0.17254902, 0.14117648, 0.16470589], [0.168...",,
2,,,,,,,"[[[0.10980393, 0.14509805, 0.20392159], [0.121...",,
3,,,,,,,"[[[0.59607846, 0.5803922, 0.68235296], [0.6039...",,
4,,,,,,,"[[[0.054901965, 0.011764707, 0.12941177], [0.0...",,


In [4]:
train_df = create_image_matrix('./data/archive/train')

In [5]:
train_df.head()

Unnamed: 0,Seborrheic Keratoses and other Benign Tumors,Acne and Rosacea Photos,Bullous Disease Photos,Psoriasis pictures Lichen Planus and related diseases,Melanoma Skin Cancer Nevi and Moles,Eczema Photos,Tinea Ringworm Candidiasis and other Fungal Infections,Vascular Tumors,Poison Ivy Photos and other Contact Dermatitis
0,./data/archive/train/Seborrheic Keratoses and ...,./data/archive/train/Acne and Rosacea Photos/a...,./data/archive/train/Bullous Disease Photos/bu...,./data/archive/train/Psoriasis pictures Lichen...,./data/archive/train/Melanoma Skin Cancer Nevi...,./data/archive/train/Eczema Photos/eczema-fing...,./data/archive/train/Tinea Ringworm Candidiasi...,./data/archive/train/Vascular Tumors/angiokera...,./data/archive/train/Poison Ivy Photos and oth...
1,./data/archive/train/Seborrheic Keratoses and ...,./data/archive/train/Acne and Rosacea Photos/p...,./data/archive/train/Bullous Disease Photos/de...,./data/archive/train/Psoriasis pictures Lichen...,./data/archive/train/Melanoma Skin Cancer Nevi...,./data/archive/train/Eczema Photos/eczema-fing...,./data/archive/train/Tinea Ringworm Candidiasi...,./data/archive/train/Vascular Tumors/venous-ma...,./data/archive/train/Poison Ivy Photos and oth...
2,./data/archive/train/Seborrheic Keratoses and ...,./data/archive/train/Acne and Rosacea Photos/p...,./data/archive/train/Bullous Disease Photos/he...,./data/archive/train/Psoriasis pictures Lichen...,./data/archive/train/Melanoma Skin Cancer Nevi...,./data/archive/train/Eczema Photos/eczema-trun...,./data/archive/train/Tinea Ringworm Candidiasi...,./data/archive/train/Vascular Tumors/cherry-an...,./data/archive/train/Poison Ivy Photos and oth...
3,./data/archive/train/Seborrheic Keratoses and ...,./data/archive/train/Acne and Rosacea Photos/p...,./data/archive/train/Bullous Disease Photos/pe...,./data/archive/train/Psoriasis pictures Lichen...,./data/archive/train/Melanoma Skin Cancer Nevi...,./data/archive/train/Eczema Photos/pompholyx-2...,./data/archive/train/Tinea Ringworm Candidiasi...,./data/archive/train/Vascular Tumors/lymphangi...,./data/archive/train/Poison Ivy Photos and oth...
4,./data/archive/train/Seborrheic Keratoses and ...,./data/archive/train/Acne and Rosacea Photos/h...,./data/archive/train/Bullous Disease Photos/ep...,./data/archive/train/Psoriasis pictures Lichen...,./data/archive/train/Melanoma Skin Cancer Nevi...,./data/archive/train/Eczema Photos/lichen-simp...,./data/archive/train/Tinea Ringworm Candidiasi...,./data/archive/train/Vascular Tumors/venous-la...,./data/archive/train/Poison Ivy Photos and oth...
