Note: some of the functions defined in this notebook are deprecated.

## <a>**Function definition**</a>

In [5]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array, save_img
from PIL import Image, ImageEnhance
import os
import matplotlib.pyplot as plt
import numpy as np
import kagglehub, os, shutil

  from .autonotebook import tqdm as notebook_tqdm


In [6]:
def add_uniform_noise(image, minval=-1, maxval=1):
    """Return ``image`` with element-wise uniform random noise added.

    Args:
        image: Tensor-like image data; the noise is generated with the same
            shape (``tf.shape(image)``).
        minval: Lower bound passed to ``tf.random.uniform``.
        maxval: Upper bound passed to ``tf.random.uniform``.

    Returns:
        The sum of ``image`` and the sampled noise.
    """
    perturbation = tf.random.uniform(
        tf.shape(image),
        minval=minval,
        maxval=maxval,
    )
    return image + perturbation

In [7]:
def augment_and_save_images(input_dir, output_dir, augmentations_per_image=5):
    """Write randomly augmented copies of every image in ``input_dir``.

    Each source image gets uniform noise added (``add_uniform_noise``) and is
    then fed to a Keras ``ImageDataGenerator`` configured with a 1/255
    rescale, rotations of up to 10 degrees, random horizontal flips and
    ``'nearest'`` fill. The generator itself writes every produced image to
    ``output_dir`` with the prefix ``"aug"`` in JPEG format.

    Args:
        input_dir: Directory containing the source images. If it is not an
            existing directory the function returns without doing anything.
        output_dir: Directory receiving the augmented images. It is created
            when missing.
        augmentations_per_image: Number of augmented images written per source
            image. Values <= 0 write nothing.

    Returns:
        None.
    """
    # Guard clauses: nothing to do without a source directory or a positive count.
    if augmentations_per_image <= 0 or not os.path.isdir(input_dir):
        return

    # The generator saves into this directory, so it has to exist beforehand.
    os.makedirs(output_dir, exist_ok=True)

    datagen = ImageDataGenerator(
        rescale=1./255,
        rotation_range=10,          # Random rotation of the images
        horizontal_flip=True,       # Random horizontal mirroring
        fill_mode='nearest',        # Fill the empty areas left by a transformation
    )

    for img_name in os.listdir(input_dir):
        img_path = os.path.join(input_dir, img_name)
        # Sub-directories cannot be loaded as images; skip them.
        if not os.path.isfile(img_path):
            continue

        # Load the image and perturb it with uniform noise.
        img_array = add_uniform_noise(img_to_array(load_img(img_path)))

        # Add a leading batch dimension, as expected by ``datagen.flow``.
        img_array = tf.reshape(img_array, (1,) + img_array.shape)

        # ``flow`` yields batches endlessly and saves each one as a side
        # effect, so stop once the requested number has been produced.
        flow = datagen.flow(
            img_array,
            batch_size=1,
            save_to_dir=output_dir,
            save_prefix="aug",
            save_format="jpg",
        )
        for produced, _ in enumerate(flow, start=1):
            if produced >= augmentations_per_image:
                break

In [8]:
def add_salt_and_pepper_noise(image, salt_prob=0.001, pepper_prob=0.001):
    """Return a copy of ``image`` with salt-and-pepper noise applied.

    Randomly chosen pixels are set to 1 ("salt") and then, independently,
    other randomly chosen pixels are set to 0 ("pepper"); pepper overrides
    salt when the same pixel is drawn twice. The input is never modified.

    Args:
        image: Image with at least two dimensions (rows, columns, optional
            channels). Either an object exposing ``.numpy()`` (e.g. a
            TensorFlow tensor) or anything ``np.array`` accepts.
        salt_prob: The number of salt draws is ``int(salt_prob * image.size)``.
        pepper_prob: The number of pepper draws is
            ``int(pepper_prob * image.size)``.

    Returns:
        A new ``numpy.ndarray`` with the noise applied.

    Note:
        The values 1 and 0 are written as-is, so the function presumably
        expects pixel values scaled to [0, 1] -- confirm against callers.
    """
    source = image.numpy() if hasattr(image, "numpy") else image
    # Always work on a private, writable copy so the caller's data is untouched.
    image_np = np.array(source)

    height, width = image_np.shape[:2]
    num_salt = int(salt_prob * image_np.size)
    num_pepper = int(pepper_prob * image_np.size)

    def _random_pixels(count):
        # ``randint``'s upper bound is exclusive, so pass the full extent to
        # make the last row and column reachable.
        rows = np.random.randint(0, height, count)
        cols = np.random.randint(0, width, count)
        return rows, cols

    # Add salt (white pixels); indexing [rows, cols] covers every channel.
    if num_salt > 0:
        rows, cols = _random_pixels(num_salt)
        image_np[rows, cols] = 1

    # Add pepper (black pixels).
    if num_pepper > 0:
        rows, cols = _random_pixels(num_pepper)
        image_np[rows, cols] = 0

    return image_np

In [9]:
def clear_data(data_dir: str) -> int:
    """Delete every entry inside ``data_dir`` while keeping the directory itself.

    Regular files and symbolic links are unlinked; real sub-directories are
    removed recursively. A failure on one entry is printed and does not stop
    the remaining entries from being processed.

    Args:
        data_dir: Directory whose content is removed.

    Returns:
        0 when every entry was handled without error, 1 when at least one
        deletion failed.
    """
    status = 0
    for entry in os.listdir(data_dir):
        target = os.path.join(data_dir, entry)
        try:
            if os.path.isdir(target) and not os.path.islink(target):
                # A real directory: remove it with everything below it.
                shutil.rmtree(target)
            elif os.path.isfile(target) or os.path.islink(target):
                # Files and links (including links to directories) are unlinked.
                os.unlink(target)
        except Exception as e:
            print('Failed to delete %s. Reason: %s' % (target, e))
            status = 1
    return status


In [10]:
def reset_dataset (dst:str = '\\data\\current', src:str = '\\data\\archive') -> int:
    """Rebuild the working dataset under ``dst`` from the archived copy in ``src``.

    The content of ``dst`` is deleted, then:

    * ``src/train/<class>`` and ``src/test/<class>`` are merged into
      ``dst/train/<class>`` for the classes ``'0'`` and ``'1'``;
    * ``src/valid`` is copied to ``dst/valid``.

    Files are copied with ``shutil.copytree``, so when ``train`` and ``test``
    contain an entry with the same name in the same class, the one copied
    last (``test``) replaces the earlier one.

    Args:
        dst: Location of the working dataset, relative to the current working
            directory. Both ``'\\'`` and ``'/'`` are accepted as separators.
        src: Location of the archived dataset, relative to the current working
            directory. Both ``'\\'`` and ``'/'`` are accepted as separators.

    Returns:
        0 on success, 1 when a directory is missing, ``dst`` could not be
        emptied, or a filesystem operation failed (the reason is printed).
    """
    root = os.getcwd()

    def _under_root(relative: str) -> str:
        # The defaults use Windows separators; split on both kinds so the
        # same arguments resolve correctly on every platform.
        parts = [part for part in relative.replace('\\', '/').split('/') if part]
        return os.path.join(root, *parts)

    dst = _under_root(dst)
    src = _under_root(src)
    if not os.path.exists(src) or not os.path.exists(dst):
        print (f'Error: {src} or {dst} not found')
        return 1

    try:
        # Clear data from the current data dir; continuing on top of
        # leftovers would not be a clean reset.
        if clear_data(dst) != 0:
            print(f"Error: could not clear {dst}")
            return 1
        print("Dataset updating...")

        # Merge the archived train and test folders into the train folder.
        for class_ in ('0', '1'):
            os.makedirs(os.path.join(dst, 'train', class_), exist_ok=True)
        for suffix in ('train', 'test'):
            for class_ in ('0', '1'):
                shutil.copytree(
                    os.path.join(src, suffix, class_),
                    os.path.join(dst, 'train', class_),
                    dirs_exist_ok=True,
                )

        # Copy the validation split unchanged.
        shutil.copytree(
            os.path.join(src, 'valid'),
            os.path.join(dst, 'valid'),
            dirs_exist_ok=True,
        )
    except OSError as e:
        # shutil.Error is an OSError subclass, so copy failures land here too.
        print(f"Error: {str(e)}")
        return 1

    print("Dataset updated")
    return 0
  

In [11]:
def importlib_dataset_from_kagglehub(dataset_url:str = "hayder17/breast-cancer-detection", force:bool = True) -> int:
    """Download a Kaggle dataset into ``data/archive`` and rebuild ``data/current``.

    The dataset is fetched with ``kagglehub.dataset_download``, the previous
    content of ``<cwd>/data/archive`` is replaced by the downloaded files, and
    ``reset_dataset()`` is then called to regenerate ``<cwd>/data/current``.

    Args:
        dataset_url: Dataset handle passed to ``kagglehub.dataset_download``.
        force: Forwarded as ``force_download`` to ``kagglehub.dataset_download``.

    Returns:
        0 on success; 1 when ``data/archive`` or ``data/current`` is missing,
        ``dataset_url`` is empty, the download or copy failed, or
        ``reset_dataset()`` reported a failure.
    """
    dst = os.path.join(os.getcwd(), 'data', 'current')
    arch = os.path.join(os.getcwd(), 'data', 'archive')
    if not (os.path.exists(arch) and os.path.exists(dst)):
        print (f'Error: {arch} or {dst} not found')
        return 1

    if not dataset_url:
        print("Error: Path to dataset is empty.")
        return 1

    try:
        # Download before touching the archive, so a failed download does not
        # leave the archive emptied.
        cache_path = kagglehub.dataset_download(dataset_url, force_download=force)

        # Replace the archive content with the files from the kagglehub cache.
        clear_data (arch)
        shutil.copytree(cache_path, arch, dirs_exist_ok=True)

        # Rebuild data/current from the refreshed archive; a failed reset
        # means the import did not succeed (the reason was already printed).
        if reset_dataset() != 0:
            return 1
    except Exception as e:
        print(f"Error: {str(e)}")
        return 1
    print("Dataset imported")
    return 0


## <a>**-- SCRIPT FROM HERE --**</a>

#### Reset dataset (data/current)

In [None]:
# dt.reset_dataset()

#### Augment training dataset

In [None]:
# dt.augment_and_save_images(os.path.join(os.getcwd(), 'data', 'archive', 'train', 'Covid'), os.path.join(os.getcwd(), 'data', 'current', 'train', 'Covid'), 2)
# dt.augment_and_save_images(os.path.join(os.getcwd(), 'data', 'archive', 'train', 'Normal'), os.path.join(os.getcwd(), 'data', 'current', 'train', 'Normal'), 2)
# dt.augment_and_save_images(os.path.join(os.getcwd(), 'data', 'archive', 'train', 'Viral Pneumonia'), os.path.join(os.getcwd(), 'data', 'current', 'train', 'Viral Pneumonia'), 2)

#### Augment testing dataset

In [None]:
# dt.augment_and_save_images(os.path.join(os.getcwd(), 'data', 'archive', 'test', 'Covid'), os.path.join(os.getcwd(), 'data', 'current', 'test', 'Covid'), 2)
# dt.augment_and_save_images(os.path.join(os.getcwd(), 'data', 'archive', 'test', 'Normal'), os.path.join(os.getcwd(), 'data', 'current', 'test', 'Normal'), 2)
# dt.augment_and_save_images(os.path.join(os.getcwd(), 'data', 'archive', 'test', 'Viral Pneumonia'), os.path.join(os.getcwd(), 'data', 'current', 'test', 'Viral Pneumonia'), 2)