In [1]:
import os
import requests
import tarfile
import numpy as np
import cv2
from io import BytesIO
from skimage import io

def apply_logarithm(image):
    """Apply a logarithmic intensity transform and rescale it to 8 bits.

    Computes ``log(1 + value)`` for every element of ``image`` and then
    min-max normalizes the whole result to the range [0, 255].

    Args:
        image: Image array accepted by ``np.float32`` and ``cv2.normalize``.
            Values are expected to be non-negative; the logarithm is
            undefined for values <= -1.

    Returns:
        The transformed image as an 8-bit unsigned array.
    """
    img_float = np.float32(image)
    log_image = np.log(1 + img_float)
    # Ask OpenCV for 8-bit output directly: its conversion rounds to nearest
    # and saturates, whereas np.uint8(float_array) truncates toward zero and
    # can turn a normalized maximum of 254.9999 into 254.
    return cv2.normalize(
        log_image, None, 0, 255, cv2.NORM_MINMAX, dtype=cv2.CV_8U
    )

def apply_exponential(image):
    """Apply an exponential intensity transform and rescale it to 8 bits.

    Scales ``image`` by 1/255, computes ``exp(value) - 1`` for every element
    and then min-max normalizes the whole result to the range [0, 255].

    Args:
        image: Image array accepted by ``np.float32`` and ``cv2.normalize``.
            The division by 255 presumes 8-bit intensity values.

    Returns:
        The transformed image as an 8-bit unsigned array.
    """
    # Divide into a new array instead of using ``/=``: np.float32(image) is
    # not guaranteed to copy an input that is already float32, so an in-place
    # division could silently rescale the caller's data.
    img_float = np.float32(image) / 255.0
    exp_image = np.exp(img_float) - 1
    # Ask OpenCV for 8-bit output directly: its conversion rounds to nearest
    # and saturates, whereas np.uint8(float_array) truncates toward zero.
    return cv2.normalize(
        exp_image, None, 0, 255, cv2.NORM_MINMAX, dtype=cv2.CV_8U
    )

def apply_mean_filter(image, kernel_size=3):
    """Smooth an image with a square averaging kernel.

    Args:
        image: Image array accepted by ``cv2.filter2D``.
        kernel_size: Side length of the square kernel (default 3).

    Returns:
        The filtered image as returned by ``cv2.filter2D``.
    """
    window_shape = (kernel_size, kernel_size)
    # Uniform weights: each tap is 1 / (number of taps), so they sum to 1.
    box_kernel = np.ones(window_shape, np.float32) / (kernel_size ** 2)
    # A depth of -1 asks OpenCV to keep the output depth equal to the input's.
    return cv2.filter2D(image, -1, box_kernel)

def _read_bgr_image(tar, member):
    """Decode one archive member as a BGR image; return None if it is not one."""
    handle = tar.extractfile(member)
    if handle is None:
        return None
    with handle:
        encoded = np.frombuffer(handle.read(), dtype=np.uint8)
    if encoded.size == 0:
        # An empty member cannot be an image; skip it before decoding.
        return None
    # Decode with OpenCV so the channel order (BGR) is the one cv2.imwrite
    # expects. IMREAD_IGNORE_ORIENTATION keeps the pixels laid out as stored
    # in the file instead of applying any EXIF rotation.
    return cv2.imdecode(
        encoded, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION
    )


def process_images_from_tar_gz(url, augmentedDatasetPath, fruitFolders):
    """Download a .tar.gz image dataset and write an augmented copy to disk.

    The archive is downloaded into memory. For every directory in it whose
    last path component is listed in ``fruitFolders``, each image file below
    that directory is written to ``augmentedDatasetPath/<fruit>/`` four
    times: unchanged, and with the ``log_``, ``exp_`` and ``mean_`` prefixed
    variants produced by ``apply_logarithm``, ``apply_exponential`` and
    ``apply_mean_filter``. Members that cannot be decoded as images are
    skipped.

    Args:
        url: Address of the ``.tar.gz`` archive.
        augmentedDatasetPath: Root directory of the output dataset; it and
            one sub-directory per fruit are created if missing.
        fruitFolders: Names of the fruit directories to process.

    Raises:
        requests.exceptions.RequestException: If the download fails, times
            out, or returns an HTTP error status.
        tarfile.TarError: If the downloaded content is not a readable
            gzip-compressed tar archive.
    """
    # Download the .tar.gz file in memory. The (connect, read) timeout keeps
    # a stalled connection from blocking the run forever.
    response = requests.get(url, timeout=(10, 120))
    response.raise_for_status()

    # Open the tar.gz file directly from the downloaded bytes
    with tarfile.open(fileobj=BytesIO(response.content), mode='r:gz') as tar:
        members = tar.getmembers()

        # Sort once so directories and images are visited in name order
        directories = sorted(
            (m for m in members if m.isdir()), key=lambda m: m.name
        )
        files = sorted(
            (m for m in members if m.isfile()), key=lambda m: m.name
        )

        # Create the structure for the augmented dataset
        for fruit in fruitFolders:
            os.makedirs(os.path.join(augmentedDatasetPath, fruit), exist_ok=True)

        for directory in directories:
            fruitFolderPath = directory.name.rstrip('/')
            fruit = fruitFolderPath.split('/')[-1]
            if fruit not in fruitFolders:
                continue  # Skip directories that are not requested fruits

            augmentedFruitFolderPath = os.path.join(augmentedDatasetPath, fruit)
            # The trailing slash stops "x/apple" from also matching files
            # that live under a sibling such as "x/apple_green/".
            memberPrefix = fruitFolderPath + '/'

            for img_member in files:
                if not img_member.name.startswith(memberPrefix):
                    continue

                image = _read_bgr_image(tar, img_member)
                if image is None:
                    continue  # Ignore if not an image

                imageName = img_member.name.split('/')[-1]
                # Original first, then each transformed variant
                outputs = [
                    (imageName, image),
                    (f"log_{imageName}", apply_logarithm(image)),
                    (f"exp_{imageName}", apply_exponential(image)),
                    (f"mean_{imageName}", apply_mean_filter(image)),
                ]
                for outputName, outputImage in outputs:
                    cv2.imwrite(
                        os.path.join(augmentedFruitFolderPath, outputName),
                        outputImage,
                    )

    print("augmented_dataset criado com sucesso!")

# Source archive, output location and the fruit classes to augment.
url = 'https://github.com/GabrielSMartinelli/Proj_PDI/raw/main/fruits-DL-1024x768.tar.gz'
augmentedDatasetPath = "../augmented_dataset"
fruitFolders = [
    'apple',
    'araticum',
    'avocado',
    'banana',
    'butia',
    'khaki',
    'mango',
    'orange',
    'star_fruit',
    'ugli_fruit',
]

# Download the archive and build the augmented dataset on disk.
process_images_from_tar_gz(
    url=url,
    augmentedDatasetPath=augmentedDatasetPath,
    fruitFolders=fruitFolders,
)


augmented_dataset criado com sucesso!
