<a href="https://colab.research.google.com/github/OrgadShlishman/TAU-DLinMedicalImaging/blob/main/DatasetTools.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Install and import auxiliary libraries and packages

In [None]:
# Imports and installations
!pip install gdown
!pip install fastdup -Uq

import gdown
import os
import shutil
import fastdup
import cv2

import matplotlib.pyplot as plt
import numpy as np
import albumentations as A

from albumentations.pytorch import ToTensorV2
from skimage import io, img_as_float, img_as_ubyte, filters

fastdup.__version__

## Download your dataset (RAVIR usage example)

In [None]:
# Downloading RAVIR dataset

file_id = '1ZlZoSStvE9VCRq3bJiGhQH931EF0h3hh'
url = f'https://drive.google.com/uc?id={file_id}'

output = 'ravir-dataset.zip'
gdown.download(url, output, quiet=False)

!unzip ravir-dataset.zip

## Sharpen Images

In [None]:
# My innovative feature

import os
import numpy as np
import matplotlib.pyplot as plt
from skimage import io, img_as_float, img_as_ubyte, filters

class ImproveImage:
    """
    Load one or more images as grayscale, sharpen them with an unsharp mask,
    and offer side-by-side previews and saving of the sharpened results.

    All images are loaded and sharpened eagerly inside the constructor, so the
    instance holds both the originals (`images`) and the results
    (`sharp_images`) in memory, index-aligned with `input_image_paths`.
    """

    def __init__(self, input_image_path, output_dir=None, power=15):
        """
        Initialize the ImproveImage class.

        Parameters:
        - input_image_path: str, or list/tuple of str, path(s) to the image(s).
        - output_dir: str, optional, directory where images will be saved.
        - power: number, forwarded as the `amount` argument of the unsharp
          mask applied in `sharpen` (default 15).

        Raises:
        - ValueError: if input_image_path is neither a string nor a list/tuple.
        """
        self.output_dir = output_dir
        self.power = power

        # Normalise both accepted input shapes to one private list so the
        # loading and sharpening logic exists only once.
        if isinstance(input_image_path, str):
            paths = [input_image_path]
        elif isinstance(input_image_path, (list, tuple)):
            # Copy, so later changes to the caller's sequence cannot
            # desynchronise the paths from the already loaded images.
            paths = list(input_image_path)
        else:
            raise ValueError("input_image_path must be a string or a list of strings.")

        self.input_image_paths = paths
        self.images = [self.get_img(img_p) for img_p in paths]
        self.sharp_images = [self.sharpen(img) for img in self.images]

    def get_img(self, image_path):
        """
        Load and preprocess the image.

        The file is read with `as_gray=True` and then passed through
        `img_as_float`.

        Parameters:
        - image_path: str, path to the image file.

        Returns:
        - numpy array, preprocessed image.
        """
        image = io.imread(image_path, as_gray=True)
        return img_as_float(image)

    def sharpen(self, image):
        """
        Apply sharpening to the image.

        Uses `filters.unsharp_mask` with a fixed radius of 1 and
        `self.power` as the amount.

        Parameters:
        - image: numpy array, input image.

        Returns:
        - numpy array, sharpened image.
        """
        return filters.unsharp_mask(image, radius=1, amount=self.power)

    def preview(self):
        """
        Preview the original and sharpened images.

        Opens one figure per image with the original on the left and the
        sharpened version on the right, both drawn with the gray colormap.
        """
        for idx, (image, sharp_image) in enumerate(zip(self.images, self.sharp_images)):
            plt.figure(figsize=(10, 5))
            plt.subplot(1, 2, 1)
            plt.title(f"Original Image {idx+1}")
            plt.imshow(image, cmap='gray')
            plt.axis('off')

            plt.subplot(1, 2, 2)
            plt.title(f"Sharpened Image {idx+1}")
            plt.imshow(sharp_image, cmap='gray')
            plt.axis('off')

            plt.show()

    def save(self):
        """
        Save the sharpened images to the specified directory.

        Each file keeps the base name of its input path. Inputs that share a
        base name therefore overwrite one another, and saving into the
        directory the inputs were read from replaces the original files.

        Raises:
        - ValueError: if no output directory was given.
        """
        if not self.output_dir:
            raise ValueError("Output directory is not specified.")

        # exist_ok=True replaces the check-then-create pair and tolerates the
        # directory appearing between the two steps.
        os.makedirs(self.output_dir, exist_ok=True)

        for image, input_path in zip(self.sharp_images, self.input_image_paths):
            file_name = os.path.basename(input_path)
            output_path = os.path.join(self.output_dir, file_name)
            io.imsave(output_path, img_as_ubyte(image))
            print(f"Sharpened image saved to: {output_path}")

### Usage example: Sharpen and display images

In [None]:
# Show the path lists consumed by the sharpening example in the next cell.
# NOTE: `test_images` and `train_images` are assigned by the path-filtering
# cells at the end of the "Backups" section; run those cells first, otherwise
# this cell raises NameError.
print(test_images)
print(train_images)

In [None]:

# Sharpen the test paths first and the training paths second. Every run
# previews the before/after pairs and then writes the sharpened files into the
# directory paired with that list.
power = 15

for output_dir, sample_img in (
    ('/content/RAVIR Dataset/test', test_images),
    ('/content/RAVIR Dataset/train/training_images', train_images),
):
    improve_image_single = ImproveImage(sample_img, output_dir, power)
    improve_image_single.preview()
    improve_image_single.save()

## Removing Folder

### Usage example

In [None]:
# Folder to remove, together with everything inside it.
folder_to_delete = '/content/MyDrive/MyDrive/Deep Learning/Project/DataSet/3_RAVIR Dataset/test'

# Guard first: when the folder is absent there is nothing to remove.
if not os.path.exists(folder_to_delete):
    print(f"Folder {folder_to_delete} does not exist.")
else:
    # rmtree deletes the whole directory tree.
    shutil.rmtree(folder_to_delete)
    print(f"Folder {folder_to_delete} has been deleted.")


Folder /content/MyDrive/MyDrive/Deep Learning/Project/DataSet/3_RAVIR Dataset/test has been deleted.


## Copying Folder content

### Usage example

In [None]:
import shutil
import os

# Where to copy from and where to copy to.
source_folder = '/content/MyDrive/MyDrive/Deep Learning/Project/DataSet/RAVIR Dataset/test'
destination_folder = '/content/MyDrive/MyDrive/Deep Learning/Project/DataSet/3_RAVIR Dataset/test'

# Start from an empty destination: an existing folder is deleted with all of
# its contents (use with caution), then the folder is created afresh.
if os.path.exists(destination_folder):
    shutil.rmtree(destination_folder)
os.makedirs(destination_folder)

# dirs_exist_ok=True lets copytree write into the folder created just above.
shutil.copytree(source_folder, destination_folder, dirs_exist_ok=True)
print(f"Folder {source_folder} has been copied to {destination_folder}.")


Folder /content/MyDrive/MyDrive/Deep Learning/Project/DataSet/RAVIR Dataset/test has been copied to /content/MyDrive/MyDrive/Deep Learning/Project/DataSet/3_RAVIR Dataset/test.


## Data Augmentation Generation

In [None]:

def augment_and_save_images(image_dir, mask_dir, output_image_dir, output_mask_dir, num_augments=24):
    """
    Write randomly augmented copies of every image/mask pair to disk.

    For each regular file in `image_dir`, the file with the same name is read
    from `mask_dir`, the pair is passed through the augmentation pipeline
    `num_augments` times, and every result is saved as `<stem>_aug_<k>.png`
    (k starting at 1) in the output directories.

    Parameters:
    - image_dir: str, directory holding the source images.
    - mask_dir: str, directory holding the masks; a mask must have the same
      file name as its image.
    - output_image_dir: str, directory for augmented images (created if missing).
    - output_mask_dir: str, directory for augmented masks (created if missing).
    - num_augments: int, number of augmented copies written per pair.

    Notes:
    - A pair whose image or mask cannot be read is skipped with a message
      instead of aborting the whole run.
    - Mask pixel values are saved exactly as the pipeline returns them, so the
      augmented masks use the same label encoding as the source masks.
    - The directory listing is taken once, before anything is written, so
      files produced by this call are not re-augmented within the same call
      even when an output directory equals an input directory.
    - No random seed is set here; results differ from run to run.
    """
    # The pipeline deliberately ends without Normalize/ToTensorV2: results go
    # straight back to image files, and converting to normalised float tensors
    # and back only introduced truncation errors on the way to uint8.
    transform = A.Compose([
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.5),
        A.Rotate(limit=40, p=0.5),
        A.RandomBrightnessContrast(p=0.2),
        A.ShiftScaleRotate(shift_limit=0.1, scale_limit=0.1, rotate_limit=20, p=0.5),
        A.GaussianBlur(p=0.3),
        A.RandomGamma(p=0.2),
        A.ElasticTransform(p=0.2),
        A.GridDistortion(p=0.2),
        A.CoarseDropout(max_holes=8, max_height=16, max_width=16, p=0.5),
    ])

    # Ensure output directories exist
    os.makedirs(output_image_dir, exist_ok=True)
    os.makedirs(output_mask_dir, exist_ok=True)

    # List all image files; sorted so the processing order does not depend on
    # the order in which the file system returns the entries.
    image_files = sorted(
        f for f in os.listdir(image_dir) if os.path.isfile(os.path.join(image_dir, f))
    )

    for image_name in image_files:
        mask_name = image_name  # Assuming mask has the same name as the image
        image_path = os.path.join(image_dir, image_name)
        mask_path = os.path.join(mask_dir, mask_name)

        # cv2.imread reports an unreadable or missing file by returning None
        # rather than raising, so check before using the arrays.
        image = cv2.imread(image_path)
        mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
        if image is None or mask is None:
            print(f"Skipping '{image_name}': could not read the image or its mask.")
            continue

        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # Convert to RGB
        stem = os.path.splitext(image_name)[0]

        # Generate augmented images and masks
        for i in range(num_augments):
            augmented = transform(image=image, mask=mask)
            augmented_image = np.ascontiguousarray(augmented['image'])
            # The mask is written back unscaled. Multiplying a uint8 mask by
            # 255 wraps around (255 becomes 1) and destroys the labels of any
            # mask that already uses the 0..255 range.
            augmented_mask = np.ascontiguousarray(augmented['mask'])

            # Image and mask share the same augmented file name.
            aug_name = f"{stem}_aug_{i+1}.png"

            cv2.imwrite(os.path.join(output_image_dir, aug_name), cv2.cvtColor(augmented_image, cv2.COLOR_RGB2BGR))
            cv2.imwrite(os.path.join(output_mask_dir, aug_name), augmented_mask)

    print(f"Augmentation complete. Augmented images and masks saved to '{output_image_dir}' and '{output_mask_dir}' respectively.")


In [None]:
# Optional: use Google Drive for your input/output folders.
# NOTE(review): google.colab is presumably only importable on a Colab runtime - confirm before running elsewhere.
from google.colab import drive
# Mount point used by the Drive paths in the augmentation usage example below.
drive.mount('/content/drive')

Mounted at /content/drive


### Usage example

In [None]:
# Source images and their masks (each mask must share its image's file name).
image_dir = '/content/drive/MyDrive/Deep Learning/Project/DataSet/RAVIR Dataset Aug2/train/images'
mask_dir = '/content/drive/MyDrive/Deep Learning/Project/DataSet/RAVIR Dataset Aug2/train/mask'

# The augmented files are written next to the originals. To keep them apart,
# point these two at a separate location instead, for example
# '/content/drive/MyDrive/Deep Learning/Project/DataSet/Aug Dataset/train/images'
# and '/content/drive/MyDrive/Deep Learning/Project/DataSet/Aug Dataset/train/mask'.
output_image_dir = image_dir
output_mask_dir = mask_dir

augment_and_save_images(image_dir, mask_dir, output_image_dir, output_mask_dir, num_augments=2)


Augmentation complete. Augmented images and masks saved to '/content/drive/MyDrive/Deep Learning/Project/DataSet/RAVIR Dataset Aug2/train/images' and '/content/drive/MyDrive/Deep Learning/Project/DataSet/RAVIR Dataset Aug2/train/mask' respectively.


## Backups

In [None]:

# Define paths
# Images are read from the test folder and the renamed copies are written
# back into that same folder.
source_images_path = '/content/RAVIR Dataset/test'
destination_images_path = source_images_path

# NOTE(review): the mask paths point at the training masks while the images
# come from the test folder - confirm this pairing is intended.
source_masks_path = '/content/RAVIR Dataset/train/training_masks'
destination_masks_path = source_masks_path

# Ensure destination folders exist
os.makedirs(destination_images_path, exist_ok=True)
os.makedirs(destination_masks_path, exist_ok=True)

# List of existing files
image_files = [f for f in os.listdir(source_images_path) if f.endswith('.png')]  # Adjust extension if needed
mask_files = [f for f in os.listdir(source_masks_path) if f.endswith('.png')]  # Adjust extension if needed

# Extract the number part from filenames and sort them
def extract_number(filename):
    """
    Return the case number X from a file name shaped like 'IR_Case_X.png'.

    The number is taken from the third underscore-separated field, reading up
    to the first dot in that field.

    Raises:
    - ValueError: if there is no third field or it does not start with an
      integer.
    """
    pieces = filename.split('_')
    try:
        stem = pieces[2].split('.')[0]
        number = int(stem)
    except (IndexError, ValueError):
        raise ValueError(f"Filename format is incorrect: {filename}")
    return number

# Order both listings by case number so the new numbering follows the old order.
image_files = sorted(image_files, key=extract_number)
mask_files = sorted(mask_files, key=extract_number)

# Define a starting number for the new names
new_start_number = 84

# Copy and rename files
# Iterate over the list itself: wrapping it in zip() yields 1-tuples such as
# ('IR_Case_1.png',), and os.path.join() raises TypeError for a tuple.
# Masks are sorted above but are not copied by this cell.
for i, image_file in enumerate(image_files, start=new_start_number):
    # Define new filename
    new_image_filename = f'IR_Case_{i}.png'

    # Define source and destination paths
    src_image_path = os.path.join(source_images_path, image_file)
    dst_image_path = os.path.join(destination_images_path, new_image_filename)

    # shutil.copy replaces an existing file that already has the new name.
    shutil.copy(src_image_path, dst_image_path)

print("Files have been copied and renamed successfully.")


In [None]:
import os
import shutil

# Define paths: the renamed copies are written into the folder they are read from.
source_images_path = '/content/RAVIR Dataset/test'
destination_images_path = source_images_path

# Create the destination folder when it is missing.
os.makedirs(destination_images_path, exist_ok=True)

# Keep only the entries whose name ends in '.png' (adjust the extension if needed).
image_files = list(filter(lambda name: name.endswith('.png'), os.listdir(source_images_path)))

# Extract the number part from filenames and sort them
def extract_number(filename):
    """
    Parse the integer X out of a file name of the form 'IR_Case_X.png'.

    Only the third underscore-separated field is inspected, and only the part
    before its first dot is converted.

    Raises:
    - ValueError: when the name has fewer than three fields or that part is
      not an integer.
    """
    try:
        third_field = filename.split('_')[2]
        digits, *_rest = third_field.split('.')
        return int(digits)
    except (IndexError, ValueError):
        raise ValueError(f"Filename format is incorrect: {filename}")

# Process the files in ascending case-number order.
image_files = sorted(image_files, key=extract_number)

# First case number handed out to the copies.
new_start_number = 84

# Copy every file under its new sequential name.
for offset, image_file in enumerate(image_files):
    i = new_start_number + offset

    if isinstance(image_file, str):
        # New name, then the full source and destination paths.
        new_image_filename = f'IR_Case_{i}.png'
        src_image_path = os.path.join(source_images_path, image_file)
        dst_image_path = os.path.join(destination_images_path, new_image_filename)

        shutil.copy(src_image_path, dst_image_path)
    else:
        # Anything other than a plain string cannot be joined into a path.
        raise TypeError(f"Expected str, but got {type(image_file).__name__}")

print("Files have been copied and renamed successfully.")


In [None]:
import os
import re

# Folder whose files are scanned.
# NOTE(review): this is the training-image folder although the result is
# stored in `test_images` - confirm that this is the intended folder.
folder_path = '/content/RAVIR Dataset/train/training_images'

# Every entry in the folder.
files = os.listdir(folder_path)

# Captures the digits immediately in front of a trailing '.png'
# (adjust the regex if the files use another extension).
pattern = re.compile(r'(\d+)\.png$')

# Full paths of the files whose captured number is below 84. Names that do
# not match the pattern are ignored.
filtered_paths = [
    os.path.join(folder_path, name)
    for name, hit in ((name, pattern.search(name)) for name in files)
    if hit and int(hit.group(1)) < 84
]

# Print the list for verification.
print(filtered_paths)

# Name under which the sharpening example reads this list.
test_images = filtered_paths

In [None]:
import os
import re

# Folder whose files are scanned.
folder_path = '/content/RAVIR Dataset/train/training_images'

# Every entry in the folder.
files = os.listdir(folder_path)

# Captures the digits immediately in front of a trailing '.png'
# (adjust the regex if the files use another extension).
pattern = re.compile(r'(\d+)\.png$')

# Full paths of the files that pass both checks below.
filtered_paths = []

for file in files:
    match = pattern.search(file)

    # Skip names that do not end in '<digits>.png'.
    if match is None:
        continue

    # Skip files numbered 60 or higher; only numbers below 60 are kept.
    number = int(match.group(1))
    if number >= 60:
        continue

    full_path = os.path.join(folder_path, file)
    filtered_paths.append(full_path)

# Print the list for verification.
print(filtered_paths)

# Name under which the sharpening example reads this list.
train_images = filtered_paths