<a href="https://colab.research.google.com/github/Si-Rasti/Samp_img_handling/blob/main/Step2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install opencv-python

In [None]:
# import necessary libraries
import cv2
from google.colab import drive
import os
import random
import shutil
# Mount Google Drive at /content/drive so the dataset folders under
# /content/drive/MyDrive used below are reachable from this runtime
drive.mount('/content/drive')

In [None]:
def has_black_frame(image_path, threshold=10):
    """Report whether any corner of an image is (nearly) black.

    The image is converted to grayscale and a small patch -- about 1% of the
    height by 1% of the width, but never less than one pixel -- is taken from
    each of the four corners. A corner counts as black when the mean
    intensity of its patch is below ``threshold``.

    Args:
        image_path: Path of the image file to inspect.
        threshold: Mean grayscale intensity below which a corner is treated
            as black. Raise it to make the check more sensitive.

    Returns:
        True if at least one corner patch is darker than ``threshold``,
        False otherwise.

    Raises:
        ValueError: If the file cannot be read as an image.
    """
    image = cv2.imread(image_path)
    # cv2.imread reports failure by returning None rather than raising, so
    # check here to give a clear error instead of a failure inside cvtColor
    if image is None:
        raise ValueError(f"Could not read an image from {image_path!r}")

    gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    height, width = gray_image.shape

    # Corner patches may be defined as any ratio of the height and width.
    # The top/left extents are clamped to one pixel: for images smaller than
    # 100 px the 1% ratio truncates to 0, which would give an empty patch.
    patch_rows = max(1, int(height * 0.01))
    patch_cols = max(1, int(width * 0.01))
    bottom_start = int(height * 0.99)
    right_start = int(width * 0.99)

    corner_patches = (
        gray_image[:patch_rows, :patch_cols],      # top-left
        gray_image[bottom_start:, :patch_cols],    # bottom-left
        gray_image[:patch_rows, right_start:],     # top-right
        gray_image[bottom_start:, right_start:],   # bottom-right
    )

    # cv2.mean returns one value per channel; a grayscale patch only uses
    # the first. A single dark corner is enough to flag the image.
    return any(cv2.mean(patch)[0] < threshold for patch in corner_patches)



In [None]:
def copy_random_images_without_black_frame(src_folder, dest_folder, num_images):
    """Copy a random sample of images with no black corner into ``dest_folder``.

    Every regular image file directly inside ``src_folder`` is screened with
    ``has_black_frame``; up to ``num_images`` of the files that pass are
    chosen at random and copied under their original filenames.

    Args:
        src_folder: Directory holding the candidate images.
        dest_folder: Directory receiving the copies; created if missing.
        num_images: Number of images to copy. If fewer images pass the
            screening, all of them are copied and a notice is printed.

    Raises:
        shutil.SameFileError: If ``src_folder`` and ``dest_folder`` are the
            same directory.
    """
    # Fail before the slow image screening: shutil.copy would reject copying
    # a file onto itself anyway, but only after every image had been read.
    if os.path.realpath(src_folder) == os.path.realpath(dest_folder):
        raise shutil.SameFileError(
            f"Source and destination are the same folder: {src_folder!r}"
        )

    image_extensions = ('.jpg', '.jpeg', '.png', '.bmp', '.tif', '.tiff')

    # os.listdir also returns sub-directories and stray non-image files,
    # which cannot be decoded as images, so keep regular image files only
    candidates = [
        name for name in os.listdir(src_folder)
        if name.lower().endswith(image_extensions)
        and os.path.isfile(os.path.join(src_folder, name))
    ]

    # Filter out images with any black corner
    valid_images = [
        name for name in candidates
        if not has_black_frame(os.path.join(src_folder, name))
    ]

    # random.sample raises ValueError when asked for more items than exist,
    # so cap the request and report the shortfall instead
    sample_size = min(num_images, len(valid_images))
    if sample_size < num_images:
        print(
            f"Only {len(valid_images)} eligible images in {src_folder}; "
            f"requested {num_images}."
        )
    selected_images = random.sample(valid_images, sample_size)

    os.makedirs(dest_folder, exist_ok=True)

    # Copy the eligible images to the destination
    for name in selected_images:
        shutil.copy(os.path.join(src_folder, name),
                    os.path.join(dest_folder, name))


In [None]:
def test_allocate(origin_folder, insertion_folder, num):
    """Move a random sample of files from ``origin_folder`` to ``insertion_folder``.

    Only regular files directly inside ``origin_folder`` are considered, so
    sub-directories (including ``insertion_folder`` itself when it is nested
    inside ``origin_folder``) are never picked.

    Args:
        origin_folder: Directory holding the primarily selected images.
        insertion_folder: Directory receiving the moved files; created if
            missing.
        num: Number of files to move. If fewer files are available, all of
            them are moved and a notice is printed.
    """
    # os.listdir also returns directories; moving a directory such as a
    # nested test folder into itself would fail, so keep regular files only
    prim_image_files = [
        name for name in os.listdir(origin_folder)
        if os.path.isfile(os.path.join(origin_folder, name))
    ]

    # random.sample raises ValueError when asked for more items than exist,
    # so cap the request and report the shortfall instead
    sample_size = min(num, len(prim_image_files))
    if sample_size < num:
        print(
            f"Only {len(prim_image_files)} files in {origin_folder}; "
            f"requested {num}."
        )
    test_images = random.sample(prim_image_files, sample_size)

    os.makedirs(insertion_folder, exist_ok=True)

    # Move the selected images to the test folder
    for sample in test_images:
        shutil.move(os.path.join(origin_folder, sample),
                    os.path.join(insertion_folder, sample))


In [None]:
# The 8 dermatologic lesion classes defined in the original CSV file.
# Each name is used below as the sub-folder holding that class's images.
folder_names = ['AK', 'BCC', 'BKL', 'DF', 'MEL', 'NV', 'SCC', 'VASC']

# Per-class sample sizes; modify them based on the needed sample size.
# train_images_per_folder: images without a black corner copied per class.
# test_images_num: images then moved from that copy into the class's test folder.
train_images_per_folder = 125
test_images_num = 25


In [None]:
# For every lesion class: copy a random sample of images without a black
# corner into the destination folder, then move part of that folder's
# content into its 'test' sub-folder.
# NOTE(review): src_folder and dest_folder are built from the same path, so
# the copy step targets the files' own directory, and shutil.copy refuses to
# copy a file onto itself. Confirm the intended destination root.
dataset_root = '/content/drive/MyDrive/ISIC_2019_Training_Input'

for folder_name in folder_names:
    src_folder = os.path.join(dataset_root, folder_name)
    dest_folder = os.path.join(dataset_root, folder_name)
    test_dest_folder = os.path.join(dest_folder, 'test')

    copy_random_images_without_black_frame(
        src_folder, dest_folder, train_images_per_folder
    )
    test_allocate(dest_folder, test_dest_folder, test_images_num)
