In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
ls

[0m[01;34mdrive[0m/  [01;34msample_data[0m/


In [3]:
cd drive/MyDrive/HIDDEN_DATASET

/content/drive/MyDrive/HIDDEN_DATASET


# This script will perform the following preprocessing steps:

  1. Take the image from a folder and apply to it:
  - Median filtering 
  - Bilateral filtering
  2. Save the image to a folder named "Hand_Made_Preprocessed/Original_Folder_Name"

### Utils

In [4]:
import numpy as np
import matplotlib.pyplot as plt
import cv2
import os

In [5]:
def pre_process_image(original_img, gaussian_filtering=False, median_filtering=False ,show_image=False, apply_bilateral= False, sharpen_image=False, fontsize=10, figsize=8):

    # convert to grayscale
    img = cv2.cvtColor(original_img, cv2.COLOR_BGR2GRAY)
    
    if median_filtering:
        # apply median filtering
        img = cv2.medianBlur(img, 5)
    
    # apply bilateral filtering
    if apply_bilateral:
        # the input of bilateral filter is the image, diameter of each pixel neighborhood, sigmaColor, sigmaSpace
        # their meaning is: 
        # diameter of each pixel neighborhood - how large the area of each pixel is considered when filtering
        # sigmaColor - how large the color range is considered when filtering
        # sigmaSpace - how large the distance range is considered when filtering
        
        img = cv2.bilateralFilter(img, 15, 2, 2)
    
    
    if gaussian_filtering:
        # apply gaussian blur
        img = cv2.GaussianBlur(img, (3, 3), 0)
    
    preprocessed_img = img

    if sharpen_image:
        # sharpen image
        kernel = np.array([[-1,-1,-1], [-1,9,-1], [-1,-1,-1]])
        preprocessed_img = cv2.filter2D(preprocessed_img, -1, kernel)

    if show_image:
        # show original image alongside preprocessed image
        fig, ax = plt.subplots(1, 2, figsize=(figsize, figsize))
        ax[0].imshow(original_img)
        ax[0].set_title('Original Image', fontsize=fontsize)
        ax[1].imshow(preprocessed_img, cmap='gray')
        # Compose a string to show which transformations were applied
        transformation_string =  ''
        if apply_bilateral:
            transformation_string += 'Bilateral Filtering, '
        if median_filtering:
            transformation_string += 'Median Filtering, '
        if gaussian_filtering:
            transformation_string += 'Gaussian Filtering, '
        if sharpen_image:
            transformation_string += 'Sharpening, '
        transformation_string = transformation_string[:-2]
        ax[1].set_title('Preprocessed Image\n' + transformation_string, fontsize=fontsize)
        
    return preprocessed_img


In [6]:
def pre_process_whole_folder(dataset_name, require_user_input=True):
    i=0
    # for each folder in path
    for pathology_folder in os.listdir(dataset_name):

        # remove .ini file from list
        if pathology_folder.endswith('.ini'):
            continue

        # for each image in pathology folder
        for image in os.listdir(dataset_name + '/' + pathology_folder):
            
            # remove .ini file from list
            if image.endswith('.ini'):
                continue

            # check wether actually is an image, ends with .jpeg or .png in case is not it outputs an error saying the filename
            if not image.endswith('.jpeg') and not image.endswith('.png'):
                print('filename: ' + image)
                raise Exception('not an image')

            # read image
            img = cv2.imread(dataset_name +'/'+ pathology_folder + '/' + image)
            # preprocess image
            processed_img = pre_process_image(img, median_filtering=True, apply_bilateral=True, show_image=False)
            # save image

            ### CAMBIARE I PATH CHE SEGUONO ###
            destination_folder = "RIPULITE" #PROVARE PER CREDERE

            destination_folder_path  = "/content/drive/MyDrive/HIDDEN_DATASET/"+ destination_folder + '/' + dataset_name + '/' + pathology_folder
            destination_image_path   = destination_folder_path + '/' + image


            # create folder if not exists
            if not os.path.exists(destination_folder_path):
                os.makedirs(destination_folder_path)
                print('created folder ' + destination_folder_path)
            
            # write the image and check if it was successful
            assert cv2.imwrite(destination_image_path, processed_img)
            
            
            i=i+1
            print('processed '+str(i)+' images: ' + destination_image_path, end='\r')
            
            if require_user_input:
                # ask user to continue
                answer = input('Continue? (y/n)')
                if answer == 'n':
                    break
                elif answer == 'y':
                    continue

        # print progress in % and indicate pathology
        print('processed ' + str(round((i/len(os.listdir(dataset_name))), 1)) + ' of ' + dataset_name + ' dataset')

### Pre-process

In [7]:
# SPECIFICARE LA CARTELLA DA CUI PRENDERE LE IMMAGINI SPORCHE
pre_process_whole_folder('SPORCHE', require_user_input=False) # da testare prima di runnare totalmente, una volta fatto impostare require_user_input= False

created folder /content/drive/MyDrive/HIDDEN_DATASET/RIPULITE/SPORCHE/all
processed 1120.0 of SPORCHE dataset
