In [None]:
import cv2
import os
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pywt
import random
import seaborn as sns
from PIL import Image
from sklearn.preprocessing import StandardScaler
import shutil

In [None]:
def apply_fourier_transform(image, show = False, half_window = 30):
    """Score an image by the mean log-magnitude at the centre of its spectrum.

    The 2-D FFT of ``image`` is shifted with ``np.fft.fftshift``, converted to
    a log magnitude ``20 * log(|F| + 1)`` and averaged over a square window
    centred on the shifted spectrum.

    NOTE(review): ``fftshift`` moves the zero-frequency term to the centre of
    the array, so the averaged window covers the lowest spatial frequencies,
    not the high-frequency areas. The metric itself is left unchanged here;
    confirm whether that is the intended measure.

    Args:
        image: 2-D (single-channel) array; ``image.shape`` must unpack into
            ``(rows, cols)``.
        show: When True, plot the image next to its magnitude spectrum.
        half_window: Half the side length, in pixels, of the averaging window.
            The default of 30 gives a window of at most 60x60.

    Returns:
        Mean of the log-magnitude spectrum inside the central window.
    """
    # Apply Fourier Transform and move the zero-frequency term to the centre
    f = np.fft.fft2(image)
    fshift = np.fft.fftshift(f)
    magnitude_spectrum = 20 * np.log(np.abs(fshift) + 1)  # Adding 1 to avoid log(0)

    rows, cols = image.shape
    crow, ccol = rows // 2, cols // 2

    # Clamp the window start at 0. For images smaller than the window a
    # negative start index would wrap around to the end of the array and the
    # slice would no longer be centred (or could be empty).
    row_start = max(crow - half_window, 0)
    col_start = max(ccol - half_window, 0)
    central_window = magnitude_spectrum[row_start:crow + half_window,
                                        col_start:ccol + half_window]

    mean_magnitude = np.mean(central_window)

    if show:
        # Display the original image and magnitude spectrum
        plt.subplot(121), plt.imshow(image, cmap='gray')
        plt.title('Original Image'), plt.xticks([]), plt.yticks([])

        plt.subplot(122), plt.imshow(magnitude_spectrum, cmap='gray')
        plt.title('Magnitude Spectrum'), plt.xticks([]), plt.yticks([])
        plt.show()

    return mean_magnitude

In [None]:
def apply_wavelet_transform(image, wavelet='db4', show = False):
    """Return the mean squared detail coefficient of a 2-D wavelet transform.

    ``pywt.dwt2`` splits ``image`` into an approximation band (cA) and three
    detail bands (cH, cV, cD). The result is the sum of the squared detail
    coefficients divided by the total number of detail coefficients.

    Args:
        image: Image array passed straight to ``pywt.dwt2``.
        wavelet: Wavelet name handed to ``pywt.dwt2`` (default ``'db4'``).
        show: When True, plot the image with the approximation, horizontal
            and vertical bands (the diagonal band is not plotted).

    Returns:
        Energy of the detail coefficients, normalised by their count.
    """
    # Compute the wavelet transform once; all four bands are reused below
    cA, (cH, cV, cD) = pywt.dwt2(image, wavelet)

    # Measure the energy of the detail coefficients
    energy = np.sum(cH**2) + np.sum(cV**2) + np.sum(cD**2)

    # Normalize by the size of the detail coefficients
    energy /= (cH.size + cV.size + cD.size)

    if show:
        # Plotting the original image and coefficients
        plt.figure(figsize=(12, 3))
        plt.subplot(141), plt.imshow(image, cmap='gray'), plt.title('Original Image'), plt.axis('off')
        plt.subplot(142), plt.imshow(cA, cmap='gray'), plt.title('Approximation'), plt.axis('off')
        plt.subplot(143), plt.imshow(cH, cmap='gray'), plt.title('Horizontal Detail'), plt.axis('off')
        plt.subplot(144), plt.imshow(cV, cmap='gray'), plt.title('Vertical Detail'), plt.axis('off')
        plt.tight_layout()
        plt.show()

    return energy

In [None]:
def laplacian_blurriness(image):
    """Return the variance of the Laplacian response of ``image``.

    The Laplacian is evaluated with ``cv2.Laplacian`` at 64-bit float depth
    (``cv2.CV_64F``) and the variance over all of its values is returned.
    Presumably a lower value means a blurrier image; verify against how the
    score is used downstream.
    """
    return cv2.Laplacian(image, cv2.CV_64F).var()

In [None]:
def process_images(folder_path):
    """Compute the three blur metrics for every image file in a folder.

    Files whose name ends in .png, .jpg or .jpeg (any letter case) are read
    as grayscale and scored with ``laplacian_blurriness``,
    ``apply_fourier_transform`` and ``apply_wavelet_transform``.

    Args:
        folder_path: Directory to scan (not recursive).

    Returns:
        DataFrame with columns ``image`` (file name), ``laplacian``,
        ``fourier`` and ``wavelet``; one row per readable image, ordered by
        file name.
    """
    image_extensions = ('.png', '.jpg', '.jpeg')
    records = []

    # Sorted so the row order does not depend on the arbitrary os.listdir order
    for filename in sorted(os.listdir(folder_path)):
        if not filename.lower().endswith(image_extensions):
            continue

        # Flag 0 loads the file as a single-channel grayscale image
        image = cv2.imread(os.path.join(folder_path, filename), 0)
        if image is None:
            # cv2.imread returns None instead of raising when a file cannot
            # be decoded; skip it rather than crash in the metric functions.
            print(f"Skipping unreadable image: {filename}")
            continue

        records.append({
            "image": filename,
            "laplacian": laplacian_blurriness(image),
            "fourier": apply_fourier_transform(image),
            "wavelet": apply_wavelet_transform(image),
        })

    # Built once from the collected records; explicit columns keep the schema
    # stable even when no image was found.
    return pd.DataFrame(records, columns=["image", "laplacian", "fourier", "wavelet"])

In [None]:
def identify_outliers(image_blur_tuples, percentile_low=1, percentile_high=100):
    """Print every (name, blur) pair whose blur falls outside a percentile band.

    Args:
        image_blur_tuples: Sequence of ``(name, blur)`` pairs.
        percentile_low: Percentile of the blur values used as the lower bound.
        percentile_high: Percentile of the blur values used as the upper bound.

    Returns:
        None; the matching pairs are only printed.
    """
    scores = np.array([entry[1] for entry in image_blur_tuples])

    # Band limits taken from the distribution of the scores themselves
    lower_bound = np.percentile(scores, percentile_low)
    upper_bound = np.percentile(scores, percentile_high)

    def _outside_band(entry):
        _, blur = entry
        return blur < lower_bound or blur > upper_bound

    # Collect first, then report
    flagged = [entry for entry in image_blur_tuples if _outside_band(entry)]

    for name, blur in flagged:
        print(f"Image: {name}, Blurriness: {blur}")

In [None]:
def plot_top_images(image_blur_tuples, input_folder, top_n=16, reverse = False):
    """Show the first ``top_n`` images, ordered by blur score, in a 4-column grid.

    Args:
        image_blur_tuples: Sequence of ``(file name, blur score)`` pairs.
        input_folder: Directory the file names are resolved against.
        top_n: Maximum number of images to show.
        reverse: False (default) sorts ascending, so the lowest scores come
            first; True sorts descending.

    Returns:
        None; the figure is displayed with ``plt.show()``.
    """
    ranked = sorted(image_blur_tuples, key=lambda pair: pair[1], reverse=reverse)
    selected = ranked[:top_n]

    if not selected:
        print("No images to plot.")
        return

    n_cols = 4
    # Ceiling division: a partially filled last row still gets its own row
    n_rows = (len(selected) + n_cols - 1) // n_cols

    # squeeze=False keeps `axes` two-dimensional even when there is one row
    fig, axes = plt.subplots(n_rows, n_cols, figsize=(40, n_rows * 10), squeeze=False)

    # Switch every axis off up front so unused grid cells stay blank
    for ax in axes.flat:
        ax.axis('off')

    for ax, (name, blur) in zip(axes.flat, selected):
        image = cv2.imread(os.path.join(input_folder, name))
        if image is None:
            # cv2.imread returns None for missing or undecodable files
            ax.set_title(f"Could not read {name}")
            continue
        ax.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        ax.set_title(f"Blur: {blur:.2f} of image {name} " )

    plt.tight_layout()
    plt.show()

In [None]:
def calculate_statistics(image_blur_tuples):
    """Print mean, median, standard deviation, minimum and maximum blur.

    Args:
        image_blur_tuples: Sequence of pairs whose second element is the
            blur score.

    Returns:
        None; the five statistics are printed with two decimals each.
    """
    scores = np.array([entry[1] for entry in image_blur_tuples])

    # Every statistic is computed before anything is printed
    average = np.mean(scores)
    middle = np.median(scores)
    spread = np.std(scores)
    lowest = np.min(scores)
    highest = np.max(scores)

    report = [
        f"Mean Blur: {average:.2f}",
        f"Median Blur: {middle:.2f}",
        f"Standard Deviation of Blur: {spread:.2f}",
        f"Minimum Blur: {lowest:.2f}",
        f"Maximum Blur: {highest:.2f}",
    ]
    print("\n".join(report))

In [None]:
def plot_image_and_blur(image_name, input_folder, image_blur_tuples):
    """Display one image with its blur score in the title.

    Args:
        image_name: File name to look up in ``image_blur_tuples``.
        input_folder: Directory the file name is resolved against.
        image_blur_tuples: Sequence of ``(file name, blur score)`` pairs.

    Returns:
        None. A message is printed instead of a plot when the name is not in
        the list or the file cannot be read.
    """
    # First pair whose name matches, or None when there is no match
    matching_tuple = next((item for item in image_blur_tuples if item[0] == image_name), None)

    if matching_tuple is None:
        print(f"Image with name '{image_name}' not found in the tuple list.")
        return

    image_path = os.path.join(input_folder, matching_tuple[0])
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread returns None for missing or undecodable files
        print(f"Could not read image file '{image_path}'.")
        return

    # OpenCV loads images as BGR; matplotlib expects RGB
    plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    plt.title(f"Image: {image_name}\nBlur: {matching_tuple[1]:.2f}")
    plt.show()

In [None]:
# Load the label table from CSV (path is relative to the notebook's working directory).
# NOTE(review): `ISIC` is not referenced by any other cell visible here — confirm it is still needed.
ISIC = pd.read_csv("DataFrames/label.csv")

In [None]:
# Directory the image files are read from. The path is relative, so it is
# resolved against the notebook's working directory; the trailing slash lets
# file names be appended to it directly.
input_folder = '../../Datasets/ISIC/ISIC_2019_Training_Input/'

In [None]:
# Cache guard: reuse the saved metrics when the CSV exists; otherwise compute
# them from the images once and save them. This replaces reading the CSV,
# a commented-out compute step, and writing the same CSV straight back.
results_csv = "DataFrames/preprocessing_results.csv"

if os.path.exists(results_csv):
    results = pd.read_csv(results_csv)
else:
    results = process_images(input_folder)
    results.to_csv(results_csv, index = False)

results

In [None]:
# Only the metric columns are standardised; the file-name column is set aside
results_to_scale = results.drop(columns=["image"])

# Standardise each metric column with scikit-learn's StandardScaler
scaler = StandardScaler()
scaled_data = scaler.fit_transform(results_to_scale)

# Rebuild a frame from the scaled values, append the row-wise sum of the
# scaled metrics as 'sum', then put the 'image' column back
results_scaled = pd.DataFrame(scaled_data, columns=results_to_scale.columns).assign(
    sum=lambda frame: frame.sum(axis=1),
    image=results["image"],
)

results_scaled

In [None]:
# `columns` is defined here because this cell runs before the later cell that
# also assigns it; without this a fresh kernel raises NameError.
columns = ["laplacian", "fourier", "wavelet", "sum"]

# For each scaled metric, show the 52 images with the lowest value
# (plot_top_images sorts ascending by default).
for column in columns:
    print(column)

    data = list(zip(results_scaled["image"], results_scaled[column]))
    plot_top_images(data, input_folder, top_n=52)

In [None]:
# For each raw metric, show the 16 images with the highest value.
# The metric names are taken from `results` itself: the "sum" column exists
# only in results_scaled, and the shared `columns` list is not defined until a
# later cell.
for column in [name for name in results.columns if name != "image"]:
    print(column)

    data = list(zip(results["image"], results[column]))
    plot_top_images(data, input_folder, top_n=16, reverse = True)

In [None]:
columns = ["laplacian", "fourier", "wavelet", "sum"]

# For each metric keep the 50 rows with the lowest scaled value, then stack
# the selections in one concat call instead of growing a frame inside a loop.
# An image that is low on several metrics appears more than once.
low_images = pd.concat(
    [results_scaled.sort_values(column).head(50) for column in columns],
    axis = 0,
)

low_images

In [None]:
# Destination folder for the selected images: ~/Desktop/BlurredImages
desktop_path = os.path.join(os.path.expanduser("~"), "Desktop")
new_directory = os.path.join(desktop_path, "BlurredImages")

# exist_ok=True creates the folder when missing and is a no-op when it is
# already there, without a separate existence check.
os.makedirs(new_directory, exist_ok=True)

In [None]:
def _copy_images(image_names, source_folder, destination_folder):
    """Copy each named file from ``source_folder`` into ``destination_folder``."""
    for img_name in image_names:
        source_file = os.path.join(source_folder, img_name)
        destination_file = os.path.join(destination_folder, img_name)
        shutil.copy(source_file, destination_file)


# Images are read from `input_folder` rather than a hard-coded absolute path.
# NOTE(review): assumes `input_folder` points at the same dataset as
# /data/cino/Datasets/ISIC/ISIC_2019_Training_Input/ — confirm.

# Images with the lowest scaled scores; np.unique drops names selected by more
# than one metric.
_copy_images(np.unique(low_images.image), input_folder, new_directory)

print(f"Images copied to {new_directory}")

# manually selected images
manually_selected = [
    "ISIC_0010021.jpg", "ISIC_0025950.jpg", "ISIC_0026121.jpg",
    "ISIC_0026894.jpg", "ISIC_0026991.jpg", "ISIC_0027181.jpg",
    "ISIC_0029098.jpg", "ISIC_0030291.jpg", "ISIC_0033408.jpg",
    "ISIC_0033535.jpg", "ISIC_0062612.jpg", "ISIC_0063587.jpg",
    "ISIC_0065099.jpg", "ISIC_0067400.jpg", "ISIC_0067686.jpg",
    "ISIC_0069507.jpg", "ISIC_0071438.jpg", "ISIC_0072611.jpg",
]
_copy_images(manually_selected, input_folder, new_directory)

print(f"Images copied to {new_directory}")