In [None]:
import sys

# Add the parent directory to the module search path. Presumably this is what
# lets the `src` package imported further down resolve when the notebook is run
# from its own folder -- confirm against the repository layout.
sys.path.append("..")

In [None]:
import os
import cv2 as cv

In [None]:
# Directories holding the cleaned and cropped images, one folder per class.
folder_normal_image_name = "../data/clear_and_cropped_image_data/normal/"
folder_malignant_image_name = "../data/clear_and_cropped_image_data/malignant/"

# List each directory exactly once and sort the result. os.listdir returns
# entries in arbitrary order, and the name and path lists are paired up by
# index later on, so both must come from the same (deterministic) listing.
normal_image_names = sorted(os.listdir(folder_normal_image_name))
malignant_image_names = sorted(os.listdir(folder_malignant_image_name))

# Full paths are derived from the names above, so names[i] always matches paths[i].
normal_image_paths = [folder_normal_image_name + name for name in normal_image_names]
malignant_image_paths = [folder_malignant_image_name + name for name in malignant_image_names]

In [None]:
import numpy as np
import pandas as pd

In [None]:
from src.feature_extraction.extract_figure_features import extract_figure_features

# Project helper, called with no arguments. Its result is later indexed with the
# image number parsed from each file name, so it is presumably a mapping keyed
# by that number whose values are per-image feature mappings -- TODO confirm.
figure_features = extract_figure_features()

In [None]:
# Display the loaded figure features as the cell output (shown in full).
figure_features

In [None]:
def extract_image_feature(image, label):
    """Summarise an image with four basic intensity statistics.

    Args:
        image: Array-like object exposing ``mean()``, ``std()``, ``min()`` and
            ``max()`` methods (for example a NumPy array).
        label: Prefix used to build the keys of the returned dictionary.

    Returns:
        dict: ``{label}_mean``, ``{label}_std``, ``{label}_min`` and
        ``{label}_max`` (in that order) mapped to the corresponding statistic
        computed over the whole image.
    """
    stat_names = ("mean", "std", "min", "max")
    # Each statistic is a zero-argument method on the image; call it by name.
    return {f"{label}_{stat}": getattr(image, stat)() for stat in stat_names}

In [None]:
all_image_paths = normal_image_paths + malignant_image_paths
# Kept for any other cell that reads it; inside the loop the file name is
# derived from the path itself, so name and path can never get out of step.
all_image_name = normal_image_names + malignant_image_names
# Class label for every path, built positionally: 0 = normal, 1 = malignant.
# This avoids a linear `in normal_image_paths` scan on every iteration.
all_image_targets = [0] * len(normal_image_paths) + [1] * len(malignant_image_paths)

# Canny thresholds are placed at (1 -/+ sigma) * median intensity of the image.
sigma = 0.9

features = []

for image_path, target in zip(all_image_paths, all_image_targets):
    # The image number is the part of the file name between the first "_" and
    # the extension; it is also the key used to look up `figure_features`.
    image_name = os.path.basename(image_path)
    stem = os.path.splitext(image_name)[0]
    image_number = stem[stem.find("_") + 1 :]

    image = cv.imread(image_path, cv.IMREAD_GRAYSCALE)
    if image is None:
        # cv.imread signals failure by returning None instead of raising.
        raise OSError(f"cv.imread could not read image: {image_path!r}")

    blurred_image = cv.GaussianBlur(image, (5, 5), 1.4)

    # Intensity statistics of the raw and of the blurred image.
    curr_image_features = {"name": image_number}
    curr_image_features.update(extract_image_feature(image, "default"))
    curr_image_features.update(extract_image_feature(blurred_image, "blur"))

    # Sobel gradient: equal-weight blend of the absolute x and y responses.
    sobel_x = cv.Sobel(blurred_image, cv.CV_64F, 1, 0, ksize=3)
    sobel_y = cv.Sobel(blurred_image, cv.CV_64F, 0, 1, ksize=3)
    abs_sobel_x = cv.convertScaleAbs(sobel_x)
    abs_sobel_y = cv.convertScaleAbs(sobel_y)
    sobel_grad = cv.addWeighted(abs_sobel_x, 0.5, abs_sobel_y, 0.5, 0)
    curr_image_features["sobel_mean"] = sobel_grad.mean()
    curr_image_features["sobel_std"] = sobel_grad.std()

    # The Laplacian response is signed and can exceed 255. A plain
    # astype(np.uint8) would wrap those values around, so take the absolute
    # value and saturate to 8 bits, exactly like the Sobel branch above.
    laplacian = cv.convertScaleAbs(cv.Laplacian(blurred_image, cv.CV_64F))
    curr_image_features["laplacian_mean"] = laplacian.mean()
    curr_image_features["laplacian_std"] = laplacian.std()

    # Canny edges with thresholds derived from the median intensity.
    v = np.median(blurred_image)
    lower = int(max(0, (1.0 - sigma) * v))
    upper = int(min(255, (1.0 + sigma) * v))
    canny = cv.Canny(blurred_image, lower, upper, L2gradient=True)
    curr_image_features["canny_mean"] = canny.mean()
    curr_image_features["canny_std"] = canny.std()

    # Merge in the features looked up by image number.
    curr_image_features.update(figure_features[image_number])

    curr_image_features["target"] = target

    features.append(curr_image_features)

In [None]:
# Preview the first few feature records instead of dumping the whole list
# (one dict per image) into the cell output.
features[:3]

In [1]:
import matplotlib.pyplot as plt

# Index (within the normal-class paths) of the image used for this demo figure.
sample_index = 20

fig, axes = plt.subplots(2, 3, figsize=(12, 8))

image = cv.imread(normal_image_paths[sample_index], cv.IMREAD_GRAYSCALE)
if image is None:
    # cv.imread signals failure by returning None instead of raising.
    raise OSError(f"cv.imread could not read image: {normal_image_paths[sample_index]!r}")

blurred = cv.GaussianBlur(image, (5, 5), 1.4)

# Sobel gradient: equal-weight blend of the absolute x and y responses.
sobel_x = cv.Sobel(blurred, cv.CV_64F, 1, 0, ksize=3)
sobel_y = cv.Sobel(blurred, cv.CV_64F, 0, 1, ksize=3)
abs_sobel_x = cv.convertScaleAbs(sobel_x)
abs_sobel_y = cv.convertScaleAbs(sobel_y)
grad = cv.addWeighted(abs_sobel_x, 0.5, abs_sobel_y, 0.5, 0)

# The Laplacian response is signed and can exceed 255. A plain astype(np.uint8)
# would wrap those values around, so take the absolute value and saturate to
# 8 bits, exactly like the Sobel maps above.
laplacian = cv.convertScaleAbs(cv.Laplacian(blurred, cv.CV_64F))

# Canny thresholds are placed at (1 -/+ sigma) * median intensity.
sigma = 0.9
v = np.median(blurred)
lower = int(max(0, (1.0 - sigma) * v))
upper = int(min(255, (1.0 + sigma) * v))
canny = cv.Canny(blurred, lower, upper, L2gradient=True)

images = [image, blurred, grad, laplacian, canny]
titles = ["image", "blur", "sobel", "laplacian", "canny"]
for ax, img, title in zip(axes.flatten(), images, titles):
    ax.imshow(img, cmap="gray")
    ax.set_title(title)
    ax.axis("off")

# Only five panels are drawn on the 2x3 grid; hide the unused sixth axis.
axes[1, 2].axis("off")

plt.tight_layout()
plt.show()

In [None]:
# Build the feature table: one row per record in `features`, with columns taken
# from the keys of the per-image dictionaries.
df = pd.DataFrame(features)
# Show the first rows as a quick sanity check.
df.head()

In [None]:
# (number of rows, number of columns) of the feature table.
df.shape

In [None]:
# Number of rows per class label (0 = normal, 1 = malignant).
df["target"].value_counts()

In [None]:
# Number of entries listed in the normal and the malignant image folders.
len(normal_image_names), len(malignant_image_names)

In [None]:
# Count of missing values in each column of the feature table.
df.isna().sum()