In [2]:
import pandas as pd
results_df = pd.read_csv('Results/hausdorff_dimensions - HSV.csv')

In [3]:
from scipy.stats import ttest_ind, mannwhitneyu, ks_2samp

In [5]:
import pandas as pd
from scipy.stats import shapiro, ks_2samp

benign_dims = results_df[results_df['Type'] == 'Benign']
malignant_dims = results_df[results_df['Type'] == 'Malignant']

benign_mole_dims = benign_dims['Mole_Hausdorff_Dim'].values
benign_border_dims = benign_dims['Border_Hausdorff_Dim'].values
malignant_mole_dims = malignant_dims['Mole_Hausdorff_Dim'].values
malignant_border_dims = malignant_dims['Border_Hausdorff_Dim'].values

# Shapiro-Wilk Test for Mole Dimensions
shapiro_benign_mole = shapiro(benign_mole_dims)
shapiro_malignant_mole = shapiro(malignant_mole_dims)

print("Shapiro-Wilk test for Mole Dimensions:")
print(f"Benign: Statistic = {shapiro_benign_mole.statistic}, P-value = {shapiro_benign_mole.pvalue}")
print(f"Malignant: Statistic = {shapiro_malignant_mole.statistic}, P-value = {shapiro_malignant_mole.pvalue}\n")

# Shapiro-Wilk Test for Border Dimensions
shapiro_benign_border = shapiro(benign_border_dims)
shapiro_malignant_border = shapiro(malignant_border_dims)

print("Shapiro-Wilk test for Border Dimensions:")
print(f"Benign: Statistic = {shapiro_benign_border.statistic}, P-value = {shapiro_benign_border.pvalue}")
print(f"Malignant: Statistic = {shapiro_malignant_border.statistic}, P-value = {shapiro_malignant_border.pvalue}\n")

# Kolmogorov-Smirnov Test for Mole Dimensions
ks_mole = ks_2samp(benign_mole_dims, malignant_mole_dims)
print("Kolmogorov-Smirnov test for Mole Dimensions:")
print(f"Statistic: {ks_mole.statistic}, P-value: {ks_mole.pvalue}\n")

# Kolmogorov-Smirnov Test for Border Dimensions
ks_border = ks_2samp(benign_border_dims, malignant_border_dims)
print("Kolmogorov-Smirnov test for Border Dimensions:")
print(f"Statistic: {ks_border.statistic}, P-value: {ks_border.pvalue}\n")


Shapiro-Wilk test for Mole Dimensions:
Benign: Statistic = 0.9513808890551118, P-value = 0.01443240878922426
Malignant: Statistic = 0.9491524445958296, P-value = 0.08350173870806059

Shapiro-Wilk test for Border Dimensions:
Benign: Statistic = 0.9889226153237349, P-value = 0.844191665524672
Malignant: Statistic = 0.9717961692373341, P-value = 0.4421826220817168

Kolmogorov-Smirnov test for Mole Dimensions:
Statistic: 0.2928153717627402, P-value: 0.026505830185109873

Kolmogorov-Smirnov test for Border Dimensions:
Statistic: 0.4402673350041771, P-value: 0.0001172707947570945



In [6]:
from scipy.stats import kstest
ks_benign_mole = kstest(benign_mole_dims, 'norm', args=(benign_mole_dims.mean(), benign_mole_dims.std()))

# One-Sample Kolmogorov-Smirnov Test for Border Hausdorff Dimensions against normal distribution
ks_benign_border = kstest(benign_border_dims, 'norm', args=(benign_border_dims.mean(), benign_border_dims.std()))

# Print results
print("One-Sample Kolmogorov-Smirnov test for Benign Mole Hausdorff Dimensions:")
print(f"Statistic: {ks_benign_mole.statistic}, P-value: {ks_benign_mole.pvalue}\n")

print("One-Sample Kolmogorov-Smirnov test for Benign Border Hausdorff Dimensions:")
print(f"Statistic: {ks_benign_border.statistic}, P-value: {ks_benign_border.pvalue}\n")

One-Sample Kolmogorov-Smirnov test for Benign Mole Hausdorff Dimensions:
Statistic: 0.10474972639437424, P-value: 0.4628823323032947

One-Sample Kolmogorov-Smirnov test for Benign Border Hausdorff Dimensions:
Statistic: 0.05755552135078701, P-value: 0.9774080112377901



In [13]:
# Mann-Whitney U Test for Mole Dimensions
mannwhitney_mole = mannwhitneyu(benign_mole_dims, malignant_mole_dims)
print("Mann-Whitney U test for Mole Dimensions:")
print(f"Statistic: {mannwhitney_mole.statistic}, P-value: {mannwhitney_mole.pvalue}\n")

Mann-Whitney U test for Mole Dimensions:
Statistic: 1507.0, P-value: 0.030033583845572087



In [14]:
# Mann-Whitney U Test for Mole Dimensions
mannwhitney_border = mannwhitneyu(benign_border_dims, malignant_border_dims)
print("Mann-Whitney U test for Mole Dimensions:")
print(f"Statistic: {mannwhitney_mole.statistic}, P-value: {mannwhitney_border.pvalue}\n")

Mann-Whitney U test for Mole Dimensions:
Statistic: 1507.0, P-value: 1.4993821801995776e-05



In [18]:
def display_image(image, title="Image"):
    plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB)) 
    plt.title(title)
    plt.axis('off')
    plt.show()

In [24]:
def extract_hsv_features_with_visualization(input_dir, label):
    hsv_features = []
    
    for filename in tqdm(os.listdir(input_dir)):
        if filename.endswith(('.jpg', '.png', '.jpeg')):
            image_path = os.path.join(input_dir, filename)
            image = cv2.imread(image_path)

            display_image(image, title=f"Original Image - {filename}")

            hsv_image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)

            display_image(hsv_image[:, :, 0], title=f"H Channel - {filename}")
            display_image(hsv_image[:, :, 1], title=f"S Channel - {filename}")
            display_image(hsv_image[:, :, 2], title=f"V Channel - {filename}")

            lower_black = np.array([0, 0, 0])       
            upper_black = np.array([180, 255, 30])  
            black_mask = cv2.inRange(cv2.cvtColor(image, cv2.COLOR_BGR2HSV), lower_black, upper_black)

            display_image(black_mask, title=f"Black Mask - {filename}")

            valid_hsv_values = hsv_image[black_mask == 0]  

            if valid_hsv_values.size > 0:  
                h_channel = valid_hsv_values[:, 0]
                s_channel = valid_hsv_values[:, 1]
                v_channel = valid_hsv_values[:, 2]

                mean_h = np.mean(h_channel)
                median_h = np.median(h_channel)
                std_h = np.std(h_channel)
                min_h = np.min(h_channel)
                max_h = np.max(h_channel)

                mean_s = np.mean(s_channel)
                median_s = np.median(s_channel)
                std_s = np.std(s_channel)
                min_s = np.min(s_channel)
                max_s = np.max(s_channel)

                mean_v = np.mean(v_channel)
                median_v = np.median(v_channel)
                std_v = np.std(v_channel)
                min_v = np.min(v_channel)
                max_v = np.max(v_channel)

                hsv_features.append({
                    'filename': filename,
                    'label': label,
                    'mean_h': mean_h,
                    'median_h': median_h,
                    'std_h': std_h,
                    'min_h': min_h,
                    'max_h': max_h,
                    'mean_s': mean_s,
                    'median_s': median_s,
                    'std_s': std_s,
                    'min_s': min_s,
                    'max_s': max_s,
                    'mean_v': mean_v,
                    'median_v': median_v,
                    'std_v': std_v,
                    'min_v': min_v,
                    'max_v': max_v
                })
    
    return hsv_features

In [7]:
benign_dir = r"C:\Users\Mia\Desktop\kpgm\Benign\HSV"
malignant_dir = r"C:\Users\Mia\Desktop\kpgm\Malignant\HSV"

benign_features = extract_hsv_features_with_visualization(benign_dir, 'benign')
malignant_features = extract_hsv_features_with_visualization(malignant_dir, 'malignant')

all_features = benign_features + malignant_features
features_df = pd.DataFrame(all_features)

output_csv = r"C:\Users\Mia\Desktop\mole_hsv_features_with_visuals_new.csv"
features_df.to_csv(output_csv, index=False)

print("HSV features extracted and saved to:", output_csv)

NameError: name 'extract_hsv_features_with_visualization' is not defined

## Analysis on images preprocessed with XYZ ##

In [9]:
features = ['mean', 'median', 'std', 'min', 'max']
features_df = pd.read_csv(r"C:\Users\Mia\Desktop\mole_hsv_features_with_visuals_new.csv")
statistical_results_hsv = {}

channels_hsv = ['h', 's', 'v']

for channel in channels_hsv:
    for feature in features:
        column_name = f'{feature}_{channel}'

        benign_values = features_df[features_df['label'] == 'benign'][column_name]
        malignant_values = features_df[features_df['label'] == 'malignant'][column_name]
        
        # Kolmogorov-Smirnov test to check normality
        ks_stat_benign, p_value_benign = ks_2samp(benign_values, benign_values)
        ks_stat_malignant, p_value_malignant = ks_2samp(malignant_values, malignant_values)
        
        if p_value_benign > 0.05 and p_value_malignant > 0.05:
            # If normal distribution, perform t-test
            stat, p_value = ttest_ind(benign_values, malignant_values)
            test_type = "t-test"
        else:
            # If not normally distributed, perform Mann-Whitney U test
            stat, p_value = mannwhitneyu(benign_values, malignant_values)
            test_type = "Mann-Whitney U"
        
        statistical_results_hsv[column_name] = {
            'test_type': test_type,
            'statistic': stat,
            'p_value': p_value
        }

results_df_hsv = pd.DataFrame(statistical_results_hsv).T
results_df_hsv.reset_index(inplace=True)
results_df_hsv.columns = ['Feature', 'Test Type', 'Test Statistic', 'P-value']

significant_results_hsv = results_df_hsv[results_df_hsv['P-value'] < 0.05]

print("Significant Results:")
print(significant_results_hsv)

print("\nAll Results:")
print(results_df_hsv)


Significant Results:
     Feature Test Type Test Statistic   P-value
3      min_h    t-test       5.570081       0.0
4      max_h    t-test      -3.985997  0.000129
5     mean_s    t-test       3.964945  0.000139
6   median_s    t-test         4.1175  0.000079
8      min_s    t-test       6.148547       0.0
12     std_v    t-test      -2.060754  0.041949
14     max_v    t-test      -2.686521  0.008469

All Results:
     Feature Test Type Test Statistic   P-value
0     mean_h    t-test       1.401406   0.16422
1   median_h    t-test       1.765372  0.080585
2      std_h    t-test      -0.482911  0.630225
3      min_h    t-test       5.570081       0.0
4      max_h    t-test      -3.985997  0.000129
5     mean_s    t-test       3.964945  0.000139
6   median_s    t-test         4.1175  0.000079
7      std_s    t-test      -1.770394   0.07974
8      min_s    t-test       6.148547       0.0
9      max_s    t-test      -1.388787  0.168014
10    mean_v    t-test        0.25862  0.796466
11  m

  res = hypotest_fun_out(*samples, **kwds)
