In [2]:
import os
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from skimage import img_as_ubyte
from skimage.filters import gabor
from skimage.feature import greycomatrix, greycoprops
from sklearn.decomposition import PCA
from skimage.segmentation import active_contour
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.pipeline import Pipeline
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.metrics import accuracy_score, classification_report

In [3]:
def extract_features(image):
    # GLCM features
    image_rescaled = image.astype(np.uint8)  # Ensure the image type is suitable for GLCM
    glcm = greycomatrix(image_rescaled, distances=[1], angles=[0, np.pi/4, np.pi/2, 3*np.pi/4], levels=256, symmetric=True, normed=True)
    energy = greycoprops(glcm, 'energy')
    homogeneity = greycoprops(glcm, 'homogeneity')
    contrast = greycoprops(glcm, 'contrast')
    correlation = greycoprops(glcm, 'correlation')

    # Gabor features
    gabor_filters_real, gabor_filters_imag = gabor(image_rescaled, frequency=0.6)
    gabor_energy = np.mean(np.sqrt(gabor_filters_real**2 + gabor_filters_imag**2))
    gabor_amplitude = np.mean(np.abs(gabor_filters_real))

    # Color features
    img_rgb = cv2.cvtColor(image_rescaled, cv2.COLOR_GRAY2RGB)  # Assuming input is grayscale
    mean_rgb = np.mean(img_rgb, axis=(0, 1))
    std_rgb = np.std(img_rgb, axis=(0, 1))

    # Ensure all features are flattened to 1D arrays
    energy = np.ravel(energy)
    homogeneity = np.ravel(homogeneity)
    contrast = np.ravel(contrast)
    correlation = np.ravel(correlation)
    mean_rgb = mean_rgb.flatten()
    std_rgb = std_rgb.flatten()

    # Concatenate all features into a single feature vector
    feature_vector = np.concatenate([energy, homogeneity, contrast, correlation,
                                     [gabor_energy, gabor_amplitude],
                                     mean_rgb, std_rgb])
    return feature_vector


In [4]:
def segment_lesion(image):
    # Convert RGB image to YCbCr color space
    image_YCbCr = cv2.cvtColor(image, cv2.COLOR_RGB2YCrCb)

    # Create lesion masks based on thresholds
    mean_Cr = np.mean(image_YCbCr[:, :, 1])
    threshold = 110

    white_lesion_mask = np.zeros_like(image_YCbCr[:, :, 2], dtype=np.uint8)
    red_lesion_mask = np.zeros_like(image_YCbCr[:, :, 1], dtype=np.uint8)

    if mean_Cr < threshold:
        # White lesion mask using Cb intensity
        mean_Cb = np.mean(image_YCbCr[:, :, 2])
        white_lesion_mask[image_YCbCr[:, :, 2] > mean_Cb] = 255
    else:
        # Red lesion mask using Cr intensity
        mean_Cr = np.mean(image_YCbCr[:, :, 1])
        red_lesion_mask[image_YCbCr[:, :, 1] > mean_Cr] = 255

    # Active contour segmentation on original image
    white_lesion = active_contour(image, white_lesion_mask)
    red_lesion = active_contour(image, red_lesion_mask)

    
    return white_lesion, red_lesion


In [5]:
images_directory = r"E:\ocd\Images"
image_files = os.listdir(images_directory)

# Filter images containing 'oral_cancer' in their filenames
cancerous_image_files = [filename for filename in image_files if 'oral_cancer' in filename]
non_cancerous_image_files = [filename for filename in image_files if 'oral_cancer' not in filename]

cancerous_image_paths = [os.path.join(images_directory, filename) for filename in cancerous_image_files]
non_cancerous_image_paths = [os.path.join(images_directory, filename) for filename in non_cancerous_image_files]

all_image_paths = cancerous_image_paths + non_cancerous_image_paths

X_features = []
all_labels = []

for img_path in all_image_paths:
    try:
        img = cv2.imread(img_path)
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        print(img_path)
    
        # Segmentation of lesions
        white_lesion, red_lesion = segment_lesion(img_rgb)

        # Extract features from segmented lesions 
        features_white_lesion = extract_features(white_lesion)
        features_red_lesion = extract_features(red_lesion)

        # Combine features from both types of lesions
        combined_features = np.concatenate([features_white_lesion, features_red_lesion])

        # Append to feature list
        X_features.append(combined_features)

        # Assign labels based on 'oral_cancer' substring in filename
        if 'oral_cancer' in img_path:
            all_labels.append('Cancerous')
        else:
            all_labels.append('Non-cancerous')
        print("done")
    except Exception as e:
        print(f"Error processing {img_path}: {str(e)}")

X_features = np.array     (X_features)
all_labels = np.array(all_labels)
print("done")


E:\ocd\Images\oral_cancer (1).jpeg
done
E:\ocd\Images\oral_cancer (1).jpg
done
E:\ocd\Images\oral_cancer (1).png
done
E:\ocd\Images\oral_cancer (10).jpeg
done
E:\ocd\Images\oral_cancer (10).jpg
done
E:\ocd\Images\oral_cancer (10).png
done
E:\ocd\Images\oral_cancer (11).jpg
done
E:\ocd\Images\oral_cancer (11).png
done
E:\ocd\Images\oral_cancer (12).jpg
done
E:\ocd\Images\oral_cancer (12).png
done
E:\ocd\Images\oral_cancer (13).jpg
done
E:\ocd\Images\oral_cancer (13).png
done
E:\ocd\Images\oral_cancer (14).jpg
done
E:\ocd\Images\oral_cancer (14).png
done
E:\ocd\Images\oral_cancer (15).jpg
done
E:\ocd\Images\oral_cancer (15).png
done
E:\ocd\Images\oral_cancer (16).jpg
done
E:\ocd\Images\oral_cancer (17).jpg
done
E:\ocd\Images\oral_cancer (18).jpg
done
E:\ocd\Images\oral_cancer (19).jpg
done
E:\ocd\Images\oral_cancer (2).jpeg
done
E:\ocd\Images\oral_cancer (2).jpg
done
E:\ocd\Images\oral_cancer (2).png
done
E:\ocd\Images\oral_cancer (20).jpg
done
E:\ocd\Images\oral_cancer (21).jpg
done
E:\

done
E:\ocd\Images\noncancer (53).PNG
done
E:\ocd\Images\noncancer (54).PNG
done
E:\ocd\Images\noncancer (55).PNG
done
E:\ocd\Images\noncancer (56).PNG
done
E:\ocd\Images\noncancer (57).PNG
done
E:\ocd\Images\noncancer (58).PNG
done
E:\ocd\Images\noncancer (59).PNG
done
E:\ocd\Images\noncancer (6).jpg
done
E:\ocd\Images\noncancer (6).PNG
done
E:\ocd\Images\noncancer (60).PNG
done
E:\ocd\Images\noncancer (61).PNG
done
E:\ocd\Images\noncancer (62).PNG
done
E:\ocd\Images\noncancer (63).PNG
done
E:\ocd\Images\noncancer (64).PNG
done
E:\ocd\Images\noncancer (65).PNG
done
E:\ocd\Images\noncancer (66).PNG
done
E:\ocd\Images\noncancer (67).PNG
done
E:\ocd\Images\noncancer (68).PNG
done
E:\ocd\Images\noncancer (69).PNG
done
E:\ocd\Images\noncancer (7).jpg
done
E:\ocd\Images\noncancer (7).PNG
done
E:\ocd\Images\noncancer (70).PNG
done
E:\ocd\Images\noncancer (71).PNG
done
E:\ocd\Images\noncancer (72).PNG
done
E:\ocd\Images\noncancer (73).PNG
done
E:\ocd\Images\noncancer (74).PNG
done
E:\ocd\Imag

In [6]:
feature_names = [f"feature_{i}" for i in range(X_features.shape[1])]  # Naming each feature
data = pd.DataFrame(X_features, columns=feature_names)
data['label'] = all_labels  # Add labels as a new column

# Save DataFrame to a CSV file
data.to_csv('features_labels.csv', index=False)
print("saved")

saved


In [34]:
k =10

# Initialize PCA and feature selection steps
pca = PCA()
selector = SelectKBest(score_func=f_classif, k=48)


pipeline = Pipeline([
    ('pca', pca),
    ('selector', selector)
])

# Fit the pipeline to your features and transform the data
X_features_selected = pipeline.fit_transform(X_features, all_labels)

# Split data into training and testing sets after PCA and feature selection
X_train_selected, X_test_selected, y_train_selected, y_test_selected = train_test_split(
    X_features_selected, all_labels, test_size=0.2, random_state=42)

In [35]:
# Naive Bayes Classifier
nb_classifier = GaussianNB()
nb_classifier.fit(X_train_selected, y_train_selected)
nb_predictions = nb_classifier.predict(X_test_selected)
nb_accuracy = accuracy_score(y_test_selected, nb_predictions)
print("Naive Bayes Accuracy:", nb_accuracy)

Naive Bayes Accuracy: 0.5294117647058824


In [36]:
svm_classifier = SVC(kernel='rbf', gamma='auto')  
svm_classifier.fit(X_train_selected, y_train_selected)
svm_predictions = svm_classifier.predict(X_test_selected)
svm_accuracy = accuracy_score(y_test_selected, svm_predictions)
print("SVM Accuracy:", svm_accuracy)

SVM Accuracy: 0.6078431372549019


In [40]:
# K-Nearest Neighbors (KNN) Classifier
knn = KNeighborsClassifier(n_neighbors=4)
knn.fit(X_train_selected, y_train_selected)
knn_pred = knn.predict(X_test_selected)
knn_accuracy = accuracy_score(y_test_selected, knn_pred)
print("KNN Accuracy:", knn_accuracy)

KNN Accuracy: 0.7058823529411765


In [48]:
# Generate the confusion matrix for KNN Classifier
conf_matrix_knn = confusion_matrix(y_test_selected, knn_pred)
print("Confusion Matrix for KNN Classifier:")
print(conf_matrix_knn)

# Generate the confusion matrix for SVM Classifier
conf_matrix_svm = confusion_matrix(y_test_selected, svm_predictions)
print("\nConfusion Matrix for SVM Classifier:")
print(conf_matrix_svm)

# Generate the confusion matrix for Naive Bayes Classifier
conf_matrix_nb = confusion_matrix(y_test_selected, nb_predictions)
print("\nConfusion Matrix for Naive Bayes Classifier:")
print(conf_matrix_nb)


Confusion Matrix for KNN Classifier:
[[20  5]
 [10 16]]

Confusion Matrix for SVM Classifier:
[[ 9 16]
 [ 4 22]]

Confusion Matrix for Naive Bayes Classifier:
[[20  5]
 [19  7]]


In [73]:
# Assuming you have trained KNN_classifier, SVM_classifier, and NB_classifier earlier

# Load the test image
test_image_path = r"E:\ocd\Images\oral_cancer (43).jpg"
test_img = cv2.imread(test_image_path)
test_img_rgb = cv2.cvtColor(test_img, cv2.COLOR_BGR2RGB)

# Segmentation of lesions
white_lesion, red_lesion = segment_lesion(test_img_rgb)

# Extract features from segmented lesions 
features_white_lesion = extract_features(white_lesion)
features_red_lesion = extract_features(red_lesion)

# Combine features from both types of lesions
test_combined_features = np.concatenate([features_white_lesion, features_red_lesion])

# Predict using KNN
knn_prediction = knn.predict([test_combined_features])

# Predict using SVM
svm_prediction = svm_classifier.predict([test_combined_features])

# Predict using Naive Bayes
nb_prediction = nb_classifier.predict([test_combined_features])
print(f"KNN Prediction: {knn_prediction[0]}")
print(f"SVM Prediction: {svm_prediction[0]}")
print(f"Naive Bayes Prediction: {nb_prediction[0]}")


KNN Prediction: Non-cancerous
SVM Prediction: Non-cancerous
Naive Bayes Prediction: Cancerous


In [49]:
# Create separate images for each type of lesion
white_lesion_img = np.zeros_like(test_img_rgb, dtype=np.uint8)
red_lesion_img = np.zeros_like(test_img_rgb, dtype=np.uint8)

# Fill the masks with lesion pixel locations
white_lesion_img[white_lesion[:, 0], white_lesion[:, 1], :] = [255, 255, 255]  # Set white lesion as white color
red_lesion_img[red_lesion[:, 0], red_lesion[:, 1], :] = [255, 0, 0]  # Set red lesion as red color

# Create semi-transparent overlays
white_lesion_overlay = cv2.addWeighted(test_img_rgb, 0.5, white_lesion_img, 0.5, 0)
red_lesion_overlay = cv2.addWeighted(test_img_rgb, 0.5, red_lesion_img, 0.5, 0)

# Display the original image with overlays
fig, axs = plt.subplots(1, 2, figsize=(12, 6))

axs[0].imshow(white_lesion_overlay)
axs[0].set_title('White Lesion Overlay')
axs[0].axis('off')

axs[1].imshow(red_lesion_overlay)
axs[1].set_title('Red Lesion Overlay')
axs[1].axis('off')

plt.tight_layout()
plt.show()


NameError: name 'test_img_rgb' is not defined

In [75]:
# Check the maximum and minimum coordinate values for white lesion
print("White Lesion - Min X:", np.min(white_lesion[:, 0]), "Max X:", np.max(white_lesion[:, 0]))
print("White Lesion - Min Y:", np.min(white_lesion[:, 1]), "Max Y:", np.max(white_lesion[:, 1]))

# Check the maximum and minimum coordinate values for red lesion
print("Red Lesion - Min X:", np.min(red_lesion[:, 0]), "Max X:", np.max(red_lesion[:, 0]))
print("Red Lesion - Min Y:", np.min(red_lesion[:, 1]), "Max Y:", np.max(red_lesion[:, 1]))


White Lesion - Min X: 4.819708934888565 Max X: 4.819708934888657
White Lesion - Min Y: 0.61950611927116 Max Y: 0.6195061192711702
Red Lesion - Min X: 3.485900964869355 Max X: 262.4534248246639
Red Lesion - Min Y: 0.11360062224559209 Max Y: 257.8375079181564
