In [20]:
import os
import cv2
import numpy as np
from skimage import img_as_ubyte
from skimage.filters import gabor
from skimage.feature import greycomatrix, greycoprops
from skimage.segmentation import active_contour
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, classification_report

In [29]:
def extract_features(image):
    # GLCM features
    image_rescaled = image.astype(np.uint8)  # Ensure the image type is suitable for GLCM
    glcm = greycomatrix(image_rescaled, distances=[1], angles=[0, np.pi/4, np.pi/2, 3*np.pi/4], levels=256, symmetric=True, normed=True)
    energy = greycoprops(glcm, 'energy')
    homogeneity = greycoprops(glcm, 'homogeneity')
    contrast = greycoprops(glcm, 'contrast')
    correlation = greycoprops(glcm, 'correlation')

    # Gabor features
    gabor_filters_real, gabor_filters_imag = gabor(image_rescaled, frequency=0.6)
    gabor_energy = np.mean(np.sqrt(gabor_filters_real**2 + gabor_filters_imag**2))
    gabor_amplitude = np.mean(np.abs(gabor_filters_real))

    # Color features
    img_rgb = cv2.cvtColor(image_rescaled, cv2.COLOR_GRAY2RGB)  # Assuming input is grayscale
    mean_rgb = np.mean(img_rgb, axis=(0, 1))
    std_rgb = np.std(img_rgb, axis=(0, 1))

    # Ensure all features are flattened to 1D arrays
    energy = np.ravel(energy)
    homogeneity = np.ravel(homogeneity)
    contrast = np.ravel(contrast)
    correlation = np.ravel(correlation)
    mean_rgb = mean_rgb.flatten()
    std_rgb = std_rgb.flatten()

    # Concatenate all features into a single feature vector
    feature_vector = np.concatenate([energy, homogeneity, contrast, correlation,
                                     [gabor_energy, gabor_amplitude],
                                     mean_rgb, std_rgb])
    return feature_vector


In [30]:
def segment_lesion(image):
    # Convert RGB image to YCbCr color space
    image_YCbCr = cv2.cvtColor(image, cv2.COLOR_RGB2YCrCb)

    # Create lesion masks based on thresholds
    mean_Cr = np.mean(image_YCbCr[:, :, 1])
    threshold = 80

    white_lesion_mask = np.zeros_like(image_YCbCr[:, :, 2], dtype=np.uint8)
    red_lesion_mask = np.zeros_like(image_YCbCr[:, :, 1], dtype=np.uint8)

    if mean_Cr < threshold:
        # White lesion mask using Cb intensity
        mean_Cb = np.mean(image_YCbCr[:, :, 2])
        white_lesion_mask[image_YCbCr[:, :, 2] > mean_Cb] = 255
    else:
        # Red lesion mask using Cr intensity
        mean_Cr = np.mean(image_YCbCr[:, :, 1])
        red_lesion_mask[image_YCbCr[:, :, 1] > mean_Cr] = 255

    # Active contour segmentation on original image
    white_lesion = active_contour(image, white_lesion_mask)
    red_lesion = active_contour(image, red_lesion_mask)

    
    return white_lesion, red_lesion


In [None]:
images_directory = r"E:\ocd\Images"
image_files = os.listdir(images_directory)

# Filter images containing 'oral_cancer' in their filenames
cancerous_image_files = [filename for filename in image_files if 'oral_cancer' in filename]
non_cancerous_image_files = [filename for filename in image_files if 'oral_cancer' not in filename]

cancerous_image_paths = [os.path.join(images_directory, filename) for filename in cancerous_image_files]
non_cancerous_image_paths = [os.path.join(images_directory, filename) for filename in non_cancerous_image_files]

all_image_paths = cancerous_image_paths + non_cancerous_image_paths

# Extract features from all images in the dataset
X_features = []
all_labels = []

for img_path in all_image_paths:
    img = cv2.imread(img_path)
    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    print(img_path)
    # Segmentation of lesions
    white_lesion, red_lesion = segment_lesion(img_rgb)

    # Extract features from segmented lesions 
    features_white_lesion = extract_features(white_lesion)
    features_red_lesion = extract_features(red_lesion)

    # Combine features from both types of lesions
    combined_features = np.concatenate([features_white_lesion, features_red_lesion])

    # Append to feature list
    X_features.append(combined_features)

    # Assign labels based on 'oral_cancer' substring in filename
    if 'oral_cancer' in img_path:
        all_labels.append('Cancerous')
    else:
        all_labels.append('Non-cancerous')
    print("done")


X_features = np.array(X_features)
all_labels = np.array(all_labels)
print("done")

E:\ocd\Images\oral_cancer (1).jpeg
done
E:\ocd\Images\oral_cancer (1).jpg
done
E:\ocd\Images\oral_cancer (1).png
done
E:\ocd\Images\oral_cancer (10).jpeg
done
E:\ocd\Images\oral_cancer (10).jpg
done
E:\ocd\Images\oral_cancer (10).png
done
E:\ocd\Images\oral_cancer (11).jpg
done
E:\ocd\Images\oral_cancer (11).png
done
E:\ocd\Images\oral_cancer (12).jpg
done
E:\ocd\Images\oral_cancer (12).png
done
E:\ocd\Images\oral_cancer (13).jpg
done
E:\ocd\Images\oral_cancer (13).png
done
E:\ocd\Images\oral_cancer (14).jpg
done
E:\ocd\Images\oral_cancer (14).png
done
E:\ocd\Images\oral_cancer (15).jpg
done
E:\ocd\Images\oral_cancer (15).png
done
E:\ocd\Images\oral_cancer (16).jpg
done
E:\ocd\Images\oral_cancer (17).jpg
done
E:\ocd\Images\oral_cancer (18).jpg
done
E:\ocd\Images\oral_cancer (19).jpg
done
E:\ocd\Images\oral_cancer (2).jpeg
done
E:\ocd\Images\oral_cancer (2).jpg
done
E:\ocd\Images\oral_cancer (2).png
done
E:\ocd\Images\oral_cancer (20).jpg
done
E:\ocd\Images\oral_cancer (21).jpg
done
E:\

In [9]:
pca = PCA()
X_features_pca = pca.fit_transform(X_features)

# Split data into training and testing sets after PCA
X_train_pca, X_test_pca, y_train_pca, y_test_pca = train_test_split(X_features_pca, all_labels, test_size=0.2, random_state=42)

In [26]:
nb_classifier = GaussianNB()

# Train the Naive Bayes classifier
nb_classifier.fit(X_train, y_train)

# Predict on the test set
nb_predictions = nb_classifier.predict(X_test)

# Calculate accuracy
nb_accuracy = accuracy_score(y_test, nb_predictions)
print("Naive Bayes Accuracy:", nb_accuracy)

Naive Bayes Accuracy: 0.49019607843137253


In [21]:
svm_classifier = SVC(kernel='linear')  

# Train the SVM classifier
svm_classifier.fit(X_train, y_train)

# Predict on the test set
svm_predictions = svm_classifier.predict(X_test)

# Calculate accuracy
svm_accuracy = accuracy_score(y_test, svm_predictions)
print("SVM Accuracy:", svm_accuracy)

SVM Accuracy: 0.43137254901960786


In [24]:
# K-Nearest Neighbors (KNN) Classifier
knn = KNeighborsClassifier(n_neighbors=4)
knn.fit(X_train, y_train)
knn_pred = knn.predict(X_test)
knn_accuracy = accuracy_score(y_test, knn_pred)
print("KNN Accuracy:",knn_accuracy)

KNN Accuracy: 0.49019607843137253
