Importing the necessary libraries

In [None]:
import cv2
from skimage.restoration import denoise_nl_means, estimate_sigma
import numpy as np
import matplotlib as plot
from tqdm import tqdm
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.models import Model
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

Preprocessing the data with the following filtering techniques:
1. Median Filtered
2. NLM Filtered

In [None]:
# Side length, in pixels, that every image is resized to (IMG_SIZE x IMG_SIZE)
# so that all images in the dataset share one uniform shape.
IMG_SIZE = 224

# Noise filtering functions
def apply_median_filter(images):
    """Median-blur every image of a batch.

    Each element obtained by iterating over ``images`` is passed to
    ``cv2.medianBlur`` with an aperture size of 3. The blurred results are
    collected in their original order and returned as one NumPy array.
    """
    blurred = []
    for frame in images:
        blurred.append(cv2.medianBlur(frame, 3))
    return np.array(blurred)

def apply_nlm_filter(images):
    """Denoise every image of a batch with non-local means (NLM).

    For each image the noise standard deviation is estimated with
    ``estimate_sigma`` and used to set the NLM filter strength
    (``h = 1.15 * sigma``).

    Parameters
    ----------
    images : iterable of array-like
        Batch of images. 2-D items are treated as grayscale; 3-D items are
        treated as colour images with the channels on the last axis.
        Integer images are rescaled to [0, 1]; float images are assumed to
        already be in [0, 1] (TODO confirm for non-uint8 callers).

    Returns
    -------
    numpy.ndarray
        The denoised images as ``uint8`` values in [0, 255], stacked in the
        same order as the input.

    Notes
    -----
    Uses the ``channel_axis`` keyword, which replaced ``multichannel`` in
    scikit-image 0.19.
    """
    filtered = []
    for img in images:
        img = np.asarray(img)

        # Convert to float BEFORE estimating sigma so that sigma (and hence h)
        # is expressed on the same [0, 1] scale that the NLM filter works on.
        # Estimating sigma on raw 0-255 values makes h far too large.
        if np.issubdtype(img.dtype, np.integer):
            img_float = img.astype(np.float64) / np.iinfo(img.dtype).max
        else:
            img_float = img.astype(np.float64)

        # Tell scikit-image that the last axis of a 3-D image holds colour
        # channels; otherwise it would be filtered as a 3-D volume.
        channel_axis = -1 if img_float.ndim == 3 else None

        # With a channel axis, estimate_sigma returns one sigma per channel;
        # np.mean collapses that (or a plain scalar) to a single value.
        sigma_est = np.mean(estimate_sigma(img_float, channel_axis=channel_axis))
        denoised = denoise_nl_means(img_float, h=1.15 * sigma_est, fast_mode=True,
                                    patch_size=5, patch_distance=3,
                                    channel_axis=channel_axis)

        # Round (rather than truncate) and clip so the cast to uint8 can
        # neither wrap around nor lose a grey level to float round-off.
        filtered.append(np.clip(np.rint(denoised * 255.0), 0, 255).astype(np.uint8))
    return np.array(filtered)

# Preprocess function with denoising
def preprocess_and_filter(images, labels, method="original"):
    """Load, resize, colour-convert and optionally denoise a set of images.

    Parameters
    ----------
    images : sequence of str
        Paths of the image files, read with ``cv2.imread``.
    labels : sequence
        One label per path, in the same order as ``images``.
    method : str, default "original"
        ``"median"`` applies ``apply_median_filter``, ``"nlm"`` applies
        ``apply_nlm_filter``; any other value keeps the unfiltered image.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        The processed images and the labels of the images that were
        successfully read, in matching order. Files that cannot be read are
        skipped with a warning, so the outputs may be shorter than the inputs.
    """
    import warnings  # local import: only used to report unreadable files

    processed_images = []
    new_labels = []

    for path, label in tqdm(zip(images, labels), total=len(images)):
        img = cv2.imread(path)
        if img is None:
            # cv2.imread returns None instead of raising when a file is
            # missing or cannot be decoded. Skip it here rather than crash
            # inside cv2.resize; the label is dropped with it so images and
            # labels stay aligned.
            warnings.warn(f"Skipping unreadable image: {path}")
            continue

        img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # The filter helpers iterate over a *batch* of images. Passing a
        # single image would make them iterate over its pixel rows, so wrap
        # the image in a batch of one and unwrap the result.
        if method == "median":
            img = apply_median_filter(img[np.newaxis, ...])[0]
        elif method == "nlm":
            img = apply_nlm_filter(img[np.newaxis, ...])[0]
        # any other method value keeps the unfiltered image

        processed_images.append(img)
        new_labels.append(label)

    return np.array(processed_images), np.array(new_labels)


Preparing 3 datasets
1. Original
2. Median Filter
3. NLM Filter

In [None]:
# Build the three dataset variants from the same paths and labels; only the
# denoising method differs between the calls.
images_original, labels_original = preprocess_and_filter(
    images=image_paths, labels=labels, method="original"
)
images_median, labels_median = preprocess_and_filter(
    images=image_paths, labels=labels, method="median"
)
images_nlm, labels_nlm = preprocess_and_filter(
    images=image_paths, labels=labels, method="nlm"
)

Loading the pre-trained ResNet-50 with ImageNet weights to extract state-of-the-art features from the X-ray images

In [None]:
# ResNet50 with ImageNet weights and without its classification head
# (include_top=False), used as a fixed feature extractor. The input
# resolution is tied to IMG_SIZE so that it cannot drift away from the size
# the images are resized to during preprocessing.
base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(IMG_SIZE, IMG_SIZE, 3))
model = Model(inputs=base_model.input, outputs=base_model.output)

print("ResNet50 loaded.")

Defining the extract feature method

In [None]:
def extract_features(images):
    """Run a batch of images through the ResNet50 ``model`` and flatten the output.

    Parameters
    ----------
    images : array-like
        Batch of images; presumably shaped (n_images, height, width, 3) with
        RGB values in [0, 255] as produced by the preprocessing step
        (TODO confirm against caller).

    Returns
    -------
    numpy.ndarray
        2-D array with one row per image: the model output reshaped to
        ``(n_images, -1)``.
    """
    # Keras documents that preprocess_input may write its result over the
    # input array when the dtype is compatible (i.e. already floating point).
    # Working on an explicit float32 copy guarantees the caller's images are
    # never modified, whatever dtype they arrive in.
    batch = np.array(images, dtype=np.float32)

    # ResNet50-specific input normalisation.
    images_prep = preprocess_input(batch)

    # Forward pass in mini-batches of 32 images.
    features = model.predict(images_prep, batch_size=32, verbose=1)

    # Collapse everything after the batch axis into one feature vector per image.
    return features.reshape(features.shape[0], -1)

Extracting features for all the 3 datasets

In [None]:
# One ResNet50 feature matrix per dataset variant (unfiltered, median, NLM).
features_original = extract_features(images=images_original)
features_median = extract_features(images=images_median)
features_nlm = extract_features(images=images_nlm)

Since the high dimensionality of the feature vectors creates issues for KNN, we apply PCA to the feature vectors

In [None]:
def apply_pca_to_reduce_feature(X_scaled, n_components=100):
    """Reduce the dimensionality of a feature matrix with PCA.

    Parameters
    ----------
    X_scaled : array-like of shape (n_samples, n_features)
        Feature matrix to fit the PCA on and to transform.
    n_components : int, default 100
        Requested number of principal components. It is capped at
        ``min(n_samples, n_features)``, the most PCA can produce, so small
        datasets do not make the fit fail.

    Returns
    -------
    numpy.ndarray
        The transformed data with one column per retained component.
    """
    n_samples, n_features = np.shape(X_scaled)
    # PCA rejects n_components larger than min(n_samples, n_features).
    n_comp = min(n_components, n_samples, n_features)
    # Fixed random_state keeps the randomized SVD solver reproducible.
    pca = PCA(n_components=n_comp, svd_solver='randomized', random_state=42)
    return pca.fit_transform(X_scaled)

Creating test train split for the data

In [None]:
def stratified_split(features, labels, test_size=0.7, random_state=42):
    """Split features and labels into train and test sets, stratified by label.

    Parameters
    ----------
    features : array-like
        Feature matrix, one row per sample.
    labels : array-like
        Labels aligned with ``features``; also used for stratification.
    test_size : float, default 0.7
        Fraction of the samples placed in the TEST set.
        NOTE(review): 0.7 leaves only 30% of the data for training; confirm
        this is intended and not an inverted 70/30 split.
    random_state : int, default 42
        Seed that makes the shuffle reproducible.

    Returns
    -------
    tuple
        ``(X_train, X_test, y_train, y_test)`` as returned by
        ``train_test_split``.
    """
    return train_test_split(features, labels, test_size=test_size,
                            random_state=random_state, stratify=labels)

def _split_then_reduce(features, labels, n_components=100):
    """Split into train/test first, then fit PCA on the training part only.

    Fitting PCA on all samples before splitting lets the test samples shape
    the projection, which leaks test information into the evaluation. Here
    the PCA is learned from the training rows and merely applied to the
    test rows.
    """
    X_train, X_test, y_train, y_test = stratified_split(features, labels)
    # PCA cannot return more components than min(n_samples, n_features) of
    # the data it is fitted on.
    n_comp = min(n_components, X_train.shape[0], X_train.shape[1])
    pca = PCA(n_components=n_comp, svd_solver='randomized', random_state=42)
    return pca.fit_transform(X_train), pca.transform(X_test), y_train, y_test


# NOTE(review): labels_enc is not defined in the cells shown here - confirm it
# is created earlier and has exactly one entry per feature row.
X_train_orig, X_test_orig, y_train_orig, y_test_orig = _split_then_reduce(features_original, labels_enc)
X_train_median, X_test_median, y_train_median, y_test_median = _split_then_reduce(features_median, labels_enc)
X_train_nlm, X_test_nlm, y_train_nlm, y_test_nlm = _split_then_reduce(features_nlm, labels_enc)

Creating a dictionary that maps each scaling technique's name to its scaler


In [None]:


# Feature-scaling strategies to compare. Each key is the name stored in the
# "Scaler" field of the evaluation results.
scalers = dict([
    ("Standard", StandardScaler()),
    ("MinMax", MinMaxScaler()),
    ("Robust", RobustScaler()),
])


Defining the KNN evaluation over the distance metrics distances=["euclidean","manhattan","chebyshev","minkowski","cosine","correlation","canberra","braycurtis"] and over [1,3,5,7] neighbors

In [None]:
def evaluate_knn(X_train, X_test, y_train, y_test, scalers, metrics, distances=None,
                 neighbors=(1, 3, 5, 7)):
    """Grid-evaluate KNN accuracy over scalers, neighbour counts and distances.

    Parameters
    ----------
    X_train, X_test : array-like
        Training and test feature matrices.
    y_train, y_test : array-like
        Training and test labels.
    scalers : dict
        Maps a display name to a scaler object exposing ``fit_transform``
        and ``transform``. Each scaler is fitted on the training data only.
    metrics : object
        Unused. Kept only so that existing positional calls keep working.
    distances : iterable of str, optional
        Distance metric names passed to ``KNeighborsClassifier``. When
        omitted, the eight metrics listed in the notebook text are used, so
        calls that pass only six arguments no longer raise ``TypeError``.
    neighbors : iterable of int, default (1, 3, 5, 7)
        Values of ``n_neighbors`` to try.

    Returns
    -------
    list of dict
        One record per (scaler, neighbour count, distance) combination with
        the keys ``"Scaler"``, ``"Distance"``, ``"Accuracy"`` and
        ``"Neighbors"``.
    """
    if distances is None:
        distances = ("euclidean", "manhattan", "chebyshev", "minkowski",
                     "cosine", "correlation", "canberra", "braycurtis")

    results = []

    for scaler_name, scaler in scalers.items():
        # Fit the scaler on the training data only, then reuse the fitted
        # parameters for the test data to avoid leaking test statistics.
        X_train_scaled = scaler.fit_transform(X_train)
        X_test_scaled = scaler.transform(X_test)

        for ne in neighbors:
            for metric in distances:
                knn = KNeighborsClassifier(n_neighbors=ne, n_jobs=1, metric=metric)
                knn.fit(X_train_scaled, y_train)
                acc = accuracy_score(y_test, knn.predict(X_test_scaled))

                results.append({
                    "Scaler": scaler_name,
                    "Distance": metric,
                    "Accuracy": acc,
                    "Neighbors": ne
                })

    return results


Computing 3 results vectors

In [None]:
# Every call passes the same scalers and distance list so the three result
# sets are directly comparable.
# NOTE(review): `metrics` and `distances` are not defined in the cells shown
# here - confirm both are created before this cell runs.
results_orig = evaluate_knn(X_train_orig, X_test_orig, y_train_orig, y_test_orig, scalers, metrics, distances)
results_median = evaluate_knn(X_train_median, X_test_median, y_train_median, y_test_median, scalers, metrics, distances)
results_nlm = evaluate_knn(X_train_nlm, X_test_nlm, y_train_nlm, y_test_nlm, scalers, metrics, distances)


Generating DataFrames