Importing the necessary libraries

In [None]:
import os
import matplotlib.pyplot as plt
import cv2
from sklearn.model_selection import train_test_split
import numpy as np
from tqdm import tqdm

# Kaggle input folder holding the images; each class has its own sub-folder.
dataset_path = "/kaggle/input/chest-xray-covid19-pneumonia/Data/train"

# Map on-disk folder names to the labels attached to their images.
class_map = {
    "COVID19": "COVID19",
    "NORMAL": "Normal",
    "PNEUMONIA": "Pneumonia",
}

# File extensions accepted as images (compared case-insensitively below).
IMAGE_EXTENSIONS = (".png", ".jpeg", ".jpg")

# Collect image paths and their labels in two parallel lists.
image_paths = []
labels = []

for folder_name, label in class_map.items():
    folder = os.path.join(dataset_path, folder_name)
    # os.listdir returns entries in arbitrary order; sort so every run sees
    # the files in the same sequence.
    for filename in sorted(os.listdir(folder)):
        # Lower-case first so files such as "scan.PNG" or "scan.JPG" are kept.
        if filename.lower().endswith(IMAGE_EXTENSIONS):
            image_paths.append(os.path.join(folder, filename))
            labels.append(label)

print(f"Total images found: {len(image_paths)}")

# Show sample images
def show_samples(paths, labels, n=5):
    """Display up to ``n`` leading images side by side, titled with their labels.

    Args:
        paths: Sequence of image file paths.
        labels: Sequence of labels; ``labels[i]`` is the title for ``paths[i]``.
        n: Maximum number of samples to show. Fewer are shown when fewer
            paths or labels are available.

    Returns:
        None. Nothing is drawn when there is no sample to show.
    """
    # Never index past the end of either sequence.
    count = min(n, len(paths), len(labels))
    if count <= 0:
        return

    plt.figure(figsize=(15, 5))
    for i in range(count):
        plt.subplot(1, count, i + 1)
        img = cv2.imread(paths[i])
        if img is None:
            # cv2.imread signals an unreadable file by returning None rather
            # than raising; keep the panel but flag it in the title.
            plt.title(f"{labels[i]} (unreadable)")
        else:
            # OpenCV loads images as BGR; matplotlib expects RGB.
            plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
            plt.title(labels[i])
        plt.axis("off")
    plt.show()

# Preview the leading samples. image_paths is filled one class folder at a
# time, so these first images come from the first class folder scanned
# (assuming it holds at least as many images as are shown).
show_samples(image_paths, labels)


Preprocessing the data with noise-filtering techniques
1. Median filter
2. Non-local means (NLM) filter

In [None]:
# Target side length in pixels: preprocess_and_filter resizes every image to
# IMG_SIZE x IMG_SIZE so that all images share one shape.
IMG_SIZE = 224

# Noise filtering functions
def apply_median_filter(img, ksize=3):
    """Smooth ``img`` with OpenCV's median filter.

    Args:
        img: Image array passed to ``cv2.medianBlur``.
        ksize: Aperture size of the square kernel. OpenCV requires an odd
            value greater than 1 (3, 5, 7, ...). Defaults to 3.

    Returns:
        The filtered image as returned by ``cv2.medianBlur``.

    Raises:
        ValueError: If ``ksize`` is even or smaller than 3.
    """
    # Fail early with a readable message instead of an opaque cv2.error.
    if ksize < 3 or ksize % 2 == 0:
        raise ValueError(
            f"ksize must be an odd integer greater than 1, got {ksize!r}"
        )
    return cv2.medianBlur(img, ksize)

def apply_nlm_filter(img, h=10, h_color=10, template_window_size=7,
                     search_window_size=21):
    """Denoise a colour image with OpenCV's fast non-local means filter.

    The defaults reproduce the values this notebook has always used.

    Args:
        img: Colour image passed to ``cv2.fastNlMeansDenoisingColored``
            (OpenCV documents an 8-bit, 3-channel input).
        h: Filter strength for the luminance component.
        h_color: Filter strength for the colour components.
        template_window_size: Side length in pixels of the patch used to
            compute weights (OpenCV recommends an odd value).
        search_window_size: Side length in pixels of the window searched for
            similar patches (OpenCV recommends an odd value).

    Returns:
        The denoised image as returned by OpenCV.
    """
    # The second positional argument is the optional output buffer; passing
    # None lets OpenCV allocate the result.
    return cv2.fastNlMeansDenoisingColored(
        img, None, h, h_color, template_window_size, search_window_size
    )

# Preprocess function with denoising
def preprocess_and_filter(images, labels, method="original"):
    """Load, resize and optionally denoise a set of images.

    Each image is read with OpenCV, resized to ``IMG_SIZE`` x ``IMG_SIZE``,
    converted from BGR to RGB, and then passed through the selected filter.

    Args:
        images: Sequence of image file paths.
        labels: Sequence of labels aligned with ``images``.
        method: ``"original"`` (no filtering), ``"median"`` or ``"nlm"``.

    Returns:
        A ``(images, labels)`` tuple of NumPy arrays. Images that could not
        be read are left out of both arrays, so the two stay aligned.

    Raises:
        ValueError: If ``method`` is not a supported name, or if ``images``
            and ``labels`` differ in length.
    """
    # A mistyped method used to fall through to "original" silently, which
    # would make a filtered run indistinguishable from an unfiltered one.
    if method not in ("original", "median", "nlm"):
        raise ValueError(
            f"Unknown method {method!r}; expected 'original', 'median' or 'nlm'"
        )
    # zip() would otherwise drop the surplus entries without any warning.
    if len(images) != len(labels):
        raise ValueError(
            f"images and labels differ in length: {len(images)} != {len(labels)}"
        )

    processed_images = []
    new_labels = []
    skipped = 0

    for path, label in tqdm(zip(images, labels), total=len(images)):
        img = cv2.imread(path)
        if img is None:
            # cv2.imread returns None for missing or corrupt files; skip the
            # sample and its label instead of crashing inside cv2.resize.
            skipped += 1
            continue

        img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        if method == "median":
            img = apply_median_filter(img)
        elif method == "nlm":
            img = apply_nlm_filter(img)
        # "original": keep the resized image unfiltered.

        processed_images.append(img)
        new_labels.append(label)

    if skipped:
        print(f"Skipped {skipped} unreadable image(s)")

    return np.array(processed_images), np.array(new_labels)
