In [None]:
import os
import cv2
import numpy as np
from tqdm import tqdm
import os

# Resize every grayscale image in the input folder to a fixed size, scale the
# pixel values to [0, 1], collect the results (and their filename-derived
# labels) in X / y, and write an 8-bit copy of each resized image to disk.

# Define the base path to the dataset.
# Built with os.path.join so it resolves on any OS; the previous raw string
# with doubled backslashes was only usable as a Windows path.
base_path = os.path.join(os.pardir, "data")
base_path_out = os.path.join(base_path, "processed")

# Input and output folders
input_folder = os.path.join(base_path_out, "converted_grayscale")
output_folder = os.path.join(base_path_out, "resized_and_normalized_images_without_masks")
os.makedirs(output_folder, exist_ok=True)

# Preprocessing settings
image_extensions = ('.png', '.jpg', '.jpeg')
target_size = (128, 128)  # (width, height) as expected by cv2.resize

# Initialize lists to store images and labels
X = []
y = []

# Files that could not be read / written; reported after the loop so the
# messages do not interleave with the progress bar.
unreadable_files = []
unsaved_files = []

# Process each image. os.listdir returns entries in arbitrary order, so the
# listing is sorted to make the row order of X and y reproducible across runs.
for filename in tqdm(sorted(os.listdir(input_folder))):
    if not filename.lower().endswith(image_extensions):
        continue

    file_path = os.path.join(input_folder, filename)

    # Read the image in grayscale; cv2.imread returns None instead of raising
    # when the file cannot be decoded.
    img = cv2.imread(file_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        unreadable_files.append(filename)
        continue

    # Resize image to the target size
    img_resized = cv2.resize(img, target_size)

    # Normalize pixel values to [0, 1] (divide by 255) for the in-memory dataset
    X.append(img_resized.astype(np.float32) / 255.0)

    # Extract label from filename: the text before the first underscore.
    # NOTE(review): a class name that itself contains '_' is cut at its first
    # underscore by this rule -- confirm against the file-naming scheme.
    y.append(filename.split('_')[0])

    # Save the resized image. The 8-bit resized array is written directly:
    # converting the normalized float copy back with (x * 255).astype(uint8)
    # truncates, so float rounding could shift a pixel value down by one.
    save_path = os.path.join(output_folder, filename)
    if not cv2.imwrite(save_path, img_resized):
        unsaved_files.append(filename)

# Convert to NumPy arrays
X = np.array(X)
y = np.array(y)

# Surface problems that would otherwise go unnoticed
if unreadable_files:
    print(f"Warning: skipped {len(unreadable_files)} unreadable image(s), e.g. {unreadable_files[:5]}")
if unsaved_files:
    print(f"Warning: failed to save {len(unsaved_files)} image(s), e.g. {unsaved_files[:5]}")

# Print the shape of the processed data and unique labels
print("Resized and Normalized images loaded:", X.shape)
print("Labels:", np.unique(y))


100%|██████████| 21165/21165 [00:58<00:00, 359.24it/s]


Resized and Normalized images loaded: (21165, 128, 128)
Labels: ['COVID' 'Lung' 'Normal' 'Viral Pneumonia']
