In [7]:
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

In [8]:
def load_images_from_folder(folder):
    images = []
    labels = []
    for category in ['NORMAL', 'PNEUMONIA']:
        category_path = os.path.join(folder, category)
        for filename in os.listdir(category_path):
            img_path = os.path.join(category_path, filename)
            img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)  
            if img is not None:
                img = cv2.resize(img, (128, 128))
                images.append(img)
                labels.append(category)
    return images, labels

In [11]:
train_images, train_labels = load_images_from_folder('/kaggle/input/chest-xray-pneumonia/chest_xray/train')
test_images, test_labels = load_images_from_folder('/kaggle/input/chest-xray-pneumonia/chest_xray/test')
val_images, val_labels = load_images_from_folder('/kaggle/input/chest-xray-pneumonia/chest_xray/val')

In [16]:
train_images, val_images, train_labels, val_labels = train_test_split(
    np.concatenate((train_images, val_images)), 
    np.concatenate((train_labels, val_labels)), 
    test_size=0.2, 
    random_state=42
)

# Convert labels to one-hot encoding
train_labels = to_categorical(train_labels, num_classes=2)
test_labels = to_categorical(test_labels, num_classes=2)
val_labels = to_categorical(val_labels, num_classes=2)

# Print the shapes of the resulting arrays
print("Train Images Shape:", train_images.shape)
print("Train Labels Shape:", train_labels.shape)
print("Test Images Shape:", test_images.shape)
print("Test Labels Shape:", test_labels.shape)
print("Validation Images Shape:", val_images.shape)
print("Validation Labels Shape:", val_labels.shape)

Train Images Shape: (3337, 128, 128)
Train Labels Shape: (3337, 2, 2)
Test Images Shape: (1044, 128, 128)
Test Labels Shape: (1044, 2, 2)
Validation Images Shape: (835, 128, 128)
Validation Labels Shape: (835, 2, 2)
