In [1]:
#imports
import os
import cv2
from sklearn.model_selection import train_test_split


In [2]:
# Paths & Configs
#
# The dataset and output locations default to the original workstation paths,
# but can be overridden without editing the notebook by setting the
# MALARIA_DATA_DIR / MALARIA_PREPROCESS_DIR environment variables before the
# kernel starts.
DATA_DIR = os.environ.get(
    "MALARIA_DATA_DIR",
    r"C:\Users\user\Desktop\finalmalaria\cell_images",
)
PREPROCESS_DIR = os.environ.get(
    "MALARIA_PREPROCESS_DIR",
    r"C:\Users\user\Desktop\finalmalaria\preprocessed_data",
)

# Size tuple used as the default `target_size` of preprocess_image, which
# forwards it to cv2.resize (OpenCV interprets it as (width, height)).
TARGET_SIZE = (224, 224)

In [3]:
# Create directories
#
# Build <PREPROCESS_DIR>/<split>/<class> for every class sub-directory of
# DATA_DIR. os.listdir returns every entry, so plain files sitting next to
# the class folders are filtered out; otherwise each of them would be turned
# into a bogus class directory inside every split.
class_names = [
    entry for entry in os.listdir(DATA_DIR)
    if os.path.isdir(os.path.join(DATA_DIR, entry))
]

for split in ["train", "val", "test"]:
    split_dir = os.path.join(PREPROCESS_DIR, split)
    os.makedirs(split_dir, exist_ok=True)
    for class_folder in class_names:
        os.makedirs(os.path.join(split_dir, class_folder), exist_ok=True)

In [4]:
#Preprocess Function
def preprocess_image(image_path, target_size=TARGET_SIZE):
    """Load an image, resize it, and divide its pixel values by 255.0.

    Args:
        image_path: Path of the image file handed to ``cv2.imread``.
        target_size: Size tuple forwarded to ``cv2.resize``; defaults to
            the module-level ``TARGET_SIZE``.

    Returns:
        The resized image divided by 255.0, or ``None`` when
        ``cv2.imread`` did not return an image.
    """
    loaded = cv2.imread(image_path)
    if loaded is None:
        # Nothing could be read from image_path; let the caller decide what to do.
        return None

    resized = cv2.resize(loaded, target_size)
    return resized / 255.0

 # Split and Save
for class_folder in sorted(os.listdir(DATA_DIR)):
    class_path = os.path.join(DATA_DIR, class_folder)
    if not os.path.isdir(class_path):
        # A stray file beside the class folders would make os.listdir(class_path) raise.
        continue

    # os.listdir returns entries in arbitrary order; sorting the file names
    # makes the random_state-seeded split reproducible across machines.
    images = sorted(
        img for img in os.listdir(class_path)
        if img.lower().endswith(('.png', '.jpg', '.jpeg'))
    )

    # First hold out 30% of the files, then halve that hold-out into val and test.
    train_imgs, temp_imgs = train_test_split(images, test_size=0.3, random_state=42)
    val_imgs, test_imgs = train_test_split(temp_imgs, test_size=0.5, random_state=42)

    failed = []  # file names that could not be read or written
    for split_name, img_list in zip(["train", "val", "test"], [train_imgs, val_imgs, test_imgs]):
        save_path = os.path.join(PREPROCESS_DIR, split_name, class_folder)
        # Do not rely on an earlier cell having created the output folder.
        os.makedirs(save_path, exist_ok=True)
        for img_name in img_list:
            img = preprocess_image(os.path.join(class_path, img_name))
            if img is None:
                failed.append(img_name)
                continue
            # preprocess_image divided by 255.0; multiply back and cast to 8-bit for writing.
            written = cv2.imwrite(os.path.join(save_path, img_name), (img * 255).astype('uint8'))
            if not written:
                # cv2.imwrite reports a failed write through its return value.
                failed.append(img_name)

    if failed:
        # Surface skipped files instead of dropping them silently.
        print(f"{class_folder}: {len(failed)} image(s) could not be processed, e.g. {failed[:5]}")


In [5]:
# Display Counts

# Report how many files ended up in each class folder of every split,
# followed by the per-split total.
for split in ("train", "val", "test"):
    print(f"\n{split.upper()} counts:")
    split_path = os.path.join(PREPROCESS_DIR, split)
    per_class_counts = []
    for class_folder in os.listdir(split_path):
        class_dir = os.path.join(split_path, class_folder)
        count = len(os.listdir(class_dir))
        print(f"{class_folder}: {count}")
        per_class_counts.append(count)
    total = sum(per_class_counts)
    print(f"Total {split}: {total}")


TRAIN counts:
Parasitized: 9645
Uninfected: 9645
Total train: 19290

VAL counts:
Parasitized: 2067
Uninfected: 2067
Total val: 4134

TEST counts:
Parasitized: 2067
Uninfected: 2067
Total test: 4134
