In [14]:
import pandas as pd
import os
import shutil
import random
from PIL import Image
from pathlib import Path

In [10]:
# Raw dataset folders (read-only inputs) and the root of the reorganised copy.
original_normal_path = '../data/raw/NORMAL'
original_pneumonia_path = '../data/raw/PNEUMONIA'
new_path = "../data/processed"

# Destination folders under new_path: NORMAL, PNEUMONIA/VIRAL, PNEUMONIA/BACTERIAL.
normal_path, viral_path, bacteria_path = (
    os.path.join(new_path, *parts)
    for parts in (("NORMAL",), ("PNEUMONIA", "VIRAL"), ("PNEUMONIA", "BACTERIAL"))
)

In [5]:
# Create the three destination folders (parents included).
# exist_ok=True means re-running this cell does not raise if they already exist.
for target_dir in (normal_path, viral_path, bacteria_path):
    os.makedirs(target_dir, exist_ok=True)

In [8]:
# Copy every NORMAL image found under original_normal_path into the flat
# normal_path folder, renamed to normal_<index>.jpeg.
#
# The index is a running counter that starts at 0 instead of being derived
# from len(os.listdir(normal_path)) for each file:
#   * no directory listing per copied file (that was quadratic in file count);
#   * re-running the cell overwrites normal_0..normal_N-1 instead of appending
#     a second copy of the whole dataset, which would later be split across
#     train/val/test as duplicates;
#   * pre-existing gaps in the numbering can no longer cause several source
#     images to be written to the same destination name.
normal_index = 0
for root, dirs, files in os.walk(original_normal_path):
    # Sorting dirs in place makes os.walk descend in a deterministic order;
    # sorting files makes the source -> index mapping reproducible.
    dirs.sort()
    for filename in sorted(files):
        # Case-insensitive so that e.g. "IMG.JPEG" is not silently skipped.
        if not filename.lower().endswith(('.jpeg', '.jpg', '.png')):
            continue
        source_path = os.path.join(root, filename)
        # The bytes are copied unchanged; only the name gets a .jpeg suffix.
        dest_path = os.path.join(normal_path, f"normal_{normal_index}.jpeg")
        shutil.copy(source_path, dest_path)
        normal_index += 1

In [None]:
# Copy every PNEUMONIA image found under original_pneumonia_path into
# viral_path or bacteria_path, chosen by a marker in the (lower-cased) file
# name, renamed to viral_<index>.jpeg / bacterial_<index>.jpeg.
#
# Each class has its own running counter starting at 0 instead of calling
# len(os.listdir(...)) for every file: this avoids a directory listing per
# copy, makes re-running the cell overwrite the same names rather than append
# duplicates, and prevents name collisions when the folder numbering has gaps.
viral_index = 0
bacterial_index = 0
unclassified = []  # source paths whose name carries neither marker

for root, dirs, files in os.walk(original_pneumonia_path):
    # Sorting dirs in place makes os.walk descend in a deterministic order;
    # sorting files makes the source -> index mapping reproducible.
    dirs.sort()
    for filename in sorted(files):
        fname_lower = filename.lower()
        # Case-insensitive so that e.g. "IMG.JPEG" is not silently skipped.
        if not fname_lower.endswith(('.jpeg', '.jpg', '.png')):
            continue
        source_path = os.path.join(root, filename)
        if 'virus' in fname_lower or 'viral' in fname_lower:
            dest_path = os.path.join(viral_path, f"viral_{viral_index}.jpeg")
            viral_index += 1
        elif 'bacteria' in fname_lower:
            # 'bacteria' is a substring of 'bacterial', so one test covers both.
            dest_path = os.path.join(bacteria_path, f"bacterial_{bacterial_index}.jpeg")
            bacterial_index += 1
        else:
            # Previously these files were dropped without any trace.
            unclassified.append(source_path)
            continue
        # The bytes are copied unchanged; only the name gets a .jpeg suffix.
        shutil.copy(source_path, dest_path)

if unclassified:
    print(f"Skipped {len(unclassified)} pneumonia image(s) with no virus/bacteria marker in the file name.")


In [15]:
# Where the per-class images are read from and where the split tree is written.
INPUT_PATH = "../data/processed"
OUTPUT_PATH = "../data/splitted"

# Target (width, height) passed to Image.resize for every image.
IMG_SIZE = (224, 224)

# Fraction of each class assigned to every split.
SPLIT_RATIOS = dict(train=0.7, val=0.15, test=0.15)

# Maps the class's relative output folder to its source folder under INPUT_PATH.
# The "/" in a key is split into separate path components for the source path.
CLASSES = {
    class_rel: os.path.join(INPUT_PATH, *class_rel.split("/"))
    for class_rel in ("NORMAL", "PNEUMONIA/BACTERIAL", "PNEUMONIA/VIRAL")
}

def prepare_directories():
    """Rebuild the output tree from scratch.

    Removes OUTPUT_PATH (and everything in it) if it exists, then creates one
    folder per (split, class) pair at OUTPUT_PATH/<split>/<class>.
    """
    if os.path.exists(OUTPUT_PATH):
        shutil.rmtree(OUTPUT_PATH)

    target_dirs = [
        os.path.join(OUTPUT_PATH, split_name, class_rel)
        for split_name in SPLIT_RATIOS
        for class_rel in CLASSES
    ]
    for target_dir in target_dirs:
        os.makedirs(target_dir, exist_ok=True)

def process_and_save(src_path, dst_path):
    """Convert one image to RGB, resize it to IMG_SIZE and save it.

    Processing is best-effort: any exception is reported on stdout and does
    not propagate, so one bad file cannot abort a whole batch.

    Args:
        src_path: Path of the image to read.
        dst_path: Path to write the result to. No explicit format is passed
            to ``save``, so Pillow picks it from the file extension.

    Returns:
        True if the image was written, False if any step failed. Earlier
        versions returned None in both cases, which made failures
        indistinguishable from successes for the caller.
    """
    try:
        with Image.open(src_path) as img:
            resized = img.convert("RGB").resize(IMG_SIZE)
            resized.save(dst_path)
    except Exception as e:
        # Deliberately broad: keep the batch running, but report the failure
        # both on stdout and through the return value.
        print(f"Processing error: {src_path}: {e}")
        return False
    return True

def split_and_save(clase_rel, src_dir, seed=42):
    """Split one class's images into train/val/test and write resized copies.

    The images in ``src_dir`` are shuffled, partitioned according to
    SPLIT_RATIOS, and each one is passed to ``process_and_save`` with the
    destination OUTPUT_PATH/<split>/<clase_rel>/<class>_<i>.jpeg, where
    <class> is ``clase_rel`` with "/" replaced by "_" and <i> restarts at 0
    in every split.

    Args:
        clase_rel: Class folder relative to the split folder; may contain "/".
        src_dir: Folder holding the class's source images.
        seed: Seed for the shuffle. With the default, the same input folder
            always yields the same split. Pass None for a fresh,
            non-reproducible shuffle on each call.
    """
    # os.listdir returns entries in arbitrary order, so sort first; otherwise
    # even a fixed seed would not give a reproducible split.
    img_files = sorted(
        f for f in os.listdir(src_dir)
        if f.lower().endswith(('.jpg', '.jpeg', '.png'))
    )
    # A private Random instance keeps the split independent of (and from
    # disturbing) the global random state used elsewhere in the notebook.
    random.Random(seed).shuffle(img_files)

    total = len(img_files)
    # int() truncates, so train and val are rounded down and test receives
    # whatever remains.
    n_train = int(SPLIT_RATIOS['train'] * total)
    n_val = int(SPLIT_RATIOS['val'] * total)

    split_data = {
        'train': img_files[:n_train],
        'val': img_files[n_train:n_train + n_val],
        'test': img_files[n_train + n_val:]
    }

    flat_class_name = clase_rel.replace('/', '_')
    for split, filenames in split_data.items():
        dest_dir = os.path.join(OUTPUT_PATH, split, clase_rel)
        for i, fname in enumerate(filenames):
            src = os.path.join(src_dir, fname)
            dst = os.path.join(dest_dir, f"{flat_class_name}_{i}.jpeg")
            process_and_save(src, dst)

def main():
    """Run the whole split: reset the output tree, then process each class.

    Classes are handled in the iteration order of CLASSES, and a progress line
    is printed before each one.
    """
    prepare_directories()
    for class_rel in CLASSES:
        print(f"Processing class: {class_rel}")
        split_and_save(class_rel, CLASSES[class_rel])
    print("Split and resize completed. Structure in 'splitted/'.")

# Entry-point guard: run the split pipeline only when this code executes with
# __name__ set to "__main__", not when it is imported from elsewhere.
if __name__ == "__main__":
    main()


Processing class: NORMAL
Processing class: PNEUMONIA/BACTERIAL
Processing class: PNEUMONIA/VIRAL
✅ Split and resize completed. Structure in 'splitted/'.
