In [3]:
# 1) Path setup
from pathlib import Path
import os

# Location of the unzipped chest X-ray dataset on this machine.
DATA_DIR = Path(
    r"C:\Users\SANTOSH KUMAR SAHOO\Downloads\archive (6)\chest_xray"
)

# Echo the path and confirm it resolves to an existing directory before
# anything downstream tries to read from it.
print(f"DATA_DIR: {DATA_DIR}")
print(f"Exists? {DATA_DIR.exists()}")
print(f"Is dir? {DATA_DIR.is_dir()}")

# 2) Sanity-check the layout: for every split, list each class folder and the
#    number of image files (matched by extension, searched recursively) in it.
img_exts = {".png", ".jpg", ".jpeg"}
for split in ("train", "val", "test"):
    split_dir = DATA_DIR / split
    split_found = split_dir.exists()
    print(f"\n[{split}] -> exists: {split_found}")

    # Guard clause: nothing to count when the split folder is absent.
    if not split_found:
        print(f"  Missing folder: {split_dir}")
        continue

    # Each immediate sub-directory is treated as one class, reported in
    # sorted order.
    class_dirs = sorted(entry for entry in split_dir.iterdir() if entry.is_dir())
    for cls in class_dirs:
        matches = [f for f in cls.rglob("*") if f.suffix.lower() in img_exts]
        count = len(matches)
        print(f"  {cls.name:10} -> {count} images")

# 3) Data generators.
#    Built only when DATA_DIR is a directory AND all three split folders are
#    present; otherwise a readable message is printed and no generator names
#    (train_gen / val_gen / test_gen) are defined.
split_dirs = {name: DATA_DIR / name for name in ("train", "val", "test")}
missing_splits = [name for name, path in split_dirs.items() if not path.is_dir()]

if not DATA_DIR.is_dir():
    print("\n❌ DATA_DIR not found. Please unzip the dataset properly.")
elif missing_splits:
    # Fail early with the folder names instead of letting
    # flow_from_directory() fail on a path that is not there.
    print(f"\n❌ Missing split folder(s) in DATA_DIR: {', '.join(missing_splits)}")
else:
    try:
        # `tf` is not used in this block; it is kept in scope for any later
        # code that relies on it.
        import tensorflow as tf
        from tensorflow.keras.preprocessing.image import ImageDataGenerator
    except ModuleNotFoundError as err:
        # Only decorate the "TensorFlow itself is not installed" case; any
        # other missing module is re-raised untouched so its message stays
        # accurate.
        if err.name != "tensorflow":
            raise
        raise ModuleNotFoundError(
            "No module named 'tensorflow'. Install it in the environment this "
            "kernel runs in (e.g. `pip install tensorflow`), restart the "
            "kernel, and re-run this cell.",
            name=err.name,
        ) from err

    # NOTE(review): the Keras documentation marks ImageDataGenerator as
    # deprecated in favour of tf.keras.utils.image_dataset_from_directory;
    # consider migrating when convenient.

    IMG_SIZE = 224
    BATCH = 32
    SEED = 42

    train_dir = str(split_dirs["train"])
    val_dir = str(split_dirs["val"])
    test_dir = str(split_dirs["test"])

    # Training pipeline: pixel rescaling plus mild random geometric
    # augmentation.
    # NOTE(review): horizontal_flip mirrors left/right in the image — confirm
    # that is acceptable for chest X-rays before relying on it.
    train_aug = ImageDataGenerator(
        rescale=1./255,
        rotation_range=10,
        width_shift_range=0.05,
        height_shift_range=0.05,
        zoom_range=0.1,
        horizontal_flip=True,
        fill_mode="nearest",
    )

    # Validation/test images are only rescaled, never augmented.
    val_test_aug = ImageDataGenerator(rescale=1./255)

    # Settings shared by all three directory iterators.
    flow_kwargs = {
        "target_size": (IMG_SIZE, IMG_SIZE),
        "batch_size": BATCH,
        "class_mode": "binary",
    }

    # Training batches are shuffled, seeded for reproducibility.
    train_gen = train_aug.flow_from_directory(
        train_dir,
        shuffle=True,
        seed=SEED,
        **flow_kwargs,
    )

    # Evaluation iterators are not shuffled.
    val_gen = val_test_aug.flow_from_directory(
        val_dir,
        shuffle=False,
        **flow_kwargs,
    )

    test_gen = val_test_aug.flow_from_directory(
        test_dir,
        shuffle=False,
        **flow_kwargs,
    )

    print("✅ Class indices:", train_gen.class_indices)


DATA_DIR: C:\Users\SANTOSH KUMAR SAHOO\Downloads\archive (6)\chest_xray
Exists? True
Is dir? True

[train] -> exists: True
  NORMAL     -> 1341 images
  PNEUMONIA  -> 3875 images

[val] -> exists: True
  NORMAL     -> 8 images
  PNEUMONIA  -> 8 images

[test] -> exists: True
  NORMAL     -> 234 images
  PNEUMONIA  -> 390 images


ModuleNotFoundError: No module named 'tensorflow'