In [None]:
import os
import shutil
import random

import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

import tensorflow as tf
from tensorflow.keras import layers

In [None]:
## Folder layout: one sub-folder per class is read from RAW_DIR; the
## train/val/test folders are created under OUT_DIR
RAW_DIR = "data/raw"
OUT_DIR = "data"

## Split ratios. The split below sizes train and val from TRAIN_RATIO and
## VAL_RATIO and gives every remaining file to test, so TEST_RATIO is only
## honoured when the three ratios add up to 1.
TRAIN_RATIO = 0.7
VAL_RATIO = 0.15
TEST_RATIO = 0.15

## Fail fast on an inconsistent configuration instead of silently producing a
## test split whose size differs from TEST_RATIO (tolerance absorbs float error)
if abs(TRAIN_RATIO + VAL_RATIO + TEST_RATIO - 1.0) > 1e-9:
    raise ValueError(
        "TRAIN_RATIO + VAL_RATIO + TEST_RATIO must sum to 1.0, got "
        f"{TRAIN_RATIO + VAL_RATIO + TEST_RATIO}"
    )

## Reproducibility
SEED = 42
random.seed(SEED)

## Identify class labels
## os.listdir returns entries in arbitrary order; sorting keeps the class order
## (and therefore the order in which the seeded RNG is consumed) stable
## across machines and runs
item_list = sorted(os.listdir(RAW_DIR))

## Folder names become class labels
## keeps only folders, e.g. ["apple", "grapes", "mango", "potato"]
classes = [cls for cls in item_list if os.path.isdir(os.path.join(RAW_DIR, cls))]

# Build the output folder tree: <OUT_DIR>/<split>/<class> for every
# combination of dataset split and class label
split_names = ("train", "val", "test")
for split in split_names:
    for clss in classes:
        target_dir = os.path.join(OUT_DIR, split, clss)
        # exist_ok=True: an already existing folder is not an error
        os.makedirs(target_dir, exist_ok=True)

# Split per class
# Each class is shuffled and split on its own, so every split receives the
# same share of each class (class-wise stratification).
# Classes are visited in sorted order and each file list is sorted before it
# is shuffled: os.listdir returns entries in arbitrary order, and without the
# sort the seeded shuffle would not yield the same split on every machine/run.
for cls in sorted(classes):
    # Full path to one class folder, e.g. data/raw/apple
    cls_path = os.path.join(RAW_DIR, cls)
    # Keeps every regular file in the class folder (sub-folders are skipped);
    # there is no filter on the file extension
    images = sorted(
        f for f in os.listdir(cls_path)
        if os.path.isfile(os.path.join(cls_path, f))
    )

    random.shuffle(images)

    # Compute split sizes; int() truncates, so the test split absorbs the
    # rounding remainder
    n_total = len(images)
    n_train = int(TRAIN_RATIO * n_total)
    n_val = int(VAL_RATIO * n_total)

    train_imgs = images[:n_train]
    val_imgs = images[n_train:n_train + n_val]
    test_imgs = images[n_train + n_val:]

    # Copy images into split folders (one loop instead of three copies of it)
    for split, split_imgs in (
        ("train", train_imgs),
        ("val", val_imgs),
        ("test", test_imgs),
    ):
        dest_dir = os.path.join(OUT_DIR, split, cls)
        for img in split_imgs:
            shutil.copy(os.path.join(cls_path, img), os.path.join(dest_dir, img))

print("Train / Validation / Test split completed.")

Train / Validation / Test split completed.


In [None]:
# NOTE: this cell is disabled (every line is commented out).
# The original paths "raw/train", "raw/val" and "raw/test" raised
# FileNotFoundError: the split cell writes its folders under OUT_DIR ("data"),
# i.e. data/train, data/val and data/test. The paths below are corrected
# accordingly; uncomment the cell to load the three datasets.

# import tensorflow as tf

# IMG_SIZE = (224, 224)
# BATCH_SIZE = 32

# train_ds = tf.keras.utils.image_dataset_from_directory(
#     "data/train",
#     image_size=IMG_SIZE,
#     batch_size=BATCH_SIZE,
#     shuffle=True,
#     seed=42
# )

# val_ds = tf.keras.utils.image_dataset_from_directory(
#     "data/val",
#     image_size=IMG_SIZE,
#     batch_size=BATCH_SIZE,
#     shuffle=False
# )

# test_ds = tf.keras.utils.image_dataset_from_directory(
#     "data/test",
#     image_size=IMG_SIZE,
#     batch_size=BATCH_SIZE,
#     shuffle=False
# )

FileNotFoundError: [Errno 2] No such file or directory: 'raw/train'