In [28]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import os
import shutil
from sklearn.model_selection import train_test_split

In [29]:
dataset_path = "./og-dataset"
output_dir = "./sorted-dataset"
os.makedirs(output_dir, exist_ok=True)

# Create output directories

In [30]:
train_dir = os.path.join(output_dir, "train")
val_dir = os.path.join(output_dir, "validation")
test_dir = os.path.join(output_dir, "test")
os.makedirs(train_dir, exist_ok=True)
os.makedirs(val_dir, exist_ok=True)
os.makedirs(test_dir, exist_ok=True)

# Split each class folder into train, val, and test

In [31]:
for class_name in os.listdir(dataset_path):
    class_path = os.path.join(dataset_path, class_name)
    if not os.path.isdir(class_path):
        continue  # Skip non-folder files

    images = os.listdir(class_path)
    train_files, temp_files = train_test_split(images, test_size=0.3, random_state=42)  # 70% train
    val_files, test_files = train_test_split(temp_files, test_size=0.5, random_state=42)  # 15% val, 15% test

    # Copy files to respective directories
    for file_name in train_files:
        src = os.path.join(class_path, file_name)
        dest = os.path.join(train_dir, class_name)
        os.makedirs(dest, exist_ok=True)
        shutil.copy(src, dest)

    for file_name in val_files:
        src = os.path.join(class_path, file_name)
        dest = os.path.join(val_dir, class_name)
        os.makedirs(dest, exist_ok=True)
        shutil.copy(src, dest)

    for file_name in test_files:
        src = os.path.join(class_path, file_name)
        dest = os.path.join(test_dir, class_name)
        os.makedirs(dest, exist_ok=True)
        shutil.copy(src, dest)

# Image Parameters

In [17]:
IMG_HEIGHT = 224
IMG_WIDTH = 224
BATCH_SIZE = 32

# Splitting dataset into training, validation, and testing sets

In [18]:
train_dir = "./sorted-dataset/train"
val_dir = "./sorted-dataset/validation"
test_dir = "./sorted-dataset/test"

len(train_dir), len(val_dir), len(test_dir)

(15, 20, 14)

# Data Augmentation and Rescaling for Training

In [19]:
train_datagen = ImageDataGenerator(
    rescale=1.0 / 255.0,  # Normalize pixel values to [0, 1]
    zoom_range=0.2,       # Zoom in/out by 20%
    validation_split=0.2  # Split into training and validation sets
)

In [20]:
# Data Generator for Training, Testing and validation

In [21]:
train_data = train_datagen.flow_from_directory(
    dataset_path,
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=BATCH_SIZE,
    class_mode="categorical",  # For 257 classes
    subset="training"  # Training split
)

Found 24580 images belonging to 257 classes.


In [22]:
val_data = train_datagen.flow_from_directory(
    dataset_path,
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=BATCH_SIZE,
    class_mode="categorical",
    subset="validation"  # Validation split
)

Found 6027 images belonging to 257 classes.


In [23]:
test_datagen = ImageDataGenerator(rescale=1.0 / 255.0)
test_data = test_datagen.flow_from_directory(
    test_dir,
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=BATCH_SIZE,
    class_mode="categorical"
)

FileNotFoundError: [Errno 2] No such file or directory: './dataset/test'