# Cell 1: Imports & Configuration

# In [None]:
import os
import shutil
import random
from PIL import Image

# Dataset locations: original images, resized copies, and the root of the
# train/val/test split.
BASE_DIR = 'F:/Semester/Research/Medicinal Weedy area plants classifications/Main Dataset/weedy_area_medicinal_plants_Original_size'
RESIZED_DIR = 'F:/Semester/Research/Medicinal Weedy area plants classifications/Main Dataset/resized_medicinal_image_dataset_2'
SPLIT_BASE_DIR = 'F:/Semester/Research/Medicinal Weedy area plants classifications/Main Dataset/medicinal_weedy_area_image_dataset_version2'

# One sub-folder per split, directly beneath SPLIT_BASE_DIR.
TRAIN_DIR, VAL_DIR, TEST_DIR = (
    os.path.join(SPLIT_BASE_DIR, split_name)
    for split_name in ('train', 'val', 'test')
)

# Size every image is resized to.
TARGET_SIZE = (300, 300)

# Fraction of each class assigned to each split.
TRAIN_RATIO = 0.7
VAL_RATIO = 0.15
TEST_RATIO = 0.15

# Make sure every output folder exists before anything is written to it;
# exist_ok=True keeps re-running this cell harmless.
for directory in (RESIZED_DIR, TRAIN_DIR, VAL_DIR, TEST_DIR):
    os.makedirs(directory, exist_ok=True)


# Cell 2: Resize Images

# In [None]:


# Resize every file found in BASE_DIR/<class>/ to TARGET_SIZE and save it as
# RESIZED_DIR/<class>/<same file name>. Files that cannot be processed are
# reported and counted, so the final message reflects what actually happened
# instead of always claiming success.
resized_count = 0
failed_count = 0

# sorted() keeps the processing (and log) order stable between runs;
# os.listdir returns entries in arbitrary order.
for class_name in sorted(os.listdir(BASE_DIR)):
    class_path = os.path.join(BASE_DIR, class_name)

    # Only sub-directories are treated as classes; stray files are ignored.
    if not os.path.isdir(class_path):
        continue

    output_class_path = os.path.join(RESIZED_DIR, class_name)
    os.makedirs(output_class_path, exist_ok=True)

    for img_name in sorted(os.listdir(class_path)):
        img_path = os.path.join(class_path, img_name)
        output_img_path = os.path.join(output_class_path, img_name)

        # Best-effort per file: one unreadable or unsupported file must not
        # abort the whole run, so any error is reported and counted here.
        try:
            with Image.open(img_path) as img:
                # Convert to RGB first so palette/alpha/greyscale inputs all
                # end up with the same three channels. resize() forces the
                # exact TARGET_SIZE, so aspect ratio is not preserved.
                resized = img.convert("RGB").resize(TARGET_SIZE)
                # Output format is chosen by Pillow from the file extension.
                resized.save(output_img_path)
        except Exception as e:
            failed_count += 1
            print(f"❌ Failed to process {img_path}: {e}")
        else:
            resized_count += 1

if failed_count:
    print(
        f"⚠️ Image resizing finished: {resized_count} resized, "
        f"{failed_count} failed."
    )
else:
    print("✅ Image resizing completed successfully.")


# Cell 3: Dataset Splitting

# In [None]:

# Copy each class folder of RESIZED_DIR into train/val/test sub-folders.
#
# The split is made reproducible: file names are sorted (os.listdir order is
# arbitrary) and shuffled with a seeded generator. Without this, re-running
# the cell produces a different assignment while the copies from the previous
# run are still on disk, so the same image can end up in several splits.
SPLIT_SEED = 42
IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg')

for class_name in sorted(os.listdir(RESIZED_DIR)):
    class_path = os.path.join(RESIZED_DIR, class_name)

    # Only sub-directories are treated as classes.
    if not os.path.isdir(class_path):
        continue

    images = sorted(
        f for f in os.listdir(class_path)
        if f.lower().endswith(IMAGE_EXTENSIONS)
    )
    # Seed per class so one class's split does not depend on which other
    # classes exist. A str seed is hashed deterministically by random.Random.
    random.Random(f"{SPLIT_SEED}:{class_name}").shuffle(images)

    n_total = len(images)
    n_train = int(TRAIN_RATIO * n_total)
    n_val = int(VAL_RATIO * n_total)

    train_images = images[:n_train]
    val_images = images[n_train:n_train + n_val]
    # The test split takes whatever remains, so images lost to the int()
    # truncation above land here instead of being dropped.
    test_images = images[n_train + n_val:]

    # int() truncation can leave the training split empty for tiny classes.
    if n_total and not train_images:
        print(
            f"⚠️ Class '{class_name}' has only {n_total} image(s); "
            "its train split is empty."
        )

    for split, img_list in zip(
        [TRAIN_DIR, VAL_DIR, TEST_DIR],
        [train_images, val_images, test_images]
    ):
        split_class_dir = os.path.join(split, class_name)
        os.makedirs(split_class_dir, exist_ok=True)

        for img_name in img_list:
            src = os.path.join(class_path, img_name)
            dst = os.path.join(split_class_dir, img_name)
            shutil.copy(src, dst)

print("✅ Dataset split completed: train, val, and test sets created.")
