In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive.
# The dataset paths configured later in this notebook all live under
# /content/drive/MyDrive, so this cell has to run first.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import os
import shutil
import random
from pathlib import Path
from tqdm import tqdm

# Split a class-per-folder dataset into train/ and val/ folders.
#
# Expected layout: source_dir/<class_name>/<file>. For every class folder the
# files are shuffled with a fixed seed, the first `split_ratio` share is copied
# to train_dir/<class_name>/ and the remainder to val_dir/<class_name>/.
# The source dataset is never modified; files are copied, not moved.

# Set the paths
source_dir = '/content/drive/MyDrive/datasetv2'  # Change to your source directory
train_dir = '/content/drive/MyDrive/data/train'
val_dir = '/content/drive/MyDrive/data/val'
split_ratio = 0.8  # Ratio for training data
seed = 42  # Fixed seed: re-running the cell reproduces the exact same split

# Private generator so the split is reproducible without changing the global
# `random` state that other cells might depend on.
rng = random.Random(seed)

# Number of files found in the *opposite* split from the one assigned now.
# The output folders persist on Drive, so copies left by an earlier run with a
# different assignment would put the same image in both train and val.
stale_count = 0

# Ensure directories exist
Path(train_dir).mkdir(parents=True, exist_ok=True)
Path(val_dir).mkdir(parents=True, exist_ok=True)

# Iterate over each class in the source directory.
# sorted(): os.listdir() order is arbitrary, and the order of classes decides
# which random draws each class receives from `rng`.
for class_name in sorted(os.listdir(source_dir)):
    class_dir = os.path.join(source_dir, class_name)

    # Skip stray files and hidden folders (e.g. `.ipynb_checkpoints`), which
    # would otherwise be turned into bogus classes.
    if class_name.startswith('.') or not os.path.isdir(class_dir):
        continue

    # Create class directories in train and val folders
    train_class_dir = os.path.join(train_dir, class_name)
    val_class_dir = os.path.join(val_dir, class_name)
    Path(train_class_dir).mkdir(parents=True, exist_ok=True)
    Path(val_class_dir).mkdir(parents=True, exist_ok=True)

    # List regular, non-hidden files only: shutil.copy2 fails on directories.
    # Sorting first makes the seeded shuffle independent of listing order.
    with os.scandir(class_dir) as entries:
        files = sorted(
            entry.name
            for entry in entries
            if entry.is_file() and not entry.name.startswith('.')
        )
    rng.shuffle(files)

    # Split the files into training and validation sets
    split_index = int(len(files) * split_ratio)
    train_files = files[:split_index]
    val_files = files[split_index:]

    # Detect leftovers from a previous run that landed in the other split.
    # Checked before copying so only pre-existing files are counted.
    stale_count += len(set(os.listdir(val_class_dir)).intersection(train_files))
    stale_count += len(set(os.listdir(train_class_dir)).intersection(val_files))

    # Copy files to the respective directories
    for split_name, dst_dir, split_files in (
        ('train', train_class_dir, train_files),
        ('val', val_class_dir, val_files),
    ):
        for file_name in tqdm(split_files, desc=f"Processing {class_name} - {split_name}"):
            src_file = os.path.join(class_dir, file_name)
            dst_file = os.path.join(dst_dir, file_name)
            shutil.copy2(src_file, dst_file)

if stale_count:
    # Nothing is deleted automatically; the user decides how to clean up.
    print(
        f"WARNING: {stale_count} file(s) already existed in the opposite split "
        f"from an earlier run. Delete {train_dir} and {val_dir} and re-run this "
        "cell to avoid train/val leakage."
    )

print("Dataset split and copied successfully.")


Processing biological - train: 100%|██████████| 788/788 [00:33<00:00, 23.50it/s]
Processing biological - val: 100%|██████████| 197/197 [00:03<00:00, 55.09it/s]
Processing plastic - train: 100%|██████████| 800/800 [00:35<00:00, 22.85it/s]
Processing plastic - val: 100%|██████████| 200/200 [00:03<00:00, 50.10it/s]
Processing metal - train: 100%|██████████| 800/800 [00:56<00:00, 14.09it/s]
Processing metal - val: 100%|██████████| 200/200 [00:03<00:00, 59.10it/s]
Processing trash - train: 100%|██████████| 667/667 [00:43<00:00, 15.38it/s]
Processing trash - val: 100%|██████████| 167/167 [00:02<00:00, 59.67it/s]
Processing paper - train: 100%|██████████| 800/800 [00:35<00:00, 22.45it/s]
Processing paper - val: 100%|██████████| 200/200 [00:03<00:00, 50.29it/s]
Processing glass - train: 100%|██████████| 800/800 [00:59<00:00, 13.48it/s]
Processing glass - val: 100%|██████████| 200/200 [00:03<00:00, 53.52it/s]
Processing cardboard - train: 100%|██████████| 800/800 [00:34<00:00, 23.09it/s]
Proces

Dataset split and copied successfully.



