<a href="https://colab.research.google.com/github/TharinsaMudalige/Neuron-Brain_Tumor_Detection_Classification_with_XAI/blob/Detection-Classficiation-CNN/Splitting_and_Labeling.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# 1. Mount Google Drive
# Mounts at /content/drive; the dataset paths configured below live under
# /content/drive/MyDrive, so this must run before anything touches them.
from google.colab import drive
drive.mount('/content/drive')

import os
import shutil
import random

# ================================
# 2. CONFIGURE PATHS & SPLIT RATIOS
# ================================
# Update these paths as needed:
ORIG_DATASET_PATH = '/content/drive/MyDrive/DSGP/Original_Dataset'  # folder with your 16 class subfolders
SPLIT_DATASET_PATH = '/content/drive/MyDrive/DSGP/Splitted_Dataset' # where you want the split dataset saved

TRAIN_RATIO = 0.8
VAL_RATIO   = 0.1
TEST_RATIO  = 0.1

# The test split simply receives whatever is left after train and val, so the
# three ratios have to add up to 1 for TEST_RATIO to actually be honoured.
if abs(TRAIN_RATIO + VAL_RATIO + TEST_RATIO - 1.0) > 1e-9:
    raise ValueError(
        "TRAIN_RATIO, VAL_RATIO and TEST_RATIO must sum to 1 "
        f"(got {TRAIN_RATIO + VAL_RATIO + TEST_RATIO})"
    )

# 3. CREATE TRAIN/VAL/TEST FOLDERS
for split in ['train', 'val', 'test']:
    os.makedirs(os.path.join(SPLIT_DATASET_PATH, split), exist_ok=True)

# 4. LIST ALL CLASS FOLDERS (LABELS)
# Sorted because os.listdir returns entries in arbitrary order and the single
# seeded RNG below is consumed class by class: a different class order would
# give every class a different shuffle.
classes = sorted(
    d for d in os.listdir(ORIG_DATASET_PATH)
    if os.path.isdir(os.path.join(ORIG_DATASET_PATH, d))
)

# Ensure reproducibility
random.seed(42)

# 5. SPLIT IMAGES FOR EACH CLASS
# NOTE: files are only ever added to SPLIT_DATASET_PATH, never removed. If the
# ratios or the seed are changed, clear the output folder first, otherwise
# copies from the previous run remain and an image can end up in two splits.
for class_name in classes:
    class_folder = os.path.join(ORIG_DATASET_PATH, class_name)

    # Gather all image files. Sorting gives the seeded shuffle a fixed starting
    # order, so the same files land in the same split on every run.
    images = sorted(
        f for f in os.listdir(class_folder)
        if f.lower().endswith(('.jpg', '.jpeg', '.png'))
    )

    # Shuffle images for a random split
    random.shuffle(images)

    # Train and val sizes are rounded down; test takes the remainder, so
    # every image is assigned to exactly one split.
    n_total = len(images)
    n_train = int(n_total * TRAIN_RATIO)
    n_val   = int(n_total * VAL_RATIO)

    split_files = {
        'train': images[:n_train],
        'val':   images[n_train:n_train + n_val],
        'test':  images[n_train + n_val:],
    }

    # Create a subfolder named after the class in each split folder and copy
    # that split's images into it.
    for split, files in split_files.items():
        dest_dir = os.path.join(SPLIT_DATASET_PATH, split, class_name)
        os.makedirs(dest_dir, exist_ok=True)
        for f in files:
            shutil.copy(os.path.join(class_folder, f), os.path.join(dest_dir, f))

print("Splitting complete!")
print(f"Train/Val/Test sets are saved in: {SPLIT_DATASET_PATH}")


Mounted at /content/drive
Splitting complete!
Train/Val/Test sets are saved in: /content/drive/MyDrive/DSGP/Splitted_Dataset


In [None]:
import os
import csv

# Root of the split dataset (the folder holding the train/, val/ and test/
# subfolders created previously). create_csv_for_split reads images from the
# split subfolders under this path and writes the <split>_labels.csv files
# directly into it. Update it if the split dataset was saved somewhere else.
SPLIT_DATASET_PATH = '/content/drive/MyDrive/DSGP/Splitted_Dataset'

def create_csv_for_split(split_name, dataset_path=None):
    """Write ``<split_name>_labels.csv`` listing every image of one split.

    Each class subfolder of ``<dataset_path>/<split_name>`` is scanned for
    ``.jpg``/``.jpeg``/``.png`` files (case-insensitive). The CSV is saved
    directly inside ``dataset_path`` with the header ``image_path,label``,
    one row per image: the absolute image path and the class folder name.

    Args:
        split_name: Name of the split subfolder, e.g. ``'train'``.
        dataset_path: Root folder of the split dataset. Defaults to the
            module-level ``SPLIT_DATASET_PATH`` when omitted.

    Raises:
        FileNotFoundError: If the split folder does not exist.
    """
    if dataset_path is None:
        dataset_path = SPLIT_DATASET_PATH

    split_dir = os.path.join(dataset_path, split_name)
    csv_data = []

    # os.listdir returns entries in arbitrary order; sort both levels so the
    # CSV rows come out in the same order on every run.
    for class_name in sorted(os.listdir(split_dir)):
        class_path = os.path.join(split_dir, class_name)
        if not os.path.isdir(class_path):
            # Stray files sitting next to the class folders carry no label.
            continue
        for fname in sorted(os.listdir(class_path)):
            if fname.lower().endswith(('.jpg', '.jpeg', '.png')):
                # Store the full absolute path of the image
                full_path = os.path.abspath(os.path.join(class_path, fname))
                csv_data.append([full_path, class_name])

    # Write CSV file. The encoding is explicit so non-ASCII file names are
    # written the same way regardless of the platform default.
    csv_filename = os.path.join(dataset_path, f"{split_name}_labels.csv")
    with open(csv_filename, 'w', newline='', encoding='utf-8') as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(['image_path', 'label'])
        writer.writerows(csv_data)
    print(f"CSV for {split_name} split saved to: {csv_filename}")

# Produce one label CSV per split, in train -> val -> test order
for split in ('train', 'val', 'test'):
    create_csv_for_split(split)


CSV for train split saved to: /content/drive/MyDrive/DSGP/Splitted_Dataset/train_labels.csv
CSV for val split saved to: /content/drive/MyDrive/DSGP/Splitted_Dataset/val_labels.csv
CSV for test split saved to: /content/drive/MyDrive/DSGP/Splitted_Dataset/test_labels.csv
