In [None]:
from sklearn.model_selection import train_test_split
import os
import shutil
import numpy as np
from collections import Counter

# Build a stratified train/val split from the per-class folders in data_dir
# and copy the files into ../trainingDatav3/{train,val}/<class_name>/.
data_dir = '../final_testset/2021'
# os.listdir returns entries in arbitrary order; sort so that the seeded
# split below selects the same files on every run and on every machine.
class_dirs = sorted(
    d for d in os.listdir(data_dir) if os.path.isdir(os.path.join(data_dir, d))
)

# Collect all file paths and labels (the label is the class folder name)
file_paths = []
labels = []

for class_name in class_dirs:
    class_path = os.path.join(data_dir, class_name)
    for file in sorted(os.listdir(class_path)):
        file_path = os.path.join(class_path, file)
        # Skip nested directories: shutil.copy2 only copies regular files.
        if os.path.isfile(file_path):
            file_paths.append(file_path)
            labels.append(class_name)

# Split data while preserving class ratios (15% of each class goes to val)
X_train, X_val, y_train, y_val = train_test_split(
    file_paths, labels, test_size=0.15, random_state=42, stratify=labels
)

# Output roots for the two splits
train_dir = '../trainingDatav3/train'
val_dir = '../trainingDatav3/val'

# Create the split/class folders and copy every file into its class folder.
# NOTE: files already present in the output folders are not removed, so a
# re-run with different inputs can leave stale files from an earlier split.
for split_dir, split_paths, split_labels in (
    (train_dir, X_train, y_train),
    (val_dir, X_val, y_val),
):
    os.makedirs(split_dir, exist_ok=True)
    for class_name in class_dirs:
        os.makedirs(os.path.join(split_dir, class_name), exist_ok=True)

    for file_path, label in zip(split_paths, split_labels):
        filename = os.path.basename(file_path)
        dst_path = os.path.join(split_dir, label, filename)
        shutil.copy2(file_path, dst_path)

print(f"Train set: {len(X_train)} files")
print(f"Val set: {len(X_val)} files")
print("Class distribution in train:", Counter(y_train))
print("Class distribution in val:", Counter(y_val))

In [4]:
import os
# Root folder whose train/ and val/ sub-folders are processed below.
data_dir = "../trainingDatav3"
# Class folder names; each one is also the suffix appended to its files.
class_dirs = [
    "var",
    "yok",
]
# Function to rename files in a directory based on their class
def rename_files_by_class(base_dir, class_names=None):
    """Append the class name to every PNG filename in the train/val splits.

    For each ``<base_dir>/<split>/<class_name>`` folder (``split`` is
    ``train`` or ``val``), a file ``<name>.png`` is renamed to
    ``<name>_<class_name>.png``. Files whose name already ends in
    ``_<class_name>`` are skipped, so calling the function again does not
    add a second suffix. Non-PNG files and missing folders are ignored.

    A file is also skipped when the target name already exists in the
    folder; ``os.rename`` would otherwise silently replace that file on
    POSIX systems.

    Args:
        base_dir: Directory that contains the ``train`` and ``val`` folders.
        class_names: Iterable of class folder names. When ``None`` the
            module-level ``class_dirs`` list is used.
    """
    if class_names is None:
        class_names = class_dirs

    for split in ('train', 'val'):
        for class_name in class_names:
            class_dir = os.path.join(base_dir, split, class_name)
            if not os.path.isdir(class_dir):
                continue

            renamed_count = 0
            # Sorted so the processing order (and the log) is deterministic.
            for filename in sorted(os.listdir(class_dir)):
                if not filename.endswith('.png'):
                    continue

                # Get the base name without extension
                base_name = os.path.splitext(filename)[0]
                if base_name.split('_')[-1] == class_name:
                    print(f"Skipping {filename} as it already has the class suffix.")
                    continue

                new_filename = f"{base_name}_{class_name}.png"
                old_path = os.path.join(class_dir, filename)
                new_path = os.path.join(class_dir, new_filename)

                # Never clobber an existing file with the target name.
                if os.path.exists(new_path):
                    print(f"Skipping {filename} because {new_filename} already exists.")
                    continue

                os.rename(old_path, new_path)
                renamed_count += 1

            print(f"Renamed {renamed_count} files in {class_dir}")

# Add class suffixes to the files under data_dir, then print a confirmation.
rename_files_by_class(base_dir=data_dir)
print("All files have been renamed with their class suffixes.")

Skipping 10002_var.png as it already has the class suffix.
Skipping 100091_var.png as it already has the class suffix.
Skipping 10017_var.png as it already has the class suffix.
Skipping 100236_var.png as it already has the class suffix.
Skipping 10024_var.png as it already has the class suffix.
Skipping 10028_var.png as it already has the class suffix.
Skipping 10033_var.png as it already has the class suffix.
Skipping 10036_var.png as it already has the class suffix.
Skipping 10038_var.png as it already has the class suffix.
Skipping 10045_var.png as it already has the class suffix.
Skipping 10046_var.png as it already has the class suffix.
Skipping 10047_var.png as it already has the class suffix.
Skipping 10049_var.png as it already has the class suffix.
Skipping 10050_var.png as it already has the class suffix.
Skipping 10052_var.png as it already has the class suffix.
Skipping 10053_var.png as it already has the class suffix.
Skipping 10062_var.png as it already has the class suf