# In [None]:
import os
import shutil
import pandas as pd

# Locations (adjust these to match your own directory layout)
csv_path = "path_to_your_csv_file.csv"  # CSV file that lists the PNGs to organize
png_folder = "path_to_your_png_folder"  # Folder that holds the source PNGs
output_dir = "organized_data"  # Root of the organized output tree

# Destination layout: <output_dir>/<split>/<class>
_train_root = os.path.join(output_dir, "train")
_val_root = os.path.join(output_dir, "val")
folders = {
    "streak_train": os.path.join(_train_root, "streak"),
    "no_streak_train": os.path.join(_train_root, "no_streak"),
    "streak_val": os.path.join(_val_root, "streak"),
    "no_streak_val": os.path.join(_val_root, "no_streak"),
}

# Make sure every destination directory exists before any copying starts
for key in folders:
    os.makedirs(folders[key], exist_ok=True)

# Read the CSV into a DataFrame
data = pd.read_csv(csv_path)

# Randomly sample 80% of the rows for training (fixed seed keeps the split
# reproducible); the rows that were not sampled form the validation set
train_data = data.sample(random_state=42, frac=0.8)
val_data = data.drop(index=train_data.index)

# Copy (not move) each listed PNG into the class folder for the given split.
def move_files(dataframe, split, src_dir=None, dest_folders=None):
    """Copy every PNG listed in *dataframe* into its class folder for *split*.

    Despite the name, files are copied with ``shutil.copy``; the originals
    stay in the source directory. Rows whose filename is missing, or whose
    file cannot be found, are reported on stdout and skipped.

    Args:
        dataframe: Table with an ``output`` column (PNG filename) and a
            ``label`` column (``1`` selects the "streak" folder; any other
            value selects "no_streak").
        split: Split name used to build the destination keys
            ``f"streak_{split}"`` and ``f"no_streak_{split}"``.
        src_dir: Directory holding the source PNGs. Defaults to the
            module-level ``png_folder``.
        dest_folders: Mapping from ``"<class>_<split>"`` keys to destination
            directories. Defaults to the module-level ``folders``.

    Raises:
        KeyError: If *dest_folders* has no entry for *split*, or *dataframe*
            lacks the ``output`` / ``label`` columns.
    """
    if src_dir is None:
        src_dir = png_folder
    if dest_folders is None:
        dest_folders = folders

    # Both destinations are loop-invariant, so resolve them once up front.
    streak_dir = dest_folders[f"streak_{split}"]
    no_streak_dir = dest_folders[f"no_streak_{split}"]
    for target_dir in (streak_dir, no_streak_dir):
        os.makedirs(target_dir, exist_ok=True)

    # Walking the two columns directly avoids building a Series per row.
    for png_name, label in zip(dataframe["output"], dataframe["label"]):
        # Blank cells typically arrive as NaN/None, which os.path.join rejects.
        if not isinstance(png_name, str) or not png_name:
            print(f"Skipping row with missing PNG filename: {png_name!r}")
            continue

        src_path = os.path.join(src_dir, png_name)
        # isfile (not exists) so a directory is never handed to shutil.copy.
        if not os.path.isfile(src_path):
            print(f"File {png_name} not found in {src_dir}")
            continue

        dest_folder = streak_dir if label == 1 else no_streak_dir
        shutil.copy(src_path, os.path.join(dest_folder, png_name))

# Populate the output tree one split at a time: training first, then validation
for split_name, split_frame in (("train", train_data), ("val", val_data)):
    move_files(split_frame, split_name)

print("Data organized successfully!")
