In [None]:
import random
import os
import shutil
import numpy as np
from google.colab import drive

# Make the Google Drive contents available under /content/drive.
drive.mount('/content/drive')

# Destination: the flattened training set is written below SPLIT_DATA_PATH.
SPLIT_DATA_PATH = '/content/drive/MyDrive/Spot_IL/Real World Dataset'
TRAIN_PATH = os.path.join(SPLIT_DATA_PATH, 'map01_01_train_5_1')
os.makedirs(TRAIN_PATH, exist_ok=True)

# Source: per-map trajectory folders, plus one goal image per trajectory.
ORIGINAL_DATA_PATH = '/content/drive/MyDrive/Spot_IL/Real World Dataset'
GOAL_DATA_PATH  = '/content/drive/MyDrive/Spot_IL/Real World Dataset/map01_01'
GOAL_IMAGES_PATH = os.path.join(GOAL_DATA_PATH, 'Goal_Images')

# Staging area: samples are first copied here under sequential names and
# are only moved into TRAIN_PATH once everything has been collected.
train_temp_dir = os.path.join(TRAIN_PATH, 'temp')
# An interrupted earlier run can leave numbered folders behind. They would
# either make shutil.copytree fail (destination exists) or be mixed into the
# final shuffle without a matching label, so always start from an empty
# staging directory.
if os.path.isdir(train_temp_dir):
    shutil.rmtree(train_temp_dir)
os.makedirs(train_temp_dir)

# Accumulators shared by the copy loop below: one label per copied sample,
# and the next free sequential sample index.
train_labels = []
train_index = 0

# Names of the map folders (directly under ORIGINAL_DATA_PATH) to process.
map_folders = ['map01_01']

def copy_subfolders_with_goal(subfolders, labels_array, src_traj_path, goal_image_path, dst_temp_path,
                              global_index, label_collector):
    """Copy step folders of one trajectory into sequentially numbered folders.

    Each name in ``subfolders`` is a folder inside ``src_traj_path`` whose
    name is the integer index of its label in ``labels_array``. The folder is
    copied to ``dst_temp_path/<global_index as 5 digits>``, the goal image
    (if any) is placed inside the copy as ``goal.jpg``, and the label is
    appended to ``label_collector``.

    Args:
        subfolders: Iterable of step folder names (decimal digit strings).
        labels_array: Indexable collection of labels for this trajectory.
        src_traj_path: Directory containing the step folders.
        goal_image_path: Path of the goal image, or a falsy value / missing
            file to skip the goal image copy.
        dst_temp_path: Directory receiving the numbered copies.
        global_index: Number to use for the first copied folder.
        label_collector: List that receives one label per copied folder
            (mutated in place).

    Returns:
        The next unused global index (``global_index`` plus the number of
        folders copied).

    Raises:
        ValueError: If a subfolder name is not an integer.
        IndexError: If a subfolder index has no entry in ``labels_array``.
        FileExistsError: If a destination folder already exists
            (raised by ``shutil.copytree``).
    """
    # The goal image path does not change inside the loop, so check it once.
    has_goal_image = bool(goal_image_path) and os.path.exists(goal_image_path)

    for subfolder_name in subfolders:
        # Resolve the label BEFORE touching the filesystem: a bad name or an
        # out-of-range index must not leave behind a copied folder that has
        # no corresponding label.
        label = labels_array[int(subfolder_name)]

        src_folder = os.path.join(src_traj_path, subfolder_name)
        dst_folder = os.path.join(dst_temp_path, f"{global_index:05d}")
        shutil.copytree(src_folder, dst_folder)
        if has_goal_image:
            shutil.copy2(goal_image_path, os.path.join(dst_folder, "goal.jpg"))

        label_collector.append(label)
        global_index += 1
    return global_index

# Walk every configured map, and copy each step folder of each labelled
# trajectory into the staging directory under a running sample index.
for map_name in map_folders:
    map_path = os.path.join(ORIGINAL_DATA_PATH, map_name)
    if not os.path.isdir(map_path):
        continue

    print("------------------------------------------")
    print("Processing Map:", map_name)

    # Trajectory folders are the sub-directories named "traj_*". They are
    # sorted first so the shuffle starts from the same order regardless of
    # the order in which the filesystem lists them.
    all_traj_dirs = sorted(
        d for d in os.listdir(map_path)
        if d.startswith("traj_") and os.path.isdir(os.path.join(map_path, d))
    )
    random.shuffle(all_traj_dirs)
    print(f"  Found {len(all_traj_dirs)} trajectories: {all_traj_dirs}")

    used_train_count = 0

    for traj_name in all_traj_dirs:
        traj_path = os.path.join(map_path, traj_name)
        labels_file = os.path.join(traj_path, "labels.npy")
        if not os.path.exists(labels_file):
            print(f"  WARNING: no labels.npy in {traj_path}, skipping.")
            continue

        # A missing goal image only disables the goal copy; the trajectory
        # itself is still used.
        goal_image_path = os.path.join(GOAL_IMAGES_PATH, f"{traj_name}.jpg")
        if not os.path.exists(goal_image_path):
            print(f"  WARNING: no goal image for {traj_name} at {goal_image_path}, skipping goal image copy.")
            goal_image_path = None

        labels_arr = np.load(labels_file)
        # Step folders are the sub-directories whose name is all digits; the
        # name is used as the index into labels_arr by the copy helper.
        step_dirs = sorted(
            d for d in os.listdir(traj_path)
            if d.isdigit() and os.path.isdir(os.path.join(traj_path, d))
        )
        train_index = copy_subfolders_with_goal(step_dirs, labels_arr, traj_path, goal_image_path,
                                                train_temp_dir, train_index, train_labels)

        # Count and report only after the copy succeeded, so "Completed"
        # never includes a trajectory that is still in progress or failed.
        used_train_count += 1
        if used_train_count % 5 == 0:
            print(f"    Completed {used_train_count} trajectories so far...")

    print(f"  [MAP {map_name}] Used {used_train_count} trajectories.")

# Persist the collected labels next to the staged samples, then move the
# samples into TRAIN_PATH in random order with matching shuffled labels.
np.save(os.path.join(train_temp_dir, 'labels.npy'), train_labels)

train_temp_items = os.listdir(train_temp_dir)
train_dirs_only = sorted(
    d for d in train_temp_items
    if d.isdigit() and os.path.isdir(os.path.join(train_temp_dir, d))
)
order = list(range(len(train_dirs_only)))
random.shuffle(order)

labels_arr = np.load(os.path.join(train_temp_dir, 'labels.npy'), allow_pickle=True)

# Validate before renaming anything: once folders start moving, a mismatch
# would leave the dataset half in the staging directory and half in TRAIN_PATH.
if len(labels_arr) != len(train_dirs_only):
    raise RuntimeError(
        f"Staging directory holds {len(train_dirs_only)} sample folders but "
        f"{len(labels_arr)} labels were collected; refusing to shuffle."
    )

shuffled_train_labels = []
for i, old_idx in enumerate(order):
    old_name = train_dirs_only[old_idx]
    src_folder = os.path.join(train_temp_dir, old_name)
    dst_folder = os.path.join(TRAIN_PATH, f"{i:05d}")
    os.rename(src_folder, dst_folder)
    # The staged folder name IS the sample's index into the label array.
    # Using it directly (instead of the folder's position in the sorted
    # listing) stays correct even when names no longer sort numerically,
    # e.g. once the 5-digit zero padding overflows at 100000 samples.
    shuffled_train_labels.append(labels_arr[int(old_name)])
np.save(os.path.join(TRAIN_PATH, 'labels.npy'), shuffled_train_labels)

# Everything has been moved out; drop the staging directory (including the
# intermediate labels.npy).
shutil.rmtree(train_temp_dir)

print("Done!")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
------------------------------------------
Processing Map: map01_01
  Found 187 trajectories: ['traj_14', 'traj_08', 'traj_98', 'traj_79', 'traj_77', 'traj_129', 'traj_38', 'traj_30', 'traj_22', 'traj_47', 'traj_121', 'traj_02', 'traj_168', 'traj_74', 'traj_180', 'traj_166', 'traj_145', 'traj_112', 'traj_71', 'traj_09', 'traj_137', 'traj_04', 'traj_167', 'traj_69', 'traj_41', 'traj_169', 'traj_149', 'traj_62', 'traj_87', 'traj_122', 'traj_12', 'traj_115', 'traj_88', 'traj_61', 'traj_126', 'traj_52', 'traj_49', 'traj_106', 'traj_150', 'traj_125', 'traj_23', 'traj_114', 'traj_01', 'traj_05', 'traj_173', 'traj_110', 'traj_93', 'traj_17', 'traj_03', 'traj_16', 'traj_72', 'traj_07', 'traj_56', 'traj_99', 'traj_51', 'traj_151', 'traj_184', 'traj_170', 'traj_101', 'traj_83', 'traj_15', 'traj_175', 'traj_172', 'traj_111', 'traj_25', 'traj_90', 'traj_100', 'traj_57', 