# Split data!

In [1]:
import os
import random

In [2]:
def get_percentage_of_files(directory, percentage):
    """
    Get a certain percentage of the files in a folder.

    Parameters:
    directory (str): The path to the folder.
    percentage (float): The percentage of files to retrieve (0-100).

    Returns:
    list: A list of selected file paths.
    """
    if not os.path.isdir(directory):
        raise ValueError(f"The directory {directory} does not exist.")
    
    if not (0 <= percentage <= 100):
        raise ValueError("Percentage must be between 0 and 100.")
    
    # Get all files in the directory
    all_files = [os.path.join(directory, f) for f in os.listdir(directory) if os.path.isfile(os.path.join(directory, f))]
    
    # Calculate the number of files to select
    num_files_to_select = int(len(all_files) * (percentage / 100))
    
    # Randomly select the files
    selected_files = random.sample(all_files, num_files_to_select)
    
    return selected_files

# Example usage:


In [8]:
directory_path = os.getcwd() + '/images'
percentage = 100  # For example, to get 20% of the files
selected_files = get_percentage_of_files(directory_path, percentage)
print(selected_files)
print(len(selected_files))

['C:\\Users\\Guilherme\\Desktop\\dataset/images\\sp_151.jpg', 'C:\\Users\\Guilherme\\Desktop\\dataset/images\\sp_583.jpg', 'C:\\Users\\Guilherme\\Desktop\\dataset/images\\CANNES_TILES_512x512.1039.png', 'C:\\Users\\Guilherme\\Desktop\\dataset/images\\sp_2974.jpg', 'C:\\Users\\Guilherme\\Desktop\\dataset/images\\sp_4610.jpg', 'C:\\Users\\Guilherme\\Desktop\\dataset/images\\CANNES_TILES_512x512.1379.png', 'C:\\Users\\Guilherme\\Desktop\\dataset/images\\CANNES_TILES_512x512.2121.png', 'C:\\Users\\Guilherme\\Desktop\\dataset/images\\sp_2306.jpg', 'C:\\Users\\Guilherme\\Desktop\\dataset/images\\sp_3950.jpg', 'C:\\Users\\Guilherme\\Desktop\\dataset/images\\CANNES_TILES_512x512.1100.png', 'C:\\Users\\Guilherme\\Desktop\\dataset/images\\CANNES_TILES_512x512.2002.png', 'C:\\Users\\Guilherme\\Desktop\\dataset/images\\CANNES_TILES_512x512.310.png', 'C:\\Users\\Guilherme\\Desktop\\dataset/images\\CANNES_TILES_512x512.977.png', 'C:\\Users\\Guilherme\\Desktop\\dataset/images\\sp_1951.jpg', 'C:\\User

In [9]:
#updated_filepaths = []
for file in selected_files:
    normalized_path = file.replace('\\', '/')
    parts = normalized_path.rsplit('/', 1)
    new_path = f"{parts[0]}/train/{parts[1]}"
    os.rename(normalized_path,new_path)
    #updated_filepaths.append(new_path)

In [10]:
print(updated_filepaths)

['C:/Users/Guilherme/Desktop/Deep-Learning-Project-main/dataset_yolo/images/val/sp_2981.jpg', 'C:/Users/Guilherme/Desktop/Deep-Learning-Project-main/dataset_yolo/images/val/sp_2367.jpg', 'C:/Users/Guilherme/Desktop/Deep-Learning-Project-main/dataset_yolo/images/val/sp_1099.jpg', 'C:/Users/Guilherme/Desktop/Deep-Learning-Project-main/dataset_yolo/images/val/sp_4011.jpg', 'C:/Users/Guilherme/Desktop/Deep-Learning-Project-main/dataset_yolo/images/val/sp_3107.jpg', 'C:/Users/Guilherme/Desktop/Deep-Learning-Project-main/dataset_yolo/images/val/CANNES_TILES_512x512.189.png', 'C:/Users/Guilherme/Desktop/Deep-Learning-Project-main/dataset_yolo/images/val/sp_1713.jpg', 'C:/Users/Guilherme/Desktop/Deep-Learning-Project-main/dataset_yolo/images/val/sp_423.jpg', 'C:/Users/Guilherme/Desktop/Deep-Learning-Project-main/dataset_yolo/images/val/sp_41.jpg', 'C:/Users/Guilherme/Desktop/Deep-Learning-Project-main/dataset_yolo/images/val/sp_3116.jpg', 'C:/Users/Guilherme/Desktop/Deep-Learning-Project-main/

In [12]:
directory_path = os.getcwd() + '/dataset_yolo/images'
percentage = 100  # For example, to get 20% of the files
selected_files = get_percentage_of_files(directory_path, percentage)
print(selected_files)
print(len(selected_files))

['C:\\Users\\Guilherme\\Desktop\\Deep-Learning-Project-main/dataset_yolo/images\\CANNES_TILES_512x512.1291.png', 'C:\\Users\\Guilherme\\Desktop\\Deep-Learning-Project-main/dataset_yolo/images\\sp_3393.jpg', 'C:\\Users\\Guilherme\\Desktop\\Deep-Learning-Project-main/dataset_yolo/images\\CANNES_TILES_512x512.1409.png', 'C:\\Users\\Guilherme\\Desktop\\Deep-Learning-Project-main/dataset_yolo/images\\sp_2599.jpg', 'C:\\Users\\Guilherme\\Desktop\\Deep-Learning-Project-main/dataset_yolo/images\\CANNES_TILES_512x512.2250.png', 'C:\\Users\\Guilherme\\Desktop\\Deep-Learning-Project-main/dataset_yolo/images\\sp_3839.jpg', 'C:\\Users\\Guilherme\\Desktop\\Deep-Learning-Project-main/dataset_yolo/images\\CANNES_TILES_512x512.1351.png', 'C:\\Users\\Guilherme\\Desktop\\Deep-Learning-Project-main/dataset_yolo/images\\sp_490.jpg', 'C:\\Users\\Guilherme\\Desktop\\Deep-Learning-Project-main/dataset_yolo/images\\sp_2294.jpg', 'C:\\Users\\Guilherme\\Desktop\\Deep-Learning-Project-main/dataset_yolo/images\\sp

In [13]:
for file in selected_files:
    normalized_path = file.replace('\\', '/')
    parts = normalized_path.rsplit('/', 1)
    new_path = f"{parts[0]}/train/{parts[1]}"
    os.rename(normalized_path,new_path)
    #updated_filepaths.append(new_path)