In [5]:
import glob
import os
import random
import shutil

def select_random_sample(input_folder, output_folder, num_files, seed=None):
    """
    Select a random sample of CSV files from one folder and copy them to another.

    Parameters:
        input_folder (str): Path to the folder containing CSV files.
        output_folder (str): Path to the folder to copy selected files into.
            Created (including parents) if it does not exist.
        num_files (int): Number of random files to select. If it exceeds the
            number of available CSV files, all of them are selected.
        seed (int, optional): Random seed for reproducibility. When given, a
            private generator is used, so the module-level ``random`` state
            is left untouched.

    Returns:
        list[str]: Source paths of the files that were copied.

    Raises:
        ValueError: If no CSV files are found in ``input_folder`` or if
            ``num_files`` is negative.
    """

    # glob returns matches in arbitrary (filesystem-dependent) order; sort so
    # that the same seed always yields the same sample on every machine.
    csv_files = sorted(glob.glob(os.path.join(input_folder, '*.csv')))

    # Check if any CSV files were found
    if not csv_files:
        raise ValueError(f"No CSV files found in folder: {input_folder}")

    # Fail early with a clear message instead of an opaque random.sample error
    if num_files < 0:
        raise ValueError(f"num_files must be non-negative, got {num_files}")

    # A dedicated generator avoids reseeding the global RNG as a side effect;
    # without a seed, fall back to the shared module-level generator.
    rng = random.Random(seed) if seed is not None else random

    # Randomly choose files (without replacement)
    selected_files = rng.sample(csv_files, min(num_files, len(csv_files)))

    # Create output folder if it doesn't exist
    os.makedirs(output_folder, exist_ok=True)

    # Copy each selected file to the output folder
    for file_path in selected_files:
        shutil.copy(file_path, output_folder)

    print(f"✅ Copied {len(selected_files)} random files from '{input_folder}' → '{output_folder}'")

    return selected_files

In [13]:
# Sampling configuration: the source folder is a relative path, so it is
# resolved against the notebook's current working directory.
seed = 42
num_files = 5
output_folder = "raw_sampled_files"
input_folder = 'asia/Asia'

# Copy the random sample; the returned list of selected paths is the cell's value.
select_random_sample(
    input_folder=input_folder,
    output_folder=output_folder,
    num_files=num_files,
    seed=seed,
)


ValueError: No CSV files found in folder: asia/Asia