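Make the master file: write the path of every DES/SLICS mock catalogue, one per line, to `input/master_file_cov.txt`.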

In [3]:
import os

# Root directory that holds one "LOS<number>" sub-directory per line of sight
root_directory = "/n17data/tersenov/Cov_DES_SLICS"

# Master text file that receives one catalogue path per line (overwritten on every run)
master_file_path = ".././input/master_file_cov.txt"
with open(master_file_path, "w") as master_file:
    # os.listdir returns entries in arbitrary order, so sort them to make the
    # master file identical from run to run
    for subdir in sorted(os.listdir(root_directory)):
        subdir_path = os.path.join(root_directory, subdir)

        # Keep only directories named "LOS" followed by digits; a regular file
        # with such a name would otherwise make the inner os.listdir call fail
        if not (subdir.startswith("LOS") and subdir[3:].isdigit() and os.path.isdir(subdir_path)):
            continue

        for file_name in sorted(os.listdir(subdir_path)):
            # Catalogue files are named "DES_MocksCat_SLICS_4_Bin....dat"
            if file_name.startswith("DES_MocksCat_SLICS_4_Bin") and file_name.endswith(".dat"):
                master_file.write(os.path.join(subdir_path, file_name) + "\n")


In [1]:
import numpy as np

def parse_cov_SLICS_filenames(file_paths):
    """Read the bin, LOS and tile numbers encoded in each catalogue file name.

    The file name (the text after the last "/") is split on "_"; the fields at
    positions 4, 5 and 6 are expected to look like "Bin<n>", "LOS<n>" and
    "R<n>.dat" respectively.

    Parameters
    ----------
    file_paths : sequence of str
        Paths (or bare file names) of the catalogue files.

    Returns
    -------
    numpy.recarray
        One record per input path, in input order, with integer fields
        'bin', 'LOS' and 'tile'.

    Raises
    ------
    IndexError
        If a file name has fewer than seven "_"-separated fields.
    ValueError
        If one of the extracted fields is not an integer.
    """
    catalogue = np.recarray(len(file_paths), dtype=[('bin', int), ('LOS', int), ('tile', int)])

    for idx, path in enumerate(file_paths):
        # Only the last path component carries the encoded numbers
        tokens = path.split("/")[-1].split("_")

        bin_id = int(tokens[4][3:])     # digits following "Bin"
        los_id = int(tokens[5][3:])     # digits following "LOS"
        tile_id = int(tokens[6][1:-4])  # digits between "R" and the 4-character extension

        # Fill the whole record at once; the tuple follows the dtype field order
        catalogue[idx] = (bin_id, los_id, tile_id)

    return catalogue


In [3]:
# Location of the master file that lists one catalogue path per line
master_file_path = ".././input/master_file_cov.txt"

# Load every line of the master file, stripping the newline and any surrounding whitespace
with open(master_file_path, "r") as master_file:
    file_paths = [line.strip() for line in master_file]

# Extract the bin, LOS and tile numbers encoded in each file name
parsed_cov_data = parse_cov_SLICS_filenames(file_paths)


In [55]:
import random  # random.choice is used below; imported here so the cell runs on a fresh kernel

los_numbers = np.unique(parsed_cov_data['LOS']) # List of all LOS numbers
num_realizations = 124 # Number of realizations
num_tiles_per_realization = 19 # Number of tiles to select for each realization

num_bins = 4
bin_number = 1

# Collections of selected files for this bin: one list of paths per realization
collections_of_files = []

# Loop-invariant pre-filter: every pattern searched below starts with this
# prefix, so paths of other bins can be discarded once, up front
bin_file_paths = [filename for filename in file_paths if f"Bin{bin_number}_LOS" in filename]

# Iterate through realizations
for realization in range(num_realizations):
    # Files selected for this realization, ordered by tile number
    selected_tiles = []

    # LOS numbers still unused in this realization; each is drawn at most once
    available_los_numbers = list(los_numbers)

    # Iterate through tiles
    for tile_number in range(1, num_tiles_per_realization + 1):
        # Randomly select a LOS from the available options
        selected_los = random.choice(available_los_numbers)

        # The trailing "." keeps e.g. "R1." from matching "R19."
        filename_pattern = f"Bin{bin_number}_LOS{selected_los}_R{tile_number}."

        # Find the matching file in the list of file paths
        matching_files = [filename for filename in bin_file_paths if filename_pattern in filename]

        # Fail with an explicit message instead of a bare IndexError on [0]
        if not matching_files:
            raise FileNotFoundError(
                f"No catalogue matching '{filename_pattern}' in file_paths "
                f"(realization {realization})"
            )

        selected_tiles.append(matching_files[0])

        # Remove the selected LOS from the list of available LOS options
        available_los_numbers.remove(selected_los)

    # Append the list of selected files for this realization to the collections_of_files
    collections_of_files.append(selected_tiles)


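The same as the previous cell, but ensuring that each file is included at most once across all realizations. Unlike the previous cell, LOS numbers are not removed after being drawn, so a LOS may appear more than once within a realization.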

In [61]:
import random  # random.shuffle is used below; imported here so the cell runs on a fresh kernel

los_numbers = np.unique(parsed_cov_data['LOS']) # List of all LOS numbers
num_realizations = 124 # Number of realizations
num_tiles_per_realization = 19 # Number of tiles to select for each realization

num_bins = 4
bin_number = 1

# Collections of selected files for this bin: one list of paths per realization
collections_of_files = []

# Every file already assigned to some realization; a file is never reused
selected_filenames = set()

# Loop-invariant pre-filter: every pattern searched below starts with this
# prefix, so paths of other bins can be discarded once, up front
bin_file_paths = [filename for filename in file_paths if f"Bin{bin_number}_LOS" in filename]

# Iterate through realizations
for realization in range(num_realizations):
    # Files selected for this realization, ordered by tile number
    selected_tiles = []

    # LOS numbers that may be drawn in this realization. The list is not
    # shortened below, so a LOS can be drawn for several tiles of one realization.
    available_los_numbers = list(los_numbers)

    # Iterate through tiles
    for tile_number in range(1, num_tiles_per_realization + 1):
        selected_file = None

        # Visit the LOS numbers in random order, each at most once. This picks
        # uniformly among the LOS that still have an unused file for this tile,
        # like retrying random draws, but it always terminates.
        candidate_los_numbers = list(available_los_numbers)
        random.shuffle(candidate_los_numbers)

        for selected_los in candidate_los_numbers:
            # The trailing "." keeps e.g. "R1." from matching "R19."
            filename_pattern = f"Bin{bin_number}_LOS{selected_los}_R{tile_number}."

            # Matching files that have not been selected before
            matching_files = [
                filename
                for filename in bin_file_paths
                if filename_pattern in filename and filename not in selected_filenames
            ]

            if matching_files:
                selected_file = matching_files[0]
                break

        # Every LOS was tried without success: stop instead of looping forever
        if selected_file is None:
            raise RuntimeError(
                f"No unused catalogue left for bin {bin_number}, tile {tile_number} "
                f"(realization {realization})"
            )

        selected_tiles.append(selected_file)
        selected_filenames.add(selected_file)

    # Append the list of selected files for this realization to the collections_of_files
    collections_of_files.append(selected_tiles)
