In [1]:
import os
import numpy as np
from glob import glob  


In [2]:
import os
import numpy as np
from glob import glob
import shutil

def merge_npz_files_incremental(file_list, output_file, temp_file='temp_merge.npz'):
    """
    Incrementally merge multiple .npz files by concatenating data under each key.
    Saves intermediate results to manage memory usage.
    """
    # Initialize empty dictionary for the first iteration
    merged_data = None
    
    # Create output directory if it doesn't exist
    os.makedirs(os.path.dirname(os.path.abspath(output_file)), exist_ok=True)
    
    # Generate a unique temporary filename
    temp_file = f'temp_merge_{np.random.randint(10000)}.npz'
    
    try:
        for idx, file in enumerate(file_list):
            print(f"Processing file {idx + 1}/{len(file_list)}: {file}")
            with np.load(file, allow_pickle=True) as data:
                # If it's the first file, initialize merged_data
                if merged_data is None:
                    merged_data = {key: data[key] for key in data.files}
                else:
                    # Load temp file with current merged data
                    with np.load(temp_file, allow_pickle=True) as temp_data:
                        merged_data = {key: np.concatenate((temp_data[key], data[key]), axis=0) for key in data.files}
            # Save the current merged data to the temporary file
            np.savez(temp_file, **merged_data)
            print(f"Intermediate merge saved to {temp_file}")
        
        # After processing all files, save the final merged result
        shutil.copy2(temp_file, output_file)
        print(f"Final merged data saved to {output_file}")
    
    finally:
        # Clean up temporary file
        if os.path.exists(temp_file):
            try:
                os.remove(temp_file)
            except:
                print(f"Warning: Could not remove temporary file {temp_file}")

In [3]:
file_list = glob("E:/L2RPN/Dreamer_V3_Implimentation/data-20241123T131314Z-001/data/*")  # Adjust path as needed
output_file = 'E:/L2RPN/Dreamer_V3_Implimentation/merged_data.npz'

In [6]:
  # Output file path
merge_npz_files_incremental(file_list[61:80], output_file) # 0:20 is range

Processing file 1/19: E:/L2RPN/Dreamer_V3_Implimentation/data-20241123T131314Z-001/data\episode_155_data.npz
Intermediate merge saved to temp_merge_9870.npz
Processing file 2/19: E:/L2RPN/Dreamer_V3_Implimentation/data-20241123T131314Z-001/data\episode_156_data.npz
Intermediate merge saved to temp_merge_9870.npz
Processing file 3/19: E:/L2RPN/Dreamer_V3_Implimentation/data-20241123T131314Z-001/data\episode_157_data.npz
Intermediate merge saved to temp_merge_9870.npz
Processing file 4/19: E:/L2RPN/Dreamer_V3_Implimentation/data-20241123T131314Z-001/data\episode_158_data.npz
Intermediate merge saved to temp_merge_9870.npz
Processing file 5/19: E:/L2RPN/Dreamer_V3_Implimentation/data-20241123T131314Z-001/data\episode_159_data.npz
Intermediate merge saved to temp_merge_9870.npz
Processing file 6/19: E:/L2RPN/Dreamer_V3_Implimentation/data-20241123T131314Z-001/data\episode_15_data.npz
Intermediate merge saved to temp_merge_9870.npz
Processing file 7/19: E:/L2RPN/Dreamer_V3_Implimentation/da