In [None]:
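# WARNING: I DO NOT RECOMMEND USING "RUN ALL" ON THIS NOTEBOOK.
# RUN ONE SET OF SIMULATION MERGES AT A TIME: EACH SET REASSIGNS datDir/dateDir, AND EVERY MERGE
# DELETES AND REBUILDS THAT REPLICATE'S MinCell_merged.lm FROM SCRATCH.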

In [None]:
import h5py
import numpy as np
import os
from tqdm import tqdm
# import multiprocessing as mp
# from functools import partial
import shutil

In [None]:
# FUNCTION FROM TIANYU
# ADDED THE FOLLOWING OPTIONS TO create_dataset: compression="gzip", compression_opts=1, chunks (up to 16 per axis)
# OPTIONS WERE ADDED TO DRAMATICALLY REDUCE FILE SIZES

def append_lm_files(file1_path, file2_path, chunk_size=10):
    """
    Append trajectories from file2 directly to file1, renumbering frames to maintain sequential ordering.

    Only the datasets under 'Simulations/0000001/Lattice' are copied; no other
    group or dataset in either file is read or modified by this function.

    Parameters:
    -----------
    file1_path : str
        Path to the first LM file (will be modified in-place)
    file2_path : str
        Path to the second LM file (opened read-only)
    chunk_size : int, optional
        Currently unused: frames are copied one at a time. The parameter is
        kept so existing callers that pass it keep working.

    Returns:
    --------
    str
        Path to the modified file1

    Raises:
    -------
    IndexError
        If file1 has no lattice frames, so there is no frame number to continue from.
    ValueError
        If a frame key in either file is not an integer string.
    """
    print(f"Appending files:\n  - {file2_path} → {file1_path} (in-place)")

    # Upper bound for the HDF5 chunk edge along every axis of a frame.
    max_chunk_edge = 16

    # A single open of both files covers the inspection and the write; nothing
    # is written to file1 before the sanity checks below have passed.
    with h5py.File(file1_path, 'r+') as file1, h5py.File(file2_path, 'r') as file2:
        trajs1 = file1['Simulations']['0000001']['Lattice']
        trajs2 = file2['Simulations']['0000001']['Lattice']

        # Frame keys are integer strings (e.g. '0000000042'); compare them
        # numerically so the ordering does not depend on zero padding.
        frame_nums1 = [int(key) for key in trajs1.keys()]
        frame_keys2 = sorted(trajs2.keys(), key=int)

        if not frame_nums1:
            # IndexError is what the unguarded "last key" lookup used to raise.
            raise IndexError(
                f"{file1_path} has no frames under Simulations/0000001/Lattice; "
                "cannot determine the frame number to continue from"
            )

        last_frame_num = max(frame_nums1)
        print(f"File 1 has {len(frame_nums1)} frames, last frame is {last_frame_num}")
        print(f"File 2 has {len(frame_keys2)} frames to append")

        # Frames from file2 continue the numbering right after file1's last frame.
        next_frame_num = last_frame_num + 1

        for offset, frame_key in enumerate(tqdm(frame_keys2, desc="Appending frames")):
            # Read the whole frame from file2 into memory.
            lattice2 = np.array(trajs2[frame_key])

            # Ten-digit zero-padded key, matching the existing key format.
            new_frame_key = f"{next_frame_num + offset:010d}"

            # h5py rejects a chunk that is larger than the data along any axis,
            # so clip the 16-wide chunk to the frame's actual shape.
            chunks = tuple(min(max_chunk_edge, dim) for dim in lattice2.shape)

            trajs1.create_dataset(
                new_frame_key,
                data=lattice2,
                compression="gzip",
                compression_opts=1,
                chunks=chunks,
            )

    total_frames = len(frame_nums1) + len(frame_keys2)
    print(f"Added {len(frame_keys2)} frames to {file1_path}")
    print(f"New frame count: {total_frames}")
    print("Appending complete!")
    return file1_path

In [None]:
# # Example usage:
# if __name__ == "__main__":
  
#     file1 = "/Data1/zane/Models/mincell/Mar10/Mar10_2/MinCell_merged.lm"
#     file2 = "/Data1/zane/Models/mincell/Mar10/Mar10_2/MinCell_restart_5271.lm"
#     # Output will be saved in the file1
#     # Use the modified append function to append directly to file1
#     output = append_lm_files(
#         file1, 
#         file2, 
#         chunk_size=10  # Process 10 frames at a time
#     )

In [None]:
def mergeReplicate(datDir, dateDir, repID, restart_times):
    """
    Rebuild MinCell_merged.lm for one replicate from its original run plus restarts.

    The replicate folder is f'{datDir}{dateDir}{repID}'. Any existing
    MinCell_merged.lm in it is deleted, MinCell.lm is copied to
    MinCell_merged.lm, and each MinCell_restart_<time>.lm is appended to the
    copy with append_lm_files, in the order given by restart_times.

    Parameters:
    -----------
    datDir : str
        Directory prefix; concatenated as-is, so it must end with a path separator.
    dateDir : str
        Prefix of the replicate folder name (e.g. 'Mar10_').
    repID : str or int
        Replicate identifier appended to dateDir to form the folder name.
    restart_times : iterable of numbers
        Restart times; each is truncated with int() to build the restart file name.

    Raises:
    -------
    FileNotFoundError
        If MinCell.lm or any restart file is missing. This is checked before
        the previous merged file is removed.
    """
    rep_dir = f'{datDir}{dateDir}{repID}'
    original = f'{rep_dir}/MinCell.lm'
    merged = f'{rep_dir}/MinCell_merged.lm'
    restarts = [f'{rep_dir}/MinCell_restart_{int(restart_time)}.lm'
                for restart_time in restart_times]

    # Fail before touching anything if an input is missing, instead of
    # discovering it after the old merged file is gone.
    missing = [path for path in [original] + restarts if not os.path.isfile(path)]
    if missing:
        raise FileNotFoundError(f"Missing input file(s) for replicate {repID}: {missing}")

    # Start from a fresh copy of the original run. Python file operations are
    # used instead of shell 'rm'/'cp' so paths with spaces or shell
    # metacharacters work and failures raise instead of being ignored.
    if os.path.exists(merged):
        os.remove(merged)
    shutil.copyfile(original, merged)

    for restart in restarts:
        append_lm_files(merged, restart, chunk_size=10)

In [None]:
# Mar10 simulation set: root directory and replicate-folder prefix.
# Replicate folders are addressed as f'{datDir}{dateDir}{repID}'.
datDir = '/Data1/zane/Models/mincell/Mar10/'
dateDir = 'Mar10_'

# Replicate number -> restart times, in the order they are appended.
# Each time selects the file MinCell_restart_<time>.lm in the replicate folder.
Mar10reps = {
    "1": [2943, 5177, 7071],
    "2": [2941, 5271],
    "3": [2962, 5257],
    "4": [2952, 5298],
    "5": [2969, 5337],
    "7": [2959, 5268],
    "8": [2957, 5270, 7170],
    "9": [2863, 5125, 7048],
    "10": [2507, 4900, 6905],
}
Mar10reps

In [None]:
# Rebuild the merged file of every Mar10 replicate.
# Relies on datDir/dateDir still holding the Mar10 values when this cell runs.
for repID, restart_times in Mar10reps.items():
    mergeReplicate(
        datDir=datDir,
        dateDir=dateDir,
        repID=repID,
        restart_times=restart_times,
    )

In [None]:
# Mar21 simulation set: root directory and replicate-folder prefix.
# Replicate folders are addressed as f'{datDir}{dateDir}{repID}'.
datDir = '/Data1/zane/Models/mincell/Mar21/'
dateDir = 'Mar21_'

# Replicate number -> restart times, in the order they are appended.
# Each time selects the file MinCell_restart_<time>.lm in the replicate folder.
Mar21reps = {
    "1": [2955, 5326],
    "2": [2935, 5230, 7125],
    "3": [2927, 5186, 7049],
    "4": [2951, 5274],
    "5": [2943, 5226, 7141],
    "6": [2939, 5258, 7177],
    "7": [2923, 5278],
    "8": [2967, 5314],
    "9": [2947, 5330],
    "10": [2923, 5178, 7057],
    "11": [2915, 5214, 7117],
}
Mar21reps

In [None]:
# Rebuild the merged file of every Mar21 replicate.
# Relies on datDir/dateDir still holding the Mar21 values when this cell runs.
for repID, restart_times in Mar21reps.items():
    mergeReplicate(
        datDir=datDir,
        dateDir=dateDir,
        repID=repID,
        restart_times=restart_times,
    )