## Process .h5 files into .data and .xyz files to load into Ovito

In OVITO, load the .data file (folder icon at the top left).
Then open the Pipelines menu (first icon at the top right of the window).
Open the "Add modification..." drop-down menu and select "Load trajectory".
In the right panel, click the folder icon underneath "Trajectory Source".
Select the corresponding .xyz file.

Example usage


In [14]:
import h5py, numpy as np, os
from glob import glob

def _natural_sort_key(path):
    """Return a sort key that compares runs of digits in *path* numerically.

    With this key ``blocks_2.h5`` sorts before ``blocks_10.h5`` (plain string
    sorting puts it after). The raw string is appended as a tie-breaker so
    the resulting order is deterministic.
    """
    import re  # local import keeps this cell self-contained

    text = str(path)
    # re.split with a capturing group alternates text / digit tokens, so odd
    # positions are always digit runs and keys stay mutually comparable.
    tokens = [int(tok) if i % 2 else tok
              for i, tok in enumerate(re.split(r"(\d+)", text))]
    return tokens, text


def _resolve_h5_files(h5_input):
    """Expand *h5_input* (path, wildcard, or iterable of paths) to a file list."""
    # Imported locally under a private name: a later ``import glob`` in the
    # notebook rebinds the global ``glob`` to the module, which would make a
    # bare ``glob(...)`` call fail.
    from glob import glob as expand_pattern

    if isinstance(h5_input, (str, os.PathLike)):
        single = os.fspath(h5_input)
        if os.path.isfile(single):
            return [single]
        return sorted(expand_pattern(single), key=_natural_sort_key)
    return sorted(set(h5_input), key=_natural_sort_key)


def write_ovito_lammps(h5_input, chains, monomer_types,
                       output_data="topology.data",
                       output_xyz="trajectory.xyz"):
    """
    Write a LAMMPS data file (atoms + bonds) and an XYZ trajectory for OVITO.

    Parameters
    ----------
    h5_input : str, os.PathLike, or iterable of paths
        A single .h5 file, a wildcard pattern, or a collection of .h5 paths.
        Files are processed in natural (numeric-aware) order; duplicates in a
        collection are dropped.
    chains : list of (start, end)
        Half-open index range of each chain in the position array. Atoms are
        numbered consecutively in the order the chains are listed.
    monomer_types : array-like, indexed as [chain_idx, local_idx]
        Integer monomer type (0, 1 or 2) of every monomer in every chain.
    output_data : str
        Path of the LAMMPS data (topology) file to write.
    output_xyz : str
        Path of the XYZ trajectory file to write.

    Raises
    ------
    FileNotFoundError
        If no input file could be resolved.
    ValueError
        If a chain has ``end < start`` or a monomer type is outside 0..2.
    """
    monomer_labels = ['A', 'B', 'C']  # atom types 1,2,3 in LAMMPS

    h5_files = _resolve_h5_files(h5_input)
    if not h5_files:
        raise FileNotFoundError("No .h5 files found")

    # Flatten the chain layout once: per-atom position index and 0-based type.
    chain_lengths = []
    position_indices = []
    atom_types = []
    for chain_idx, (start, end) in enumerate(chains):
        if end < start:
            raise ValueError(
                f"Chain {chain_idx} has end ({end}) before start ({start})")
        chain_lengths.append(end - start)
        for local_idx in range(end - start):
            type_idx = int(monomer_types[chain_idx, local_idx])
            if not 0 <= type_idx < len(monomer_labels):
                raise ValueError(
                    f"Monomer type {type_idx} at chain {chain_idx}, "
                    f"monomer {local_idx} is outside 0..{len(monomer_labels) - 1}")
            position_indices.append(start + local_idx)
            atom_types.append(type_idx)

    n_atoms = len(position_indices)
    # An empty chain has no bonds (not -1), hence the clamp.
    n_bonds = sum(max(length - 1, 0) for length in chain_lengths)

    # Write LAMMPS topology file
    with open(output_data, 'w') as df:
        df.write("LAMMPS data file via script\n\n")
        df.write(f"{n_atoms} atoms\n")
        df.write("3 atom types\n")
        df.write(f"{n_bonds} bonds\n")
        df.write("1 bond types\n\n")
        df.write("0.0 10000.0 xlo xhi\n0.0 10000.0 ylo yhi\n0.0 10000.0 zlo zhi\n\n")

        df.write("Masses\n\n")
        df.write("1 1.0\n2 1.0\n3 1.0\n\n")

        df.write("Bond Coeffs\n\n")
        df.write("1 1.0 1.0\n\n")  # Dummy coeffs: k=1.0, r0=1.0

        # Coordinates are placeholders; real positions come from the XYZ file.
        df.write("Atoms # atomic\n\n")
        df.writelines(
            f"{atom_id} {type_idx + 1} 0.0 0.0 0.0\n"  # 0/1/2 -> 1/2/3
            for atom_id, type_idx in enumerate(atom_types, start=1)
        )

        df.write("\nBonds\n\n")
        bond_id = 1
        first_atom = 1  # 1-based ID of the current chain's first atom
        for length in chain_lengths:
            # Bond consecutive atoms within a chain; never across chains.
            for offset in range(length - 1):
                atom_a = first_atom + offset
                df.write(f"{bond_id} 1 {atom_a} {atom_a + 1}\n")
                bond_id += 1
            first_atom += length

    # Labels never change between frames, so look them up once.
    labels = [monomer_labels[type_idx] for type_idx in atom_types]

    # Write XYZ trajectory
    with open(output_xyz, 'w') as xyz:
        for h5f in h5_files:
            with h5py.File(h5f, 'r') as f:
                # Top-level group names are treated as integer frame numbers.
                for frame in sorted(f.keys(), key=int):
                    # NOTE(review): the original comment said "μm → Å", but a
                    # factor of 10 corresponds to nm → Å — confirm input units.
                    pos = f[frame]['pos'][:] * 10
                    xyz.write(f"{n_atoms}\n")
                    xyz.write(f"Frame {os.path.basename(h5f)}:{frame}\n")
                    for label, i in zip(labels, position_indices):
                        x, y, z = pos[i]
                        xyz.write(f"{label} {x:.3f} {y:.3f} {z:.3f}\n")



In [15]:
import glob
import numpy as np

# Gather every block file matching the wildcard.
h5_files = glob.glob(
    "C:/Users/arnav/Personal - Arnav Chhajed/Northwestern/genome_organization/examples/arnav/test_output/blocks_*.h5"
)

# Two chains given as (start, end) index ranges: 0-50 and 50-100.
chains = [(0, 50), (50, 100)]

# Load the saved monomer types and arrange them as one row per chain.
monomer_types = np.load(
    "C:/Users/arnav/Personal - Arnav Chhajed/Northwestern/genome_organization/examples/arnav/test_output/monomer_types.npy"
).reshape(2, 50)

# Write the topology (.data) and trajectory (.xyz) files next to the inputs.
write_ovito_lammps(
    h5_files,
    chains,
    monomer_types,
    output_data="C:/Users/arnav/Personal - Arnav Chhajed/Northwestern/genome_organization/examples/arnav/test_output/EXAMPLE_topology.data",
    output_xyz="C:/Users/arnav/Personal - Arnav Chhajed/Northwestern/genome_organization/examples/arnav/test_output/EXAMPLE_trajectory.xyz",
)