# edax_to_aztec_h5.py
Convert EDAX .osc (orientations) + .UP2 (patterns) into an AZtec-style HDF5 (H5OINA-like) file.

Usage:
    python edax_to_aztec_h5.py --osc path/to/file.osc --up2 path/to/file.UP2 --out merged.h5

Notes:
- Designed to stream patterns to HDF5 to handle large datasets.
- The OSC record layout is a fixed assumption (X, Y, Phi1, PHI, Phi2, IQ, CI as float32 followed by an int32 Phase); it is not auto-detected from the file.
- For unusual EDAX variants, tweak the record dtype in read_osc_binary or the header offsets.


In [1]:
import os
import h5py
import numpy as np
import struct

In [3]:
#PATH TO DATA
import os
# Switch into the acquisition folder and echo the resulting working directory.
data_dir = "/Volumes/T5 EVO/ExperimentSample5A/Sample5A_Step0/Grid00/Area 1"
os.chdir(data_dir)
print(os.getcwd())


/Volumes/T5 EVO/ExperimentSample5A/Sample5A_Step0/Grid00/Area 1
/Volumes/T5 EVO/ExperimentSample5A/Sample5A_Step0/Grid00/Area 1


In [6]:
# ============================================================
# 1️⃣ Read EDAX binary OSC
# ============================================================
def read_osc_binary(filename, offset=0):
    """Load fixed-width scan records from a binary OSC file.

    Each record is interpreted as seven native-endian float32 fields
    (X, Y, Phi1, PHI, Phi2, IQ, CI) followed by one int32 field (Phase),
    i.e. 32 bytes per record.

    Args:
        filename: Path of the file to read.
        offset: Number of leading bytes to skip before the first record.
            Defaults to 0, which reads from the very start of the file.

    Returns:
        A 1-D NumPy structured array with one element per complete record.
    """
    # NOTE(review): this record layout is hard-coded; nothing in the file is
    # inspected to confirm it. If the file starts with a header, pass its
    # length via ``offset`` — verify against the OSC variant in use.
    record_dtype = np.dtype([
        ("X", "f4"),
        ("Y", "f4"),
        ("Phi1", "f4"),
        ("PHI", "f4"),
        ("Phi2", "f4"),
        ("IQ", "f4"),
        ("CI", "f4"),
        ("Phase", "i4"),
    ])
    return np.fromfile(filename, dtype=record_dtype, offset=offset)


# ============================================================
# 2️⃣ Stream UP2 patterns (memory safe)
# ============================================================
def stream_up2_patterns(path, pattern_size=(480,480), header_size=1024, dtype=np.uint8):
    """Yield patterns one at a time from a raw pattern file.

    The file is read sequentially, so only one pattern is held in memory at
    any moment. Reading stops at the first incomplete pattern (including a
    clean end of file); trailing partial data is ignored.

    Args:
        path: Path of the pattern file.
        pattern_size: ``(height, width)`` of every pattern in pixels.
        header_size: Number of bytes to skip at the start of the file.
        dtype: Pixel type of the stored patterns.

    Yields:
        A read-only 2-D array of shape ``(height, width)`` per pattern.

    Raises:
        ValueError: If either pattern dimension is not positive.
    """
    # NOTE(review): EDAX UP2 files are usually described as 16-bit patterns
    # preceded by a short header holding the pattern size and data offset.
    # The 1024-byte / uint8 defaults are kept for backward compatibility —
    # confirm them against the actual file before trusting the output.
    h, w = pattern_size
    if h <= 0 or w <= 0:
        # A zero-byte pattern would make read() return b"" forever and the
        # loop below would never terminate.
        raise ValueError(f"pattern_size must be positive, got {pattern_size!r}")
    pixel_dtype = np.dtype(dtype)
    bytes_per_pattern = h * w * pixel_dtype.itemsize
    with open(path, "rb") as f:
        f.seek(header_size)  # skip header
        while True:
            block = f.read(bytes_per_pattern)
            if len(block) < bytes_per_pattern:
                return
            yield np.frombuffer(block, dtype=pixel_dtype).reshape(h, w)


# ============================================================
# 3️⃣ Helper: get or create group safely
# ============================================================
def get_or_create_group(f, path):
    """Walk ``path`` below ``f`` and return the object found at its end.

    Leading and trailing slashes are ignored. Every path segment that is
    missing from the current level is created with ``create_group``;
    segments that already exist are simply entered. An empty or ``None``
    path returns ``f`` itself.
    """
    if not path:
        return f
    node = f
    for segment in path.strip("/").split("/"):
        # Descend into an existing child, otherwise create it on the way down.
        node = node[segment] if segment in node else node.create_group(segment)
    return node


# ============================================================
# 4️⃣ Safe dataset creation
# ============================================================
def safe_create_dataset(f, full_path, shape=None, dtype=None, chunks=None, data=None):
    """Create the dataset at ``full_path`` or reuse/replace an existing one.

    Missing parent groups are created first. If an object with that name
    already exists:

    * when its shape agrees with the request (the shape of ``data`` if given,
      otherwise ``shape``), or no shape was requested, it is reused and
      ``data`` (if any) is written into it, cast to the existing dtype;
    * otherwise it is deleted and recreated with the requested shape, and
      its attributes are carried over to the replacement.

    Args:
        f: Open HDF5 file or group to work in.
        full_path: Slash-separated dataset path relative to ``f``.
        shape: Shape for a new dataset when ``data`` is not given.
        dtype: Element type for a new dataset.
        chunks: Chunk shape for a new dataset.
        data: Values to store; when given it determines the shape.

    Returns:
        The dataset object that now lives at ``full_path``.
    """
    parts = full_path.strip("/").split("/")
    name = parts[-1]
    if len(parts) > 1:
        parent_grp = get_or_create_group(f, "/".join(parts[:-1]))
    else:
        parent_grp = f

    saved_attrs = {}
    if name in parent_grp:
        existing = parent_grp[name]
        wanted_shape = np.shape(data) if data is not None else shape
        # Objects without a shape (e.g. groups) are handed back untouched.
        current_shape = getattr(existing, "shape", None)
        shapes_agree = (
            wanted_shape is None
            or current_shape is None
            or tuple(current_shape) == tuple(np.atleast_1d(wanted_shape).tolist())
        )
        if shapes_agree:
            if data is not None:
                # Ellipsis also works for scalar datasets, unlike ``[:]``.
                existing[...] = data
            return existing
        # A dataset left over with a different shape (for example one copied
        # from a template) cannot hold the new values: replace it, but keep
        # its attributes.
        saved_attrs = dict(existing.attrs)
        del parent_grp[name]

    if data is not None:
        dset = parent_grp.create_dataset(name, data=data, dtype=dtype, chunks=chunks)
    else:
        dset = parent_grp.create_dataset(name, shape=shape, dtype=dtype, chunks=chunks)
    for key, value in saved_attrs.items():
        dset.attrs[key] = value
    return dset


# ============================================================
# 5️⃣ Copy template safely
# ============================================================
def copy_template_safe(template_path, output_path):
    """Recreate the layout of a template HDF5 file in a new, empty file.

    Groups and datasets are reproduced with the same names; datasets get the
    template's shape, dtype and chunking, and attributes are copied for the
    root group, every sub-group and every dataset. Dataset *values* are not
    copied, and any existing file at ``output_path`` is overwritten.

    Args:
        template_path: Path of the HDF5 file whose layout is copied.
        output_path: Path of the file to create.
    """
    def _copy_attrs(src, dst):
        # Transfer every attribute of ``src`` onto ``dst``.
        for key, value in src.attrs.items():
            dst.attrs[key] = value

    with h5py.File(template_path, "r") as template, h5py.File(output_path, "w") as out:
        # visititems() only reports members of the root group, never the root
        # itself, so file-level attributes have to be copied explicitly.
        _copy_attrs(template, out)

        def copy_item(name, obj):
            if isinstance(obj, h5py.Group):
                _copy_attrs(obj, get_or_create_group(out, name))
            elif isinstance(obj, h5py.Dataset):
                parent_path = name.strip("/").rpartition("/")[0]
                if parent_path:
                    get_or_create_group(out, parent_path)
                new_dset = out.create_dataset(
                    name, shape=obj.shape, dtype=obj.dtype, chunks=obj.chunks
                )
                _copy_attrs(obj, new_dset)

        template.visititems(copy_item)
    print("Template structure copied safely to", output_path)


# ============================================================
# 6️⃣ Fill new H5OINA with OSC + patterns
# ============================================================
def fill_h5oina(output_path, osc_data, patterns_gen, pattern_size=(480,480)):
    """Write scan records and patterns into an existing HDF5 file.

    The ``ScanData`` group receives EulerAngles (Phi1, PHI, Phi2),
    Coordinates (X, Y), Phase, BC (taken from IQ), BS (taken from CI), a
    running Index, and zero-filled Bands and MAD. Patterns are written one by
    one into ``PatternData/Patterns``; at most ``len(osc_data)`` patterns are
    consumed from ``patterns_gen``. If the stream ends early a warning is
    printed and the remaining entries are left unwritten.

    Args:
        output_path: Path of an existing HDF5 file, opened in ``r+`` mode.
        osc_data: Structured array with fields X, Y, Phi1, PHI, Phi2, IQ, CI
            and Phase.
        patterns_gen: Iterable yielding one 2-D pattern per scan point.
        pattern_size: ``(height, width)`` of every pattern.
    """
    from itertools import islice

    N = len(osc_data)
    Ny, Nx = pattern_size
    with h5py.File(output_path, "r+") as f:
        # ScanData
        safe_create_dataset(f, "ScanData/EulerAngles", data=np.column_stack([osc_data["Phi1"], osc_data["PHI"], osc_data["Phi2"]]))
        safe_create_dataset(f, "ScanData/Coordinates", data=np.column_stack([osc_data["X"], osc_data["Y"]]))
        safe_create_dataset(f, "ScanData/Phase", data=osc_data["Phase"].astype(np.int32))
        safe_create_dataset(f, "ScanData/BC", data=osc_data["IQ"])
        safe_create_dataset(f, "ScanData/BS", data=osc_data["CI"])
        safe_create_dataset(f, "ScanData/Index", data=np.arange(N, dtype=np.int32))
        # No source values are available for these two, so they are zeroed.
        safe_create_dataset(f, "ScanData/Bands", data=np.zeros(N))
        safe_create_dataset(f, "ScanData/MAD", data=np.zeros(N))

        # PatternData: one chunk per pattern so each write touches one chunk.
        dset = safe_create_dataset(
            f, "PatternData/Patterns",
            shape=(N, Ny, Nx),
            dtype=np.uint8,
            chunks=(1, Ny, Nx)
        )

        print("Streaming patterns into H5OINA...")
        written = 0
        # islice caps the stream at N so surplus patterns are never indexed.
        for i, pat in enumerate(islice(patterns_gen, N)):
            dset[i] = pat
            written = i + 1
            if written % 200 == 0:
                print(f"  {written}/{N} patterns written")
        if written < N:
            # Make a short pattern stream visible instead of reporting a
            # complete file without comment.
            print(f"WARNING: only {written} of {N} patterns were available; the remaining entries were left unwritten.")
    print("H5OINA file successfully written.")


# ============================================================
# 7️⃣ Run full pipeline
# ============================================================
# Input/output file names, resolved against the current working directory.
osc_path, up2_path = "S5A_Grid00_Step0.osc", "S5A_Grid00_Step0.UP2"
template_h5, output_h5 = "Si_indent.h5oina", "converted_from_template.h5oina"

# Pattern dimensions shared by the reader and the writer.
pattern_size = (480, 480)

# Scan records are loaded in one go.
osc_data = read_osc_binary(osc_path)

# Patterns come from a lazy generator, so nothing is read until iteration.
patterns_gen = stream_up2_patterns(up2_path, pattern_size=pattern_size)

# Build the output file from the template layout, then populate it.
copy_template_safe(template_h5, output_h5)
fill_h5oina(output_h5, osc_data, patterns_gen, pattern_size=pattern_size)

print("Conversion complete! Ready for AZtecCrystal.")


Template structure copied safely to converted_from_template.h5oina
Streaming patterns into H5OINA...
  200/455309 patterns written
  400/455309 patterns written
  600/455309 patterns written
  800/455309 patterns written
  1000/455309 patterns written
  1200/455309 patterns written
  1400/455309 patterns written
  1600/455309 patterns written
  1800/455309 patterns written
  2000/455309 patterns written
  2200/455309 patterns written
  2400/455309 patterns written
  2600/455309 patterns written
  2800/455309 patterns written
  3000/455309 patterns written
  3200/455309 patterns written
  3400/455309 patterns written
  3600/455309 patterns written
  3800/455309 patterns written
  4000/455309 patterns written
  4200/455309 patterns written
  4400/455309 patterns written
  4600/455309 patterns written
  4800/455309 patterns written
  5000/455309 patterns written
  5200/455309 patterns written
  5400/455309 patterns written
  5600/455309 patterns written
  5800/455309 patterns written
  60

In [None]:

# User paths
# Source files: scan records (.osc) and raw patterns (.UP2).
osc_path, up2_path = "S5A_Grid00_Step0.osc", "S5A_Grid00_Step0.UP2"
# H5OINA template providing the layout, and the file to be produced.
template_h5, output_h5 = "Si_indent.h5oina", "converted_from_template.h5oina"