# edax_to_aztec_h5.py
Convert EDAX .osc (orientations) + .UP2 (patterns) into an AZtec-style HDF5 (H5OINA-like) file.

Usage:
    python edax_to_aztec_h5.py --osc path/to/file.osc --up2 path/to/file.UP2 --out merged.h5

Notes:
- Designed to stream patterns to HDF5 to handle large datasets.
- Assumes a fixed binary OSC record layout (see read_osc_binary); the column layout is not auto-detected.
- For unusual EDAX variants, adjust the record dtype in read_osc_binary or the header size skipped in read_up2_patterns.


In [15]:
import os
import h5py
import numpy as np
import struct

In [16]:
#PATH TO DATA
%cd /Volumes/T7/UP2/Grid6

/Volumes/T7/UP2/Grid6


In [20]:
# ------------------------------------------------------------
# 1. Read EDAX OSC point data (fixed-layout binary records)
# ------------------------------------------------------------
def read_osc_binary(filename):
    """
    Load an EDAX binary OSC file as a flat sequence of 32-byte point records.

    Every record is interpreted as seven float32 values
    (X, Y, Phi1, PHI, Phi2, IQ, CI) followed by one int32 value (Phase).
    The file is parsed from its first byte; no header is skipped.

    Returns a dict with 1-D arrays under "X", "Y", "IQ", "CI" and "Phase",
    plus an (N, 3) array under "Euler" whose columns are Phi1, PHI, Phi2.
    """
    float_fields = ("X", "Y", "Phi1", "PHI", "Phi2", "IQ", "CI")
    layout = np.dtype(
        [(field, "f4") for field in float_fields] + [("Phase", "i4")]
    )

    print("Reading BINARY OSC:", filename)
    records = np.fromfile(filename, dtype=layout)
    print(f" -> Loaded {len(records)} points")

    # Gather the three Euler angle columns into one (N, 3) array.
    euler = np.column_stack(
        [records[angle] for angle in ("Phi1", "PHI", "Phi2")]
    )

    result = {axis: records[axis] for axis in ("X", "Y")}
    result["Euler"] = euler
    for field in ("IQ", "CI", "Phase"):
        result[field] = records[field]
    return result

# ------------------------------------------------------------
# 2. Stream .UP2 patterns (memory-safe)
# ------------------------------------------------------------
def read_up2_patterns(path, pattern_size=(480, 480), header_size=1024,
                      dtype=np.uint8):
    """
    Yield patterns from an EDAX .UP2 file one at a time.

    The file is read sequentially, so only a single pattern is held in
    memory at once. A trailing block shorter than one full pattern is
    ignored.

    Parameters
    ----------
    path : str
        Path of the pattern file.
    pattern_size : tuple
        (height, width) of each pattern in pixels. Both must be positive.
    header_size : int
        Number of bytes to skip at the start of the file before the first
        pattern. Defaults to 1024, the value this script has always used.
    dtype : numpy dtype
        Pixel data type. Defaults to uint8 (one byte per pixel).

    Yields
    ------
    numpy.ndarray
        A read-only (height, width) array for each complete pattern.

    Raises
    ------
    ValueError
        If a pattern dimension is not positive or header_size is negative.
        Because this is a generator, the error surfaces on first iteration.

    NOTE(review): EDAX UP2 files are reported to store 16-bit pixels behind
    a short header that records the pattern size and data offset. Confirm
    that the 1024-byte / uint8 defaults match your files, and pass
    header_size / dtype explicitly if they do not.
    """
    h, w = pattern_size
    # A zero-sized pattern would make every read return b"" without ever
    # signalling end-of-file, so the loop below would never terminate.
    if h <= 0 or w <= 0:
        raise ValueError(
            f"pattern_size must be positive, got {pattern_size!r}"
        )
    if header_size < 0:
        raise ValueError(
            f"header_size must be non-negative, got {header_size!r}"
        )

    pixel_dtype = np.dtype(dtype)
    bytes_per_pattern = h * w * pixel_dtype.itemsize

    with open(path, "rb") as f:
        f.seek(header_size)  # skip EDAX header
        count = 0
        while True:
            chunk = f.read(bytes_per_pattern)
            if len(chunk) < bytes_per_pattern:
                # End of file, or a truncated final pattern.
                break
            yield np.frombuffer(chunk, dtype=pixel_dtype).reshape(h, w)
            count += 1
            if count % 50 == 0:
                print(f" → Loaded {count} patterns...")

# ------------------------------------------------------------
# 3. Write HDF5 (Aztec-style, streaming)
# ------------------------------------------------------------
def convert_to_h5(osc_file, up2_file, output_file, pattern_size=(480,480)):
    """
    Merge an OSC point file and a UP2 pattern file into one HDF5 file.

    Layout written to output_file (an existing file is overwritten):
      - group "ScanMetadata" with attributes NumPoints, Format, Creator
      - group "Data" with datasets EulerAngles, X, Y, IQ, CI, PhaseId
      - group "Patterns" with a growable uint8 dataset "images" of shape
        (n_patterns, height, width), chunked one pattern per chunk

    Patterns are appended one at a time as read_up2_patterns yields them,
    so the full pattern stack is never held in memory. The number of
    patterns written is not checked against the number of OSC points.

    osc_file, up2_file : input paths
    output_file        : path of the HDF5 file to create
    pattern_size       : (height, width) of each pattern in pixels
    """
    points = read_osc_binary(osc_file)

    with h5py.File(output_file, "w") as h5:
        # Scan-level metadata lives in attributes of its own group.
        scan_meta = h5.create_group("ScanMetadata")
        scan_meta.attrs["NumPoints"] = len(points["X"])
        scan_meta.attrs["Format"] = "H5OINA"
        scan_meta.attrs["Creator"] = "EDAX → Aztec Converter (Python)"

        # Per-point arrays: (dataset name in the file, key in the OSC dict).
        per_point = h5.create_group("Data")
        dataset_sources = (
            ("EulerAngles", "Euler"),
            ("X", "X"),
            ("Y", "Y"),
            ("IQ", "IQ"),
            ("CI", "CI"),
            ("PhaseId", "Phase"),
        )
        for dataset_name, source_key in dataset_sources:
            per_point.create_dataset(dataset_name, data=points[source_key])

        # Pattern stack starts empty and grows along axis 0.
        frame_shape = (pattern_size[0], pattern_size[1])
        images = h5.create_group("Patterns").create_dataset(
            "images",
            shape=(0,) + frame_shape,
            maxshape=(None,) + frame_shape,
            dtype=np.uint8,
            chunks=(1,) + frame_shape,
        )

        print("Reading patterns and writing to HDF5...")
        written = 0
        pattern_stream = read_up2_patterns(up2_file, pattern_size)
        for written, frame in enumerate(pattern_stream, start=1):
            # Grow by one slot, then fill the slot that was just added.
            images.resize(written, axis=0)
            images[written - 1] = frame

        print(f"Done! Total patterns: {written}")
        print("Saved HDF5 →", output_file)

# ------------------------------------------------------------
# 4. Example Usage in Jupyter
# ------------------------------------------------------------
# Input files are resolved relative to the current working directory.
osc_file, up2_file = "S3A_Grid6_Step4.osc", "S3A_Grid6_step4.up2"
output_h5 = "S3A_Grid6_Step4_merged_aztec.h5"

# Run the conversion for 480 x 480 pixel patterns.
convert_to_h5(
    osc_file=osc_file,
    up2_file=up2_file,
    output_file=output_h5,
    pattern_size=(480, 480),
)


Reading BINARY OSC: S3A_Grid6_Step4.osc
 -> Loaded 422927 points
Reading patterns and writing to HDF5...
 → Loaded 50 patterns...
 → Loaded 100 patterns...
 → Loaded 150 patterns...
 → Loaded 200 patterns...
 → Loaded 250 patterns...
 → Loaded 300 patterns...
 → Loaded 350 patterns...
 → Loaded 400 patterns...
 → Loaded 450 patterns...
 → Loaded 500 patterns...
 → Loaded 550 patterns...
 → Loaded 600 patterns...
 → Loaded 650 patterns...
 → Loaded 700 patterns...
 → Loaded 750 patterns...
 → Loaded 800 patterns...
 → Loaded 850 patterns...
 → Loaded 900 patterns...
 → Loaded 950 patterns...
 → Loaded 1000 patterns...
 → Loaded 1050 patterns...
 → Loaded 1100 patterns...
 → Loaded 1150 patterns...
 → Loaded 1200 patterns...
 → Loaded 1250 patterns...
 → Loaded 1300 patterns...
 → Loaded 1350 patterns...
 → Loaded 1400 patterns...
 → Loaded 1450 patterns...
 → Loaded 1500 patterns...
 → Loaded 1550 patterns...
 → Loaded 1600 patterns...
 → Loaded 1650 patterns...
 → Loaded 1700 patterns.