**1. Introduction**

In [None]:
# Build 3D Acoustic Tensor from Replicate Data

# This notebook parses all `*_Sensing.json` files of a given replicate inside a folder,
# pairs each one with the `*_NozzleCheck_*_mBar.bmp` image that shares its timestamp (the pressure value is read from the BMP filename),
# and builds a **3D tensor (m × n × k)** where:
#
# - m = number of nozzles (A–D combined)
# - n = number of samples per waveform (124 in the run shown below)
# - k = number of pressure levels (one per file)
#
# Each nozzle is labeled as A1, A2, …, D320, etc.


**2. Configuration**

In [2]:
import os, re, json, glob
import numpy as np
import matplotlib.pyplot as plt

# ---------------- CONFIGURATION ----------------
# `folder` is a relative path, so it is resolved against the kernel's current working directory.
# `replicate` is interpolated into the filename patterns of the file-matching cell
# ("*_<replicate>_Sensing.json" and "*_<replicate>_NozzleCheck_*_mBar.bmp") and into the
# names of the output files written in the save cell.
folder = "2025-05-26-03-00-51-PM_Gradually_Lowering_Pressure_0dot4mBar_Steps"   # Path to folder containing *_Sensing.json and *_NozzleCheck_*.bmp
replicate = 2       # Replicate number to process (1 or 2)
# ------------------------------------------------


**3. File matching**

In [3]:
# Filename patterns. The regex quantifier braces are doubled ({{ }}) because str.format()
# would otherwise interpret them as replacement fields; the single {} receives `replicate`.
# NOTE(review): json_pattern and bmp_pattern are not used further down in this cell; they are
# kept so that any code relying on these names keeps working.
json_pattern = re.compile(r"(?P<ts>\d{{4}}-\d{{2}}-\d{{2}}-\d{{2}}-\d{{2}}-\d{{2}}-\d{{6}})_{}_Sensing\.json$".format(replicate))
bmp_pattern = re.compile(r"(?P<ts>\d{{4}}-\d{{2}}-\d{{2}}-\d{{2}}-\d{{2}}-\d{{2}}-\d{{6}})_{}_NozzleCheck_[-+]?\d+(?:\.\d+)?_mBar\.bmp$".format(replicate))
pressure_pattern = re.compile(r"NozzleCheck_([-+]?\d+(?:\.\d+)?)_mBar", re.IGNORECASE)
# Timestamp prefix (YYYY-MM-DD-hh-mm-ss-ffffff) used as the join key between a sensing JSON
# and its nozzle-check BMP. Compiled once instead of re-parsing the literal per file.
timestamp_pattern = re.compile(r"(\d{4}-\d{2}-\d{2}-\d{2}-\d{2}-\d{2}-\d{6})")

# Gather files (sorted so the processing order is deterministic)
json_files = sorted(glob.glob(os.path.join(folder, f"*_{replicate}_Sensing.json")))
bmp_files  = sorted(glob.glob(os.path.join(folder, f"*_{replicate}_NozzleCheck_*_mBar.bmp")))

print(f"Found {len(json_files)} sensing JSONs and {len(bmp_files)} BMP images for replicate {replicate}.")

# Build a timestamp -> pressure lookup from the BMP filenames
pressure_lookup = {}
unparsed_bmps = []  # BMP names that yielded no timestamp or no pressure
for bmp in bmp_files:
    name = os.path.basename(bmp)
    ts_match = timestamp_pattern.match(name)   # timestamp must be the filename prefix
    m = pressure_pattern.search(name)
    if not ts_match or not m:
        # Previously dropped silently; remember it so the user can see why a JSON
        # later ends up without a pressure.
        unparsed_bmps.append(name)
        continue
    ts = ts_match.group(1)
    pressure = float(m.group(1))
    if ts in pressure_lookup and pressure_lookup[ts] != pressure:
        # Same behavior as before (last file wins), but no longer unnoticed.
        print(f" Conflicting pressures for timestamp {ts}: {pressure_lookup[ts]} vs {pressure} (keeping the latter).")
    pressure_lookup[ts] = pressure

print(f"Pressure lookup built for {len(pressure_lookup)} timestamps.")
if unparsed_bmps:
    print(f" Ignored {len(unparsed_bmps)} BMP file(s) with an unparseable name:")
    for name in unparsed_bmps:
        print(f"   - {name}")



Found 95 sensing JSONs and 109 BMP images for replicate 2.
Pressure lookup built for 109 timestamps.


**4. Parse JSONs and build tensor**

In [None]:
import os, re, json, glob
import numpy as np

# --- CONFIGURATION ---
# NOTE(review): these two values duplicate the ones in "2. Configuration" and override them
# whenever this cell runs; keep both places in sync.
folder = "2025-05-26-03-00-51-PM_Gradually_Lowering_Pressure_0dot4mBar_Steps"
replicate = 2
# ---------------------

# Helper patterns (braces are doubled {{ }} so .format() doesn’t consume them)
# NOTE(review): json_pattern and bmp_pattern are not used further down in this cell.
json_pattern = re.compile(
    r"(?P<ts>\d{{4}}-\d{{2}}-\d{{2}}-\d{{2}}-\d{{2}}-\d{{2}}-\d{{6}})_{}_Sensing\.json$".format(replicate)
)
bmp_pattern = re.compile(
    r"(?P<ts>\d{{4}}-\d{{2}}-\d{{2}}-\d{{2}}-\d{{2}}-\d{{2}}-\d{{6}})_{}_NozzleCheck_[-+]?\d+(?:\.\d+)?_mBar\.bmp$".format(replicate)
)
pressure_pattern = re.compile(r"NozzleCheck_([-+]?\d+(?:\.\d+)?)_mBar", re.IGNORECASE)
# Timestamp (YYYY-MM-DD-hh-mm-ss-ffffff) that joins a sensing JSON to its nozzle-check BMP.
timestamp_pattern = re.compile(r"(\d{4}-\d{2}-\d{2}-\d{2}-\d{2}-\d{2}-\d{6})")
# Banks are always visited in this order, which fixes the row order of the tensor.
bank_labels = ['A', 'B', 'C', 'D']


def _collect_bank_waveforms(data):
    """Return ``(labels, waveforms)`` for every nozzle stored under Bank_A … Bank_D in ``data``.

    A bank key that is absent from ``data`` is skipped. Each label is
    ``"<bank><1-based position inside that bank>"`` and is produced together with its
    waveform, so the labels always describe exactly the rows that were collected.
    """
    labels, waveforms = [], []
    for bank_label in bank_labels:
        key = f'Bank_{bank_label}'
        if key not in data:
            continue
        for nozzle_idx, waveform in enumerate(data[key], start=1):
            labels.append(f"{bank_label}{nozzle_idx}")
            waveforms.append(waveform)
    return labels, waveforms


# Gather files (sorted so the processing order is deterministic)
json_files = sorted(glob.glob(os.path.join(folder, f"*_{replicate}_Sensing.json")))
bmp_files  = sorted(glob.glob(os.path.join(folder, f"*_{replicate}_NozzleCheck_*_mBar.bmp")))

print(f"Found {len(json_files)} sensing JSONs and {len(bmp_files)} BMP images for replicate {replicate}.")

# Build pressure lookup (timestamp -> pressure parsed from the BMP filename)
pressure_lookup = {}
for bmp in bmp_files:
    name = os.path.basename(bmp)
    ts_match = timestamp_pattern.match(name)
    m = pressure_pattern.search(name)
    if not ts_match or not m:
        continue
    pressure_lookup[ts_match.group(1)] = float(m.group(1))

print(f"Pressure lookup built for {len(pressure_lookup)} timestamps.")

# --- Build Tensor ---
all_pressures = []
all_waveforms = []
bad_files = []  # store problematic JSONs
nozzles = None  # row labels, taken from the first file that parses successfully

for js in json_files:
    # A filename without a timestamp is treated like a file without a matching BMP
    # instead of crashing on `.group(1)` of a failed match.
    ts_match = timestamp_pattern.search(os.path.basename(js))
    pressure = pressure_lookup.get(ts_match.group(1)) if ts_match else None
    if pressure is None:
        print(f" No matching BMP pressure for {js}, skipping.")
        continue

    try:
        with open(js, 'r') as f:
            data = json.load(f)
    except json.JSONDecodeError as e:
        print(f" JSON error in {js}: {e}")
        bad_files.append(js)
        continue

    file_labels, combined_waveforms = _collect_bank_waveforms(data)

    # Fail with the offending filename rather than with an anonymous np.stack error later.
    try:
        waveform_matrix = np.array(combined_waveforms)
    except ValueError as e:
        raise ValueError(f"Waveforms in {js} do not all have the same length: {e}") from e
    if waveform_matrix.ndim != 2:
        raise ValueError(
            f"Expected a (nozzles × samples) table in {js}, got an array of shape {waveform_matrix.shape}."
        )

    if nozzles is None:
        nozzles = file_labels
    elif file_labels != nozzles or waveform_matrix.shape != all_waveforms[0].shape:
        raise ValueError(
            f"{js} has {len(file_labels)} nozzles × {waveform_matrix.shape[1]} samples, but the first file "
            f"has {len(nozzles)} × {all_waveforms[0].shape[1]} (or a different bank layout); cannot stack."
        )

    all_pressures.append(pressure)
    all_waveforms.append(waveform_matrix)

if not all_waveforms:
    raise ValueError(
        f"No usable sensing JSON found for replicate {replicate} in {folder!r}; cannot build the tensor."
    )

# Sort and stack (stable sort keeps file order for equal pressures)
sorted_idx = np.argsort(all_pressures, kind="stable")
all_pressures = np.array(all_pressures)[sorted_idx]
all_waveforms = [all_waveforms[i] for i in sorted_idx]

# Axis 0 = nozzle, axis 1 = sample, axis 2 = pressure level (ascending)
tensor = np.stack(all_waveforms, axis=2)

print(f"\n✅ Tensor built: shape = {tensor.shape} (nozzles × samples × pressures)")
print(f"Pressures: {all_pressures}")
print(f"Waveform lengths: {tensor.shape[1]} samples each")

if bad_files:
    print(f"\n Skipped {len(bad_files)} corrupted JSON files:")
    for bf in bad_files:
        print(f"   - {os.path.basename(bf)}")


lengths = [len(row) for wf_set in all_waveforms for row in wf_set]
print(f"Waveform lengths range: {min(lengths)} – {max(lengths)}")



Found 95 sensing JSONs and 109 BMP images for replicate 2.
Pressure lookup built for 109 timestamps.

✅ Tensor built: shape = (1280, 124, 95) (nozzles × samples × pressures)
Pressures: [-87.6 -87.2 -86.8 -86.4 -86.  -85.6 -85.2 -84.8 -84.4 -84.  -83.6 -83.2
 -82.8 -82.4 -82.  -81.6 -81.2 -80.8 -80.4 -80.  -79.6 -79.2 -78.8 -78.4
 -78.  -77.6 -77.2 -76.8 -76.4 -76.  -75.6 -75.2 -74.8 -74.4 -74.  -73.6
 -73.2 -72.8 -72.4 -72.  -71.6 -71.2 -70.8 -70.4 -70.  -69.6 -69.2 -68.8
 -68.4 -68.  -67.6 -67.2 -66.8 -66.4 -66.  -65.6 -65.2 -64.8 -64.4 -64.
 -63.6 -63.2 -62.8 -62.4 -62.  -61.6 -61.2 -60.8 -60.4 -60.  -59.6 -59.2
 -58.8 -58.4 -58.  -57.6 -57.2 -56.8 -56.4 -56.  -55.6 -55.2 -54.8 -54.4
 -54.  -53.6 -53.2 -52.8 -52.4 -52.  -51.6 -51.2 -50.8 -50.4 -50. ]
Waveform lengths: 124 samples each
Waveform lengths range: 124 – 124


**5. Save results**

In [5]:
# Persist the tensor together with the descriptions of its pressure axis and nozzle axis.
# The visualization cells reload exactly these three files.
out_tensor = f"data/preprocessed/tensor_replicate_{replicate}_lowering.npy"
out_press  = f"data/preprocessed/pressures_replicate_{replicate}_lowering.json"
out_nozz   = f"data/preprocessed/nozzles_replicate_{replicate}_lowering.json"

# Create the output directory on first use; np.save/open do not create missing parents.
os.makedirs(os.path.dirname(out_tensor), exist_ok=True)

# Save the ndarray directly: converting it to nested Python lists first only costs time and
# memory, because np.save turns its argument back into an array anyway.
np.save(out_tensor, tensor)
with open(out_press, 'w') as f:
    json.dump(all_pressures.tolist(), f, indent=2)   # ndarray -> list, JSON cannot encode arrays
with open(out_nozz, 'w') as f:
    json.dump(nozzles, f, indent=2)

print(f"Saved: {out_tensor}, {out_press}, {out_nozz}")


Saved: data/preprocessed/tensor_replicate_2_lowering.npy, data/preprocessed/pressures_replicate_2_lowering.json, data/preprocessed/nozzles_replicate_2_lowering.json


**6. Visualization example**

In [None]:
# --- Visualization 1 (range-slider version): Interactive waveform evolution for a given nozzle across pressures ---
import plotly.graph_objects as go
from ipywidgets import interact, FloatRangeSlider
import numpy as np
import json
import os

# Configuration
replicate = 2           # which replicate to load
target_nozzle = "C316"   # which nozzle to visualize
pressure_step = 0.4      # slider increment in mBar; set it to the spacing of the recorded pressures

# Paths for preprocessed data
base_dir = "data/preprocessed"
tensor_path = os.path.join(base_dir, f"tensor_replicate_{replicate}_lowering.npy")
press_path  = os.path.join(base_dir, f"pressures_replicate_{replicate}_lowering.json")
nozz_path   = os.path.join(base_dir, f"nozzles_replicate_{replicate}_lowering.json")

# Check file existence
if not all(os.path.exists(p) for p in [tensor_path, press_path, nozz_path]):
    print(f"⚠️ Missing preprocessed files for replicate {replicate}. Please generate them first.")
else:
    # Load preprocessed data. The tensor is loaded with np.load's default (pickle disabled):
    # the file written by the save cell is a regular numeric array, and unpickling is an
    # avoidable code-execution risk for files from elsewhere.
    tensor = np.load(tensor_path)
    with open(press_path, 'r') as f:
        all_pressures = np.array(json.load(f), dtype=float)
    with open(nozz_path, 'r') as f:
        nozzles = json.load(f)

    print(f" Loaded replicate {replicate}: tensor {tensor.shape}, {len(nozzles)} nozzles, {len(all_pressures)} pressures.")

    # Check if the target nozzle exists
    if target_nozzle not in nozzles:
        print(f" Nozzle {target_nozzle} not found in replicate {replicate}.")
    else:
        idx = nozzles.index(target_nozzle)   # row of the tensor that belongs to the nozzle

        # --- Range slider for selecting pressure interval ---
        lowest, highest = float(all_pressures.min()), float(all_pressures.max())
        slider = FloatRangeSlider(
            value=[lowest, highest],
            min=lowest,
            max=highest,
            step=pressure_step,
            description='Pressure range (mBar):',
            continuous_update=False,
            layout={'width': '80%'}
        )

        # Shared by every trace, so built once instead of once per pressure level.
        sample_axis = list(range(tensor.shape[1]))
        # Slider values are floats on a min + n*step grid; the tiny tolerance keeps a pressure
        # that sits exactly on a slider bound from being lost to rounding error.
        tolerance = 1e-9

        def plot_pressure_range(pressure_range):
            """Plot the waveform of ``target_nozzle`` for every pressure inside ``pressure_range``.

            ``pressure_range`` is the ``(low, high)`` tuple delivered by the range slider;
            both bounds are inclusive. One trace is drawn per selected pressure level.
            """
            pmin, pmax = pressure_range
            mask = (all_pressures >= pmin - tolerance) & (all_pressures <= pmax + tolerance)

            fig = go.Figure()
            for i in np.flatnonzero(mask):
                p = all_pressures[i]
                fig.add_trace(go.Scatter(
                    y=tensor[idx, :, i],
                    x=sample_axis,
                    mode='lines',
                    name=f"{p:.1f} mBar",
                    # Doubled braces emit the literal %{x} / %{y} placeholders Plotly expects;
                    # a single-brace form is evaluated by the f-string and yields "%x" / "%y".
                    hovertemplate=f"<b>Pressure:</b> {p:.1f} mBar<br>Sample: %{{x}}<br>Amplitude: %{{y}}<extra></extra>"
                ))

            fig.update_layout(
                title=f"Waveform evolution — Nozzle {target_nozzle} (Replicate {replicate})",
                xaxis_title="Sample index",
                yaxis_title="Amplitude",
                legend=dict(font=dict(size=8), orientation="v", bgcolor="rgba(255,255,255,0.7)"),
                height=500,
                margin=dict(l=40, r=200, t=60, b=40)
            )
            fig.show()

        interact(plot_pressure_range, pressure_range=slider)


✅ Loaded replicate 2: tensor (1280, 124, 95), 1280 nozzles, 95 pressures.


interactive(children=(FloatRangeSlider(value=(-87.6, -50.0), continuous_update=False, description='Pressure ra…

**7. Visualization example 2**

In [None]:
# --- Visualization 2: Interactive comparison of the nozzles of one bank at a fixed pressure (with hover tooltips) ---
import plotly.graph_objects as go
from ipywidgets import interact, IntRangeSlider
import numpy as np
import json
import os

# Configuration
replicate = 2          # which replicate to load
pressure_value = -87.6 # which pressure to visualize
bank = "C"             # which bank (A, B, C, D)

# Paths for preprocessed data
base_dir = "data/preprocessed"
tensor_path = os.path.join(base_dir, f"tensor_replicate_{replicate}_lowering.npy")
press_path  = os.path.join(base_dir, f"pressures_replicate_{replicate}_lowering.json")
nozz_path   = os.path.join(base_dir, f"nozzles_replicate_{replicate}_lowering.json")

if not all(os.path.exists(p) for p in [tensor_path, press_path, nozz_path]):
    print(f" Missing preprocessed files for replicate {replicate}. Please generate them first.")
else:
    # Load preprocessed data. np.load's default (pickle disabled) is enough for the regular
    # numeric array written by the save cell and avoids unpickling untrusted files.
    tensor = np.load(tensor_path)
    with open(press_path, 'r') as f:
        all_pressures = json.load(f)
    with open(nozz_path, 'r') as f:
        nozzles = json.load(f)

    # Match the requested pressure with a small absolute tolerance instead of exact float
    # equality, so values like -87.6 are found even if they differ in the last bits.
    pressure_hits = np.flatnonzero(np.isclose(all_pressures, pressure_value, rtol=0.0, atol=1e-6))
    if pressure_hits.size == 0:
        print(f" Pressure {pressure_value} mBar not found in replicate {replicate}.")
    else:
        k = int(pressure_hits[0])   # index along the pressure axis of the tensor

        # Extract bank-specific nozzles in one pass; position in `nozzles` == tensor row.
        bank_members = [(row, label) for row, label in enumerate(nozzles) if label.startswith(bank)]
        bank_nozzles = [label for _, label in bank_members]
        bank_indices = [row for row, _ in bank_members]

        print(f"✅ Loaded replicate {replicate} | pressure {pressure_value} mBar | bank {bank}")
        print(f"Found {len(bank_nozzles)} nozzles in Bank {bank}")

        # --- Range slider to filter nozzle subset ---
        range_slider = IntRangeSlider(
            value=[0, min(20, len(bank_nozzles))],
            min=0, max=len(bank_nozzles),
            step=1, description='Nozzle range:',
            continuous_update=False,
            layout={'width': '80%'}
        )

        # Shared by every trace, so built once instead of once per nozzle.
        sample_axis = list(range(tensor.shape[1]))

        def plot_bank_nozzles(range_nozzles):
            """Plot the waveforms of a slice of the bank's nozzles at the configured pressure.

            ``range_nozzles`` is the ``(start, end)`` tuple delivered by the range slider and is
            applied as a Python slice (``end`` exclusive) to the nozzles of ``bank``.
            """
            start, end = range_nozzles
            selected_nozzles = bank_nozzles[start:end]
            selected_indices = bank_indices[start:end]

            fig = go.Figure()
            for n, idx_nozzle in zip(selected_nozzles, selected_indices):
                fig.add_trace(go.Scatter(
                    y=tensor[idx_nozzle, :, k],
                    x=sample_axis,
                    mode='lines',
                    name=n,
                    # Doubled braces emit the literal %{x} / %{y} placeholders Plotly expects;
                    # a single-brace form is evaluated by the f-string and yields "%x" / "%y".
                    hovertemplate=f"<b>Nozzle:</b> {n}<br>Sample: %{{x}}<br>Amplitude: %{{y}}<extra></extra>"
                ))

            fig.update_layout(
                title=f"Waveform evolution — Bank {bank}, {pressure_value} mBar (Replicate {replicate})",
                xaxis_title="Sample index",
                yaxis_title="Amplitude",
                legend=dict(font=dict(size=8)),
                height=500,
                margin=dict(l=40, r=200, t=50, b=40)
            )
            fig.show()

        interact(plot_bank_nozzles, range_nozzles=range_slider)



✅ Loaded replicate 2 | pressure -87.6 mBar | bank C
Found 320 nozzles in Bank C


interactive(children=(IntRangeSlider(value=(0, 20), continuous_update=False, description='Nozzle range:', layo…