In [2]:
import h5py
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path

from torch.utils.data import DataLoader

import imageio.v3 as iio
from matplotlib.colors import Normalize

from lpfm.data.phys_dataset import SuperDataset
# Load all well datasets
from lpfm.data.dataset_utils import get_datasets

In [None]:
# Root folder that this cell searches recursively for ``*.hdf5`` files.
base_path = Path("/hpcwork/rwth1802/coding/Large-Physics-Foundation-Model/data/datasets")

# Reference extents for check_size: axis 2 is compared with size[0] and
# axis 3 with size[1].
size = (256, 128)

# Dataset names that check_size accepts without reporting them.
field_names_t0 = [
    "t0_fields/temperature",
    "t0_fields/density",
    "t0_fields/pressure",
]
field_names_t1 = [
    "t1_fields/velocity",
]
def print_attrs(name, obj):
    """Print a short description of one HDF5 node.

    Used in this notebook as the callback handed to ``visititems``.

    Args:
        name: Name of the node, printed as a header line.
        obj: The node itself. Shape and dtype are printed only when it is an
            ``h5py.Dataset``; every entry of ``obj.attrs`` is printed always.

    Returns:
        None. This matters for the callback role: per the h5py documentation,
        ``visititems`` stops at the first non-None return value.
    """
    print(f"\n{name}:")

    is_dataset = isinstance(obj, h5py.Dataset)
    if is_dataset:
        print(f"  Shape: {obj.shape}")
        print(f"  Dtype: {obj.dtype}")

    for key, val in obj.attrs.items():
        print(f"  {key}: {val}")

def check_size(name, obj):
    """Report 4-D datasets with an unexpected extent or an unlisted name.

    Reads the module-level ``size``, ``field_names_t0`` and ``field_names_t1``.
    Nodes that are not an ``h5py.Dataset`` or whose ``ndim`` is not 4 are
    ignored. The two checks are independent, so a dataset that fails both is
    printed twice.

    Args:
        name: Name of the node, compared against the accepted field names.
        obj: The node to check.

    Returns:
        None.
    """
    if not isinstance(obj, h5py.Dataset) or obj.ndim != 4:
        return

    def _report():
        # Same three lines for either failed check.
        print(f"  Name: {name}")
        print(f"  Shape: {obj.shape}")
        print(f"  Dtype: {obj.dtype}")

    extent_mismatch = obj.shape[2] != size[0] or obj.shape[3] != size[1]
    if extent_mismatch:
        _report()

    name_is_listed = name in field_names_t0 or name in field_names_t1
    if not name_is_listed:
        _report()


# Walk every HDF5 file below base_path and print a description of each node.
# To list only the datasets flagged by check_size, hand check_size to
# visititems instead of print_attrs.
for hdf5_path in base_path.glob("**/*.hdf5"):
    with h5py.File(hdf5_path, "r") as h5_file:
        h5_file.visititems(print_attrs)

In [6]:
def print_attrs(name, obj):
    """Print a short description of one HDF5 node.

    Same callback as the one defined in the earlier inspection cell; it is
    defined again here so that this cell can be run without that one.

    Args:
        name: Name of the node, printed as a header line.
        obj: The node itself. Shape and dtype are printed only when it is an
            ``h5py.Dataset``; every entry of ``obj.attrs`` is printed always.

    Returns:
        None. This matters for the callback role: per the h5py documentation,
        ``visititems`` stops at the first non-None return value.
    """
    print(f"\n{name}:")

    is_dataset = isinstance(obj, h5py.Dataset)
    if is_dataset:
        print(f"  Shape: {obj.shape}")
        print(f"  Dtype: {obj.dtype}")

    for key, val in obj.attrs.items():
        print(f"  {key}: {val}")

# Print the structure of one validation chunk of acoustic_scattering_inclusions.
# The directory gets its own name so that `base_path` (the dataset root that the
# earlier cell globs recursively) is not overwritten with this narrower folder;
# otherwise re-running that cell afterwards would only scan this directory.
valid_dir = Path("/hpcwork/rwth1802/coding/Large-Physics-Foundation-Model/data/datasets/acoustic_scattering_inclusions/data/valid")
chunk_name = "acoustic_scattering_inclusions_chunk_35_new.hdf5"
with h5py.File(valid_dir / chunk_name, "r") as f:
    f.visititems(print_attrs)



boundary_conditions:

boundary_conditions/x0_wall:
  associated_dims: ['x']
  associated_fields: []
  bc_type: WALL
  sample_varying: False
  time_varying: False

boundary_conditions/x0_wall/mask:
  Shape: (256,)
  Dtype: float32

boundary_conditions/x0_wall/values:
  Shape: (256,)
  Dtype: float32

boundary_conditions/xL_open:
  associated_dims: ['x']
  associated_fields: []
  bc_type: OPEN
  sample_varying: False
  time_varying: False

boundary_conditions/xL_open/mask:
  Shape: (256,)
  Dtype: float32

boundary_conditions/xL_open/values:
  Shape: (256,)
  Dtype: float32

boundary_conditions/y0_wall:
  associated_dims: ['y']
  associated_fields: []
  bc_type: WALL
  sample_varying: False
  time_varying: False

boundary_conditions/y0_wall/mask:
  Shape: (128,)
  Dtype: float32

boundary_conditions/y0_wall/values:
  Shape: (128,)
  Dtype: float32

boundary_conditions/yL_open:
  associated_dims: ['y']
  associated_fields: []
  bc_type: OPEN
  sample_varying: False
  time_varying: False


In [None]:
# Directory handed to get_datasets as "data_dir" for every dataset below.
DATASET_ROOT = "/hpcwork/rwth1802/coding/Large-Physics-Foundation-Model/data/datasets"

datasets = [
    "cooled_object_pipe_flow_air",
    "heated_object_pipe_flow_air",
    "object_sym_flow_air",
    "object_sym_flow_water",
    "object_periodic_flow_water",
    "cylinder_sym_flow_water",
    "cylinder_pipe_flow_water",
    "euler_multi_quadrants_periodicBC",
    "turbulent_radiative_layer_2D",
    "rayleigh_benard",
    "shear_flow",
    "twophase_flow",
]


def summarize_dataset(dataset_name, n_steps_input, dt_stride):
    """Load the train split of one dataset and print its size summary.

    Args:
        dataset_name: Key of the dataset; used both in the "datasets" config
            entry and to look up the result returned by ``get_datasets``.
        n_steps_input: Value for the "n_steps_input" config entry.
        dt_stride: Value for the "dt_stride" config entry (this notebook uses
            both an int and a two-element list).

    Returns:
        None. Prints the number of trajectories, the time steps per
        trajectory and the total number of samples.
    """
    data_config = {
        "datasets": [dataset_name],
        "data_dir": DATASET_ROOT,
        "n_steps_input": n_steps_input,
        "n_steps_output": 1,
        "dt_stride": dt_stride,
        "out_shape": [256, 128],
        "max_samples_per_ds": None,
        "full_trajectory_mode": False,
    }

    loaded: dict = get_datasets(data_config, split="train")
    ds = loaded[dataset_name]

    num_trajectories = sum(ds.n_trajectories_per_file)
    # Only the first entry is reported.
    # NOTE(review): presumably all entries are equal within a dataset; confirm.
    time_steps = ds.n_steps_per_trajectory[0]
    print(f"{dataset_name} has {num_trajectories} trajectories, {time_steps} time steps per trajectory, and {len(ds)} total samples")


# The loop variable is called `dataset_name` so it does not take over the name
# `dataset`, which other cells of this notebook use for the SuperDataset object.
print("Starting dataset inspection with 16 input steps, 1 output step, dt_stride [1,8]")
for dataset_name in datasets:
    summarize_dataset(dataset_name, n_steps_input=16, dt_stride=[1, 8])

print("Starting dataset inspection with 4 input steps, 1 output step, dt_stride 1")
for dataset_name in datasets:
    summarize_dataset(dataset_name, n_steps_input=4, dt_stride=1)

    



In [9]:
# One name for the shape that goes both into the loader config and into
# SuperDataset, so the two cannot drift apart.
target_shape = (256, 128)

# Loader configuration for the "twophase_flow" train split with
# full_trajectory_mode switched on.
data_config = {
    "datasets": ["twophase_flow"],
    "data_dir": "/hpcwork/rwth1802/coding/Large-Physics-Foundation-Model/data/datasets",
    "n_steps_input": 1,
    "n_steps_output": 1,
    "dt_stride": 1,
    "out_shape": list(target_shape),
    "max_samples_per_ds": None,
    "max_rollout_steps": 500,
    "full_trajectory_mode": True,
}

loaded_datasets = get_datasets(data_config, split="train")

# `dataset` is the object that the plotting and export cells index into.
dataset = SuperDataset(loaded_datasets, target_shape)

In [None]:
# Overview figure: one row per physical field, one column per time step.
#
# The trajectory is loaded inside this cell instead of relying on variables
# assigned by the export loop in a later cell, so the cell also works on a
# fresh kernel after the `dataset` cell has run.
overview_traj_idx = 0  # NOTE(review): arbitrary choice of trajectory; adjust as needed
time_idx = [0, 1, 2, 3]

# dataset[i] is unpacked as a pair whose second element is the trajectory;
# the last axis is indexed 0..4 in the order of `field_names`, matching the
# channel split used by the export cell.
_, overview_traj = dataset[overview_traj_idx]

field_names = ['Pressure', 'Density', 'Temperature', 'Velocity X', 'Velocity Y']
field_data = [overview_traj[..., channel] for channel in range(len(field_names))]
cmaps = ['plasma', 'viridis', 'inferno', 'cividis', 'magma']

# squeeze=False keeps `axes` two-dimensional even for a single time step.
fig, axes = plt.subplots(len(field_names), len(time_idx), figsize=(15, 20), squeeze=False)

# NOTE(review): the export cell swaps axes 1 and 2 of the density before
# imshow; confirm whether the same is wanted here so the X/Y labels match.
for row, (field, name, cmap) in enumerate(zip(field_data, field_names, cmaps)):
    for col, t in enumerate(time_idx):
        ax = axes[row, col]
        im = ax.imshow(field[t, ...], cmap=cmap)
        fig.colorbar(im, ax=ax, label=name)

        ax.set_xlabel('X')
        ax.set_ylabel('Y')
        ax.set_title(f'{name} Field (t={t})')

plt.tight_layout()
plt.show()


In [None]:
def _render_frame(image, norm, step):
    """Draw one 2-D array with a colorbar and return it as an RGB uint8 array.

    Args:
        image: 2-D array shown with ``imshow``.
        norm: Colour normalisation shared by all frames of one animation.
        step: Time step index written into the title.

    Returns:
        Array of shape (height, width, 3) sliced from the canvas RGBA buffer.
    """
    fig, ax = plt.subplots()
    im = ax.imshow(image, norm=norm)
    plt.colorbar(im)
    ax.set_title(f'Time step {step}')

    # Rasterise, then read the pixel buffer back and drop the alpha channel.
    fig.canvas.draw()
    rgba = np.frombuffer(fig.canvas.buffer_rgba(), dtype=np.uint8)
    height_width = fig.canvas.get_width_height()[::-1]
    rgb = rgba.reshape(height_width + (4,))[:, :, :3]

    # Close the figure so open figures do not pile up across frames.
    plt.close()
    return rgb


# Export a density animation for every 10th trajectory index below 600.
for traj in range(0, 600, 10):
    x, full_trajectory = dataset[traj]

    # Split the last axis into the individual fields. Only the density is
    # animated here; the other names are assigned but not used in this cell.
    pressure = full_trajectory[..., 0]
    temperature = full_trajectory[..., 2]
    vel_x = full_trajectory[..., 3]
    vel_y = full_trajectory[..., 4]

    # Swap axes 1 and 2 (x and y) before plotting.
    density = np.transpose(full_trajectory[..., 1], (0, 2, 1))

    # One normalisation per trajectory keeps the colour scale fixed over time.
    norm = Normalize(vmin=density.min(), vmax=density.max())

    frames = [
        _render_frame(density[t, :, :], norm, t)
        for t in range(density.shape[0])
    ]

    output_path = Path(f"/hpcwork/rwth1802/coding/Large-Physics-Foundation-Model/data/datasets/twophase_flow/density_{traj}.gif")
    print(f"Writing {len(frames)} frames to {output_path}")
    # NOTE(review): newer imageio releases may expect `duration` instead of
    # `fps` for GIF output; confirm against the installed version.
    iio.imwrite(output_path, frames, fps=30)
    print(f"Animation saved to {output_path}")