Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
23cbba8
Removed the argument "batch_size" from the trainers.
renierts Feb 13, 2026
dcd6d53
Merge remote-tracking branch 'origin/dev-peter' into dev-peter
renierts Feb 13, 2026
030631b
Bugfix in the dataset class. When iterating over movie configurations…
renierts Feb 13, 2026
f07f767
Added base script for video reconstruction. Copied from Aza's branch …
renierts Feb 13, 2026
4ba4756
Added base script for video reconstruction. Copied from Aza's branch …
renierts Feb 13, 2026
16e9bfb
Minor changes in the example scripts. More preprocessing options for …
renierts Feb 14, 2026
cf0ba70
Resolved conflicts. Can be merged now.
renierts Feb 14, 2026
475a6e2
Fixed a bug where the dataset class failed when using multiple worker…
renierts Feb 14, 2026
54f5e89
Lots of bugfixes in the dataset, trainer, and models.
renierts Feb 16, 2026
e4b333e
Merge branch 'foundation_model' into dev-peter
renierts Feb 16, 2026
3195edc
Extended checkpointing - the trainer stores now:
renierts Feb 16, 2026
77c0315
Merge remote-tracking branch 'origin/dev-peter' into dev-peter
renierts Feb 16, 2026
a0edc4d
Extended checkpointing - the trainer stores now:
renierts Feb 16, 2026
4d01984
Adapted the other reconstruction scripts to match the new API.
renierts Feb 16, 2026
8b457c8
Bugfix in the dataset class. When splitting inputs and targets, I for…
renierts Feb 16, 2026
930fd27
Prepared an option to preprocess movies. This has to be fully integra…
renierts Feb 16, 2026
9e2fec0
Merge branch 'foundation_model' into dev-peter
renierts Feb 16, 2026
357ada7
Added a baseline fusion transformer for latent space prediction.
renierts Feb 17, 2026
43e3434
Merge branch 'foundation_model' of https://github.com/PlasmaControl/F…
renierts Feb 17, 2026
1fc8fc4
Foundation model (#56)
renierts Feb 17, 2026
5482428
Moved some remaining scripts to the correct subdirectories.
renierts Feb 17, 2026
abb3057
Still working on preparing the dataset. This is not ready to push. Pr…
renierts Feb 17, 2026
86aca34
Updated the data loader. Bugfix for loading the correct slices from H…
Feb 19, 2026
fe38d8c
Added scripts for data fetching in Omega.
Feb 24, 2026
ad3b3ff
Added documentation for setting up Globus CLI on Omega and start a …
Feb 24, 2026
ebe4c3a
Updated README.md:
Feb 24, 2026
aa78e87
More PTData to fetch.
Feb 24, 2026
e6cb74f
PEP-8 compatible code.
Feb 25, 2026
aa03966
Generalized make_preprocessing_stats.py and made the function compute…
Feb 25, 2026
e7c8c9c
A lot of bugfixes in the dataloader and prepare_data.py
Mar 2, 2026
771a1ed
Merge foundation_model into dev-peter
Mar 2, 2026
060a149
Many bugfixes in the dataset class and for computing preprocessing s…
Mar 4, 2026
08a2c7f
Speed-ups in data_loader.py.
Mar 5, 2026
6a55406
Speed-ups in the dataloader.
Mar 9, 2026
3580f39
Merge branch 'foundation_model' into dev-peter
renierts Mar 9, 2026
a4e6e9d
drawing.py:
Mar 10, 2026
0aa3bcd
Added functionalities for preprocessing.
Mar 10, 2026
450330f
Merge branch 'foundation_model' into dev-peter
renierts Mar 10, 2026
d9c8c15
Shell script for batch dtype conversion.
Mar 10, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
164 changes: 164 additions & 0 deletions scripts/data_fetching_omega/add_to_h5.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
#!/usr/bin/env python3
"""
Add/overwrite data from source H5 files to target H5 file.
Existing signals are overwritten, new signals are added.

Usage:
python add_to_h5.py target.h5 source1.h5 source2.h5 ...
python add_to_h5.py 200000.h5 200000_chiron.h5 200000_extra.h5
"""

import h5py
import sys
import argparse
from pathlib import Path


def _is_group(node):
    """Return True when *node* is an HDF5 group, i.e. can hold children."""
    return isinstance(node, h5py.Group)


def _copy_signal(f_source, f_target, signal_path):
    """Copy one signal from source to target under the same path.

    Returns:
        True if an object already existed at ``signal_path`` in the target
        and was replaced, False if the signal was newly added.
    """
    existed = signal_path in f_target
    if existed:
        # The old object is unlinked first so the copy can be created under
        # the same name.
        # NOTE(review): removal happens before the copy, so a failing copy
        # leaves the target without this signal.
        del f_target[signal_path]
    f_source.copy(f_source[signal_path], f_target, signal_path)
    return existed


def _merge_tree(f_source, f_target, shot_name, tree_name, stats, verbose):
    """Merge all signals of one tree of one shot into the target file."""
    tree_path = f"{shot_name}/{tree_name}"
    source_tree = f_source[tree_path]

    # Only groups can be iterated for signals. Skipping here keeps one
    # unexpected node from aborting the rest of the source file.
    if not _is_group(source_tree):
        print(f"Warning: {tree_path} in source is not a group, skipping")
        return

    if tree_path not in f_target:
        f_target.create_group(tree_path)
        stats['trees_added'] += 1
        if verbose:
            print(f" Tree {tree_name} (new)")
    elif not _is_group(f_target[tree_path]):
        print(f"Warning: {tree_path} in target is not a group, skipping")
        return
    elif verbose:
        print(f" Tree {tree_name} (existing)")

    for signal_name in source_tree.keys():
        signal_path = f"{tree_path}/{signal_name}"
        if _copy_signal(f_source, f_target, signal_path):
            stats['signals_overwritten'] += 1
            if verbose:
                print(f" {signal_name} (overwritten)")
        else:
            stats['signals_added'] += 1
            if verbose:
                print(f" {signal_name} (added)")


def _merge_shot(f_source, f_target, shot_name, stats, verbose):
    """Merge all trees of one shot into the target file."""
    if verbose:
        print(f" Shot {shot_name}:")

    source_shot = f_source[shot_name]
    if not _is_group(source_shot):
        print(f"Warning: {shot_name} in source is not a group, skipping")
        return

    if shot_name not in f_target:
        f_target.create_group(shot_name)
        if verbose:
            print(" Created shot group")
    elif not _is_group(f_target[shot_name]):
        print(f"Warning: {shot_name} in target is not a group, skipping")
        return

    for tree_name in source_shot.keys():
        _merge_tree(f_source, f_target, shot_name, tree_name, stats, verbose)


def add_to_h5(target_file, source_files, verbose=True):
    """
    Add/overwrite trees/signals from source files to target file.

    Each source file is walked as shot -> tree -> signal. Missing shot and
    tree groups are created in the target; every signal is copied, replacing
    any object that already exists at the same path. Sources are applied in
    the order given, so later sources overwrite earlier ones.

    Source files that do not exist, or that resolve to the target itself,
    are skipped. Shot or tree entries that are not groups (in the source or
    in the target) are skipped with a warning instead of aborting the whole
    source file. Any other error while processing a source file is printed
    with a traceback and processing continues with the next source file.

    Args:
        target_file: Path to target HDF5 file (modified in place)
        source_files: List of source HDF5 files to add from
        verbose: Print progress messages

    Raises:
        SystemExit: With exit code 1 if the target file does not exist.
    """
    target_path = Path(target_file)
    if not target_path.exists():
        print(f"Error: Target file does not exist: {target_file}")
        print("Create it first or use one of the source files as target")
        sys.exit(1)

    if verbose:
        print(f"Target file: {target_file}")
        print("Mode: Overwrite existing signals, add new ones\n")

    stats = {
        'files_processed': 0,
        'trees_added': 0,
        'signals_added': 0,
        'signals_overwritten': 0
    }
    # Resolved once; it is compared against every source path below.
    resolved_target = target_path.resolve()

    with h5py.File(target_file, 'a') as f_target:
        for source_file in source_files:
            source_path = Path(source_file)
            if not source_path.exists():
                print(f"Warning: {source_file} does not exist, skipping")
                continue

            if source_path.resolve() == resolved_target:
                if verbose:
                    print(f"Skipping {source_file} (same as target)")
                continue

            if verbose:
                print(f"Adding from: {source_file}")

            try:
                with h5py.File(source_file, 'r') as f_source:
                    for shot_name in f_source.keys():
                        _merge_shot(f_source, f_target, shot_name,
                                    stats, verbose)
                # Counted only when the whole source file went through.
                stats['files_processed'] += 1
            except Exception as e:
                # Best-effort batch merge: report the failure and move on to
                # the next source file.
                print(f"Error processing {source_file}: {e}")
                import traceback
                traceback.print_exc()
                continue

    # Print summary
    if verbose:
        print("\n" + "=" * 60)
        print("Summary:")
        print("=" * 60)
        print(f"Files processed: {stats['files_processed']}")
        print(f"Trees added: {stats['trees_added']}")
        print(f"Signals added: {stats['signals_added']}")
        print(f"Signals overwritten: {stats['signals_overwritten']}")
        print(f"\nTarget file updated: {target_file}")


def main():
    """Command-line entry point.

    Parses the target file, one or more source files and the optional
    ``-q/--quiet`` flag, then hands them to ``add_to_h5``. Progress output
    is enabled unless ``--quiet`` is given.
    """
    # Shown verbatim after the argument help (raw formatter keeps the layout).
    usage_notes = """
Examples:
# Add/update chiron data to existing file
python add_to_h5.py 200000.h5 200000_chiron.h5

# Add multiple sources (later sources overwrite earlier ones)
python add_to_h5.py 200000.h5 source1.h5 source2.h5 source3.h5

# Update all files in directory with new data
for file in *.h5; do
python add_to_h5.py "$file" updated_data.h5
done

Behavior:
- If signal exists in target: OVERWRITE with source data
- If signal is new: ADD to target
- Trees are merged (not replaced entirely)
"""

    cli = argparse.ArgumentParser(
        description='Add/overwrite data from source H5 files to target H5 file',
        epilog=usage_notes,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    cli.add_argument('target', help='Target HDF5 file (will be modified)')
    cli.add_argument(
        'sources',
        nargs='+',
        help='Source HDF5 files to add/overwrite from',
    )
    cli.add_argument(
        '-q', '--quiet',
        help='Suppress progress messages',
        action='store_true',
    )

    options = cli.parse_args()

    # Add/overwrite data; --quiet turns the progress messages off.
    show_progress = not options.quiet
    add_to_h5(options.target, options.sources, verbose=show_progress)


# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
Loading