In [1]:
from pathlib import Path
import h5py

In [2]:
# Input HDF5 files: the old (archived) ELM labels, the new shot metadata,
# and the new ELM labels.
old_labels_path = Path(
    '/home/smithdr/ml/elm_data/_archive/step_6_labeling_tool_v2/data_archive/labeled_elm_events_long_windows_20230124.hdf5'
)
new_metadata_path = Path(
    '/home/smithdr/ml/elm_data/step_4_shot_partial_data/data_v5.hdf5'
)
new_labels_path = Path(
    '/home/smithdr/ml/elm_data/step_5_label_elms/labeled_elms_v3.hdf5'
)

# Fail fast, before any file is opened, if one of the inputs is missing.
# The order matches the order in which the paths are declared above.
for input_path in (old_labels_path, new_metadata_path, new_labels_path):
    assert input_path.exists()

# Minimum time-window length for an old ELM to be counted as "long".
# NOTE(review): expressed in the units of the stored 'time' dataset
# (presumably ms) -- confirm.
MIN_ELM_WINDOW_LENGTH = 20

# Open all three files explicitly read-only: this cell only inspects them, and
# the default mode of h5py.File is not the same across h5py versions.
with (
    h5py.File(new_metadata_path, 'r') as new_metadata_file,
    h5py.File(new_labels_path, 'r') as new_labels_file,
    h5py.File(old_labels_path, 'r') as old_labels_file,
):
    # Shot inventories of the new files; keys under 'shots' are parsed as ints.
    shots_in_new_metadata_file = {int(shot) for shot in new_metadata_file['shots']}
    print(f'Shots in new metadata file: {len(shots_in_new_metadata_file)}')
    shots_in_new_labels_file = {int(shot) for shot in new_labels_file['shots']}
    print(f'Shots in new labels file: {len(shots_in_new_labels_file)}')
    excluded_shots_in_new_labels_file = new_labels_file.attrs['excluded_shots']
    print(f'Excluded shots in new labels file: {len(excluded_shots_in_new_labels_file)}')

    # The old file has one top-level entry per ELM; each carries its shot
    # number as an attribute.  Shots are normalized to plain int so they match
    # the sets built above.
    shots_in_old_labels_file = {
        int(old_labels_file[key].attrs['shot']) for key in old_labels_file
    }
    print(f'Shots in old labels file: {len(shots_in_old_labels_file)}')
    elms_in_old_labels_file = {int(key) for key in old_labels_file}
    print(f'ELMs in old labels file: {len(elms_in_old_labels_file)}')

    # Tally the old ELMs whose shot also appears in the new metadata file, and
    # count per shot how many of those have a sufficiently long time window.
    old_shots_in_new_shots = set()
    old_elms_in_new_shots = 0
    old_elms_in_new_shots_with_good_length = 0
    long_elms_per_old_shot = {}
    for elm_key in old_labels_file:
        elm_group = old_labels_file[elm_key]
        shot = int(elm_group.attrs['shot'])
        if shot not in shots_in_new_metadata_file:
            continue
        old_shots_in_new_shots.add(shot)
        old_elms_in_new_shots += 1
        # Only the first and last samples are read; the window length is
        # their difference.
        time_data = elm_group['time']
        t_length = time_data[-1] - time_data[0]
        if t_length >= MIN_ELM_WINDOW_LENGTH:
            old_elms_in_new_shots_with_good_length += 1
            long_elms_per_old_shot[shot] = long_elms_per_old_shot.get(shot, 0) + 1
# Summary of the overlap between the old label file and the new metadata file.
print(f'Old shots in new shots: {len(old_shots_in_new_shots)}')
print(f'Old ELMs in new shots: {old_elms_in_new_shots}')
print(f'Old ELMs in new shots with good length: {old_elms_in_new_shots_with_good_length}')

# An old shot is selected for copying only if it has at least this many
# long ELMs.
MIN_LONG_ELMS_PER_SHOT = 8
old_shots_to_copy = [
    shot
    for shot, n_elms in long_elms_per_old_shot.items()
    if n_elms >= MIN_LONG_ELMS_PER_SHOT
]
old_elms_to_copy = sum(long_elms_per_old_shot[shot] for shot in old_shots_to_copy)
print(f'Old shots to copy: {len(old_shots_to_copy)}')
print(f'Old ELMs to copy: {old_elms_to_copy}')
# Guard the average: with no qualifying shot the division would raise
# ZeroDivisionError and abort the rest of the cell.
if old_shots_to_copy:
    print(f'Avg ELMs per shot: {old_elms_to_copy/len(old_shots_to_copy):.1f}')
else:
    print('Avg ELMs per shot: n/a (no shots meet the threshold)')

# Report, for every old shot selected for copying, whether the new labels file
# already knows it -- either as a labeled shot or as an excluded shot.
excluded_shot_set = set(excluded_shots_in_new_labels_file)
all_new_shots = set(shots_in_new_labels_file) | excluded_shot_set

# Read-only access is sufficient here; the mode is stated explicitly because
# the default mode of h5py.File is not the same across h5py versions.
with h5py.File(new_labels_path, 'r') as new_labels_file:
    for shot in old_shots_to_copy:
        if shot not in all_new_shots:
            continue
        print(f'Old shot {shot} with {long_elms_per_old_shot[shot]} ELMs')
        if shot in shots_in_new_labels_file:
            # The first dimension of the 'shot_intervals' attribute is what
            # gets reported as the number of ELMs labeled for this shot.
            n_elms = new_labels_file['shots'][str(shot)].attrs['shot_intervals'].shape[0]
            print(f"  In new labeled shots with {n_elms} ELMs")
        elif shot in excluded_shot_set:
            print('  In new EXCLUDED shots')


Shots in new metadata file: 887
Shots in new labels file: 89
Excluded shots in new labels file: 150
Shots in old labels file: 220
ELMs in old labels file: 3651
Old shots in new shots: 127
Old ELMs in new shots: 2468
Old ELMs in new shots with good length: 517
Old shots to copy: 24
Old ELMs to copy: 325
Avg ELMs per shot: 13.5
Old shot 175687 with 11 ELMs
  In new labeled shots with 35 ELMs
Old shot 175690 with 9 ELMs
  In new labeled shots with 79 ELMs
Old shot 179456 with 13 ELMs
  In new labeled shots with 41 ELMs
Old shot 179854 with 11 ELMs
  In new labeled shots with 56 ELMs
Old shot 179873 with 13 ELMs
  In new labeled shots with 35 ELMs
Old shot 184449 with 12 ELMs
  In new labeled shots with 27 ELMs
