In [122]:
import numpy as np
import h5py
from pathlib import Path
import pandas as pd
from tqdm import tqdm
from copy import copy
import sys
sys.path.append("../")
from utils.config import labels as human_labels
from utils.config import id_to_lb
from copy import deepcopy
# print(f"{labels=}")

In [123]:
! head /datasets/AudioSet/pann_repo/unbalanced_train_segments.csv

index,YTID,start_seconds,end_seconds,positive_labels
0|---1_cCGK4M|0.000|10.000|"/m/01g50p,/m/0284vy3,/m/06d_3,/m/07jdr,/m/07rwm0c"
1|---2_BBVHAA|30.000|40.000|"/m/09x0r"
2|---B_v8ZoBY|30.000|40.000|"/m/04rlf"
3|---EDNidJUA|30.000|40.000|"/m/02qldy,/m/02zsn,/m/05zppz,/m/09x0r"
4|---N4cFAE1A|21.000|31.000|"/m/04rlf,/m/09x0r"
5|---fcVQUf3E|30.000|40.000|"/m/019jd,/m/07yv9"
6|---g9OGAhwc|30.000|40.000|"/m/04rlf,/m/0c1dj"
7|---lTs1dxhU|30.000|40.000|"/m/012f08,/m/07yv9,/m/0k4j,/t/dd00134"
8|---mO--kRQk|30.000|40.000|"/m/04rlf"


In [128]:
meta_csv: pd.DataFrame = pd.read_csv("/datasets/AudioSet/pann_repo/unbalanced_train_segments.csv", sep="|")
print(f"{meta_csv.index=}")
meta_csv.set_index("index")
labels_csv: pd.DataFrame = pd.read_csv("/datasets/AudioSet/pann_repo/metadata/class_labels_indices.csv")
num_classes: int = len(labels_csv)

print(f"{num_classes=}")    

meta_csv.index=RangeIndex(start=0, stop=2041789, step=1)
num_classes=527


## Checking that the human labels are sorted properly

In [129]:
labels_before_sort: list[str] = copy(human_labels)
# assert str(human_labels.sort()) == str(labels_before_sort)
print(f"{human_labels=}")
print(f"{labels_before_sort=}")
for i, label in enumerate(labels_before_sort):
    assert label in human_labels
    assert label in id_to_lb.values()
    assert human_labels[i] == label

human_labels=['A capella', 'Accelerating, revving, vroom', 'Accordion', 'Acoustic guitar', 'Afrobeat', 'Air brake', 'Air conditioning', 'Air horn, truck horn', 'Aircraft', 'Aircraft engine', 'Alarm', 'Alarm clock', 'Ambient music', 'Ambulance (siren)', 'Angry music', 'Animal', 'Applause', 'Arrow', 'Artillery fire', 'Babbling', 'Baby cry, infant cry', 'Baby laughter', 'Background music', 'Bagpipes', 'Bang', 'Banjo', 'Bark', 'Basketball bounce', 'Bass drum', 'Bass guitar', 'Bathtub (filling or washing)', 'Battle cry', 'Beatboxing', 'Bee, wasp, etc.', 'Beep, bleep', 'Bell', 'Bellow', 'Belly laugh', 'Bicycle', 'Bicycle bell', 'Bird', 'Bird flight, flapping wings', 'Bird vocalization, bird call, bird song', 'Biting', 'Bleat', 'Blender', 'Bluegrass', 'Blues', 'Boat, Water vehicle', 'Boiling', 'Boing', 'Boom', 'Bouncing', 'Bow-wow', 'Bowed string instrument', 'Brass instrument', 'Breaking', 'Breathing', 'Burping, eructation', 'Burst, pop', 'Bus', 'Busy signal', 'Buzz', 'Buzzer', 'Cacophony', 

## Checking that the pann repo's code maps the machine IDS to the human labels properly

In [None]:
csv_id_to_human_map: dict = {}
for index, meta_csv_row in labels_csv.iterrows():
    csv_id_to_human_map[meta_csv_row["mid"]] = meta_csv_row["display_name"]

assert csv_id_to_human_map == id_to_lb

## Verifying the melspec HDF5s

In [180]:
# hdf5s_path : Path = Path("../hdf5s/melspec_hdf5s_1.6m/train")
hdf5s_path : Path = Path("/datasets/AudioSet/pann_repo/hdf5s/pack_1.6m_hdf5s/train")


split_stats : dict = {}
batch_size: int = 500

for file in hdf5s_path.glob("*.h5"):
    print(f"{file=}")

    with h5py.File(file) as f:
        print(f"{f.keys()=}")
        assert f['meta_csv_idx'].dtype == np.int32
        assert len(f['meta_csv_idx'].shape) == 1

        if f['target'].dtype == bool:
            if f['target'].shape[1] != num_classes:
                raise ValueError("Target shape does not match number of classes")
        assert len(f['meta_csv_idx'].shape) == 1, "Meta csv idx has more than 1 dimension"
        
        assert f['valid'].dtype == bool 
        assert len(f['valid'].shape) == 1

        if 'waveform' in f.keys():
            print(f"{f['waveform'].dtype=}")
            assert f['waveform'].dtype == np.int16
            assert len(f['waveform'].shape) == 2
        elif 'mel_specs' in f.keys():
            assert f['mel_specs'].dtype == np.float32
            assert len(f['mel_specs'].shape) == 3
        
        # print(f['audio_name'].dtype)
        assert f['audio_name'].dtype == 'S20'

        for i in tqdm(range(0, f['target'].shape[0]//batch_size)):
            # print(f"{i=}")
            # mel_specs: np.array = np.array(f['mel_specs'][i*batch_size:(i+1)*batch_size])
            hdf5_targets_idxs: np.array = np.array(f['target'][i*batch_size:(i+1)*batch_size])
            hdf5_valid: np.array = np.array(f['valid'][i*batch_size:(i+1)*batch_size])

            # check the type of meta_csv_idx

#             print(f"{f['meta_csv_idx'].dtype=}")
            meta_csv_idxs: np.array = np.array(f['meta_csv_idx'][i*batch_size:(i+1)*batch_size])

            # get rows from meta_csv
            meta_csv_rows: pd.DataFrame = meta_csv.loc[meta_csv_idxs]
            # print(f"{meta_csv_rows=}")

            csv_labels: np.array = np.zeros((meta_csv_rows.shape[0], num_classes), dtype=bool)
            # print(f"{csv_labels=}")
            
            csv_targets: np.array = np.zeros((meta_csv_rows.shape[0], num_classes), dtype=bool)
            # verifying that the labels in the CSV are the same as the labels in the HDF5
            for i, (idx, meta_csv_row) in enumerate(meta_csv_rows.iterrows()): 

                if hdf5_valid[i] == False:
                    continue

                human_label_idxs: list[str] = [ human_labels.index(id_to_lb[lb].replace("\"", "")) for lb in meta_csv_row['positive_labels'].replace("\"", "").split(',')]
                print(f"{human_label_idxs=}")   
                print(f"{idx=}")
                csv_targets[i, human_label_idxs] = True

                tmp_where = np.where(hdf5_targets_idxs[i] == True)
                if isinstance(tmp_where, tuple):
                    tmp_where = tmp_where[0]
                    assert len(tmp_where) > 0
                hdf5_targets_idxs: np.array = tmp_where
                # print(f"{}")
                # print(f"{}")
                print(f"{hdf5_targets_idxs=}")
                print(f"{human_label_idxs=}")
                assert np.array_equal(np.array(human_label_idxs), hdf5_targets_idxs)


            
            # if not np.array_equal(hdf5_targets, csv_targets):
            #     print("HDF5 and CSV targets are not equal")
            #     print(f"{np.argmax(hdf5_targets, axis=1)=}")
            #     print(f"{np.argmax(csv_targets, axis=1)=}")
            # assert np.array_equal(csv_targets, hdf5_targets)

            # min_vals: np.array = np.min(mel_specs, axis=(1, 2))
            # max_vals: np.array = np.max(mel_specs, axis=(1, 2))
#             # print(f"{min_vals.shape=}")
#             # print(f"{max_vals.shape=}")
#             # print(f"{min_vals[:10]=}")
#             # print(f"{max_vals[:10]=}")
#             # print(f"{meta_csv_idxs[:10]=}")

#             # print(f"{meta_csv.loc[list(meta_csv_idxs)]=}")
            # break

            

#         # # print(f['audio_name'])
#         # waveform: np.array = np.array(f['mel_specs'])
#         # # print out norms of waveforms
#         # print(f"{np.linalg.norm(waveform, axis=0)}")
#         # load_arr : np.array = np.array(f['valid'])
#         # print(f"{load_arr.any()}")
#         # print(f"{load_arr.shape=}")
#         # print("Number of valid files is ", np.sum(load_arr.astype(int)))
#         # print("Number of total entries is ", f"{load_arr.shape[0]}")
#         # # print(f"{}")
#         # split_stats[file] = {
#         #     "num_audios" : np.sum(load_arr),
#         #     "total_rows" : load_arr.shape[0]
#         # }
    break

# total_num_audios : int = 0
# total_rows : int = 0

# for key in split_stats:
#     total_num_audios += split_stats[key]['num_audios']
#     total_rows += split_stats[key]['total_rows']

# print(f"{total_num_audios=}")
# print(f"{total_rows=}")




file=PosixPath('/datasets/AudioSet/pann_repo/hdf5s/pack_1.6m_hdf5s/train/audioset_0.h5')
f.keys()=<KeysViewHDF5 ['audio_name', 'meta_csv_idx', 'target', 'valid', 'waveform']>
f['waveform'].dtype=dtype('int16')


  tmp_where = np.where(hdf5_targets_idxs[i] == True)
  0%|          | 0/83 [00:00<?, ?it/s]

human_label_idxs=[291, 418]
idx=106752
hdf5_targets_idxs=array([291, 418])
human_label_idxs=[291, 418]
human_label_idxs=[291, 297, 491]
idx=1753785





AssertionError: 

In [11]:
f.close()