In [1]:
import numpy as np
import pandas as pd
import torch
from torch.utils.data import DataLoader
from torchdata.datapipes.iter import FileLister, FileOpener
from tqdm import tqdm

In [11]:
def _record_to_row(record):
    """Flatten one decoded TFRecord example into a plain-Python dict (one table row).

    Scalar tensors are unwrapped with ``.item()``, the label tensor becomes a
    list, and every embedding frame is expanded into a list of its elements.
    """
    return {
        'start_time_seconds': record['start_time_seconds'].item(),
        'end_time_seconds': record['end_time_seconds'].item(),
        'labels': record['labels'].tolist(),
        # NOTE(review): `[0][0]` takes the first element of the first entry. If
        # `record['video_id'][0]` is a bytes string this yields a single byte
        # value rather than the whole id -- confirm against the loader output
        # before relying on `video_id` as a key. Kept as-is to preserve output.
        'video_id': record['video_id'][0][0],
        'audio_embedding': [list(frame[0]) for frame in record['audio_embedding']],
    }


def _multi_hot(label_ids, num_classes):
    """Return a length-``num_classes`` int vector with ones at ``label_ids``."""
    encoded = np.zeros(num_classes, dtype=int)
    # A single fancy-indexed assignment replaces the per-label Python loop;
    # the explicit index dtype keeps an empty label list valid.
    encoded[np.asarray(label_ids, dtype=np.intp)] = 1
    return encoded


def tfrecord_to_df(data_name, num_frames=10, embedding_dim=128, num_classes=527):
    """Convert the TFRecord files of one data split into ``.npy`` and Parquet files.

    Reads every ``*.tfrecord`` under ``./audioset_v1_embeddings/{data_name}``,
    keeps only the clips that have exactly ``num_frames`` embedding frames, and
    writes three files into the current working directory:

    * ``{data_name}_embedding.npy`` -- array of shape
      ``(n_clips, num_frames, embedding_dim)``
    * ``{data_name}_labels.npy`` -- multi-hot int array of shape
      ``(n_clips, num_classes)``
    * ``{data_name}.parquet`` -- the filtered table indexed by ``video_id``,
      including the added multi-hot ``label`` column

    Parameters
    ----------
    data_name : str
        Sub-directory name of the split; also used as the output file prefix.
    num_frames : int, default 10
        Required number of embedding frames per clip; other clips are dropped.
    embedding_dim : int, default 128
        Number of values in each embedding frame.
    num_classes : int, default 527
        Length of the multi-hot label vector.

    Raises
    ------
    ValueError
        If no records are found for ``data_name``.
    """
    file_paths = FileLister(f"./audioset_v1_embeddings/{data_name}", "*.tfrecord")
    file_streams = FileOpener(file_paths, mode="b")
    records = file_streams.load_from_tfrecord()

    rows = [_record_to_row(record) for record in tqdm(records)]
    if not rows:
        # Without this guard the failure would surface as an unrelated
        # KeyError from set_index() on an empty, column-less frame.
        raise ValueError(
            f"no TFRecord examples found under ./audioset_v1_embeddings/{data_name}"
        )

    # dropna() actually removes incomplete rows; the earlier boolean-frame mask
    # (`df[~df.isnull()]`) only re-masked cells and never dropped anything.
    df = pd.DataFrame(rows).set_index(['video_id']).dropna()

    # Keep only clips with the full number of frames. The explicit copy lets
    # the `label` column be added below without a SettingWithCopyWarning.
    has_all_frames = df['audio_embedding'].map(len) == num_frames
    df = df.loc[has_all_frames].copy()

    embeddings = np.asarray(df['audio_embedding'].tolist()).reshape(
        -1, num_frames, embedding_dim
    )
    np.save(f'{data_name}_embedding.npy', embeddings)

    df['label'] = df['labels'].apply(lambda ids: _multi_hot(ids, num_classes))

    labels = np.asarray(df['label'].tolist(), dtype=int).reshape(-1, num_classes)
    np.save(f'{data_name}_labels.npy', labels)

    df.to_parquet(f'{data_name}.parquet')

In [12]:
# Convert each split in turn; every call writes that split's .npy and .parquet files.
for split_name in ("bal_train", "eval"):
    tfrecord_to_df(split_name)

22160it [00:06, 3263.28it/s]
20371it [00:06, 3357.72it/s]
