In [1]:
from data.dataloader_v1 import AudioDataset

In [2]:
# Shell escape: print the directory the notebook kernel is running in.
! pwd

/home/prs392/codes/incubator/non-invertible-audio-feature-generation/development/param/openl3_librispeech/data_loader


In [60]:
import os
import random

import torch
from torch.utils.data import Dataset
import torchaudio.functional as F

import numpy as np
import pandas as pd

import tqdm


def binarySearch(data, val):
    """Bracket ``val`` inside the ascending, integer-indexable ``data``.

    Returns a two-element list of indices ``[low, high]``:

    * ``[k, k]`` when one of the probed positions holds ``val`` exactly;
    * ``[0, 0]`` when ``val`` is below every element (or ``data`` has a
      single element);
    * ``[i, i + 1]`` when ``val`` falls strictly between two neighbours;
      a ``val`` above every element yields the last two indices;
    * ``[-1, 0]`` for an empty ``data``.
    """
    lo, hi = 0, len(data) - 1

    while lo < hi:
        mid = (lo + hi) // 2
        mid_value = data[mid]

        # Exact hits are probed in a fixed order: lower bound, midpoint,
        # upper bound.
        if data[lo] == val:
            return [lo, lo]
        if mid_value == val:
            return [mid, mid]
        if data[hi] == val:
            return [hi, hi]

        # No exact hit: shrink the window towards ``val``; stop as soon as
        # the midpoint coincides with the bound that would be moved.
        if mid_value > val:
            if mid == hi:
                break
            hi = mid
        else:
            if mid == lo:
                break
            lo = mid

    return [min(lo, hi), max(lo, hi)]


class AudioDataset(Dataset):
    """Map-style dataset that serves one (embedding, spectrogram) frame per item.

    ``root_dir`` must contain:

    * ``number_of_frames_per_audio.csv`` with the columns ``file_name`` and
      ``number_of_frames``;
    * ``embeddings_6144/<file_name>`` and ``spectrograms/<file_name>``, one
      ``.npy`` array per listed file, indexed by frame along the first axis.

    Items are numbered globally: index 0 is the first frame of the first
    file in the CSV, and numbering continues through the files in CSV order.

    Args:
        root_dir: Directory holding the CSV and the two array directories.
        transform: Stored on the instance; it is not applied by
            ``__getitem__``.
        num_audios: If a positive int, only the first ``num_audios`` rows of
            the CSV are used; any other value keeps every row.
        return_amp: If true, the spectrogram frame is passed through
            ``F.DB_to_amplitude`` (``ref=1``, ``power=0.5``) before it is
            returned; otherwise it is returned as loaded.
        verbose: If true (the default, which keeps the historical
            behaviour) every ``__getitem__`` call prints the file name, the
            frame index within the file and the file's frame count. Pass
            ``False`` to silence this, e.g. when training.
    """

    def __init__(self, root_dir, transform=None, num_audios = -1, return_amp = True, verbose = True):

        self.root_dir = root_dir
        self.embeddings_dir = os.path.join(self.root_dir, 'embeddings_6144')
        self.spectrograms_dir = os.path.join(self.root_dir, 'spectrograms')
        self.transform = transform
        self.num_audios = num_audios
        self.return_amp = return_amp
        self.verbose = verbose

        self.df = pd.read_csv(os.path.join(root_dir, 'number_of_frames_per_audio.csv'))
        # Type check first so a non-int (e.g. None) never reaches the comparison.
        if isinstance(num_audios, int) and num_audios > 0:
            self.df = self.df.head(num_audios)
        self.cumulative_sum = self.df['number_of_frames'].cumsum()

        # Positional array of running totals: entry k is the global index one
        # past the last frame of file k. Used for the per-item lookup.
        self._frame_ends = self.cumulative_sum.to_numpy()
        self._total_frames = int(self._frame_ends[-1]) if len(self._frame_ends) else 0

    def __len__(self):
        """Return the total number of frames over all selected files."""
        return self._total_frames

    def __getitem__(self, idx):
        """Return ``(embedding, spectrogram, frame_idx)`` for global frame ``idx``.

        ``frame_idx`` is the zero-based position of the frame inside its
        file, returned as a 0-d tensor. Negative ``idx`` counts from the end.

        Raises:
            IndexError: if ``idx`` is outside ``[-len(self), len(self))``.
        """
        idx = int(idx)
        if idx < 0:
            idx += self._total_frames
        if not 0 <= idx < self._total_frames:
            raise IndexError('frame index out of range')

        # First file whose running total exceeds idx. side='right' sends an
        # idx equal to a running total to the *next* file, so frame_idx is
        # always non-negative (the last frame of a file is never reported
        # as -1) and zero-frame files are skipped.
        file_pos = int(np.searchsorted(self._frame_ends, idx, side='right'))
        frames_before = int(self._frame_ends[file_pos - 1]) if file_pos > 0 else 0
        frame_idx = idx - frames_before

        file_name = self.df.iloc[file_pos]['file_name']
        emb_path = os.path.join(self.embeddings_dir, file_name)
        spec_path = os.path.join(self.spectrograms_dir, file_name)

        with open(emb_path, 'rb') as f:
            emb = np.load(f)
        with open(spec_path, 'rb') as f:
            spec = np.load(f)

        emb_tensor = torch.from_numpy(emb[frame_idx])
        # Move the last axis of the spectrogram frame to the front
        # (presumably channels-last -> channels-first; confirm against the
        # code that wrote the arrays).
        spec_tensor = torch.from_numpy(spec[frame_idx]).permute(2, 0, 1)

        if self.verbose:
            print(file_name, frame_idx, emb.shape[0])

        if self.return_amp is True:
            spec_tensor_amp = F.DB_to_amplitude(x = spec_tensor, ref = 1, power = 0.5)
            return emb_tensor, spec_tensor_amp, torch.tensor(frame_idx)

        else:
            return emb_tensor, spec_tensor, torch.tensor(frame_idx)



In [63]:
audio_dataset = AudioDataset(
    root_dir='/scratch/prs392/incubator/data/LibriSpeech/train-clean-360',
    num_audios=2,
)

# Smoke test: fetch items in order, stopping after index 1300 or at the end
# of the dataset, whichever comes first.
for i in range(min(len(audio_dataset), 1301)):
    sample, spec, j = audio_dataset[i]

6567-53342-0017.npy 0 150
6567-53342-0017.npy 1 150
6567-53342-0017.npy 2 150
6567-53342-0017.npy 3 150
6567-53342-0017.npy 4 150
6567-53342-0017.npy 5 150
6567-53342-0017.npy 6 150
6567-53342-0017.npy 7 150
6567-53342-0017.npy 8 150
6567-53342-0017.npy 9 150
6567-53342-0017.npy 10 150
6567-53342-0017.npy 11 150
6567-53342-0017.npy 12 150
6567-53342-0017.npy 13 150
6567-53342-0017.npy 14 150
6567-53342-0017.npy 15 150
6567-53342-0017.npy 16 150
6567-53342-0017.npy 17 150
6567-53342-0017.npy 18 150
6567-53342-0017.npy 19 150
6567-53342-0017.npy 20 150
6567-53342-0017.npy 21 150
6567-53342-0017.npy 22 150
6567-53342-0017.npy 23 150
6567-53342-0017.npy 24 150
6567-53342-0017.npy 25 150
6567-53342-0017.npy 26 150
6567-53342-0017.npy 27 150
6567-53342-0017.npy 28 150
6567-53342-0017.npy 29 150
6567-53342-0017.npy 30 150
6567-53342-0017.npy 31 150
6567-53342-0017.npy 32 150
6567-53342-0017.npy 33 150
6567-53342-0017.npy 34 150
6567-53342-0017.npy 35 150
6567-53342-0017.npy 36 150
6567-53342-

In [9]:
# Load the per-file frame-count table that sits at the dataset root.
root_dir = '/scratch/prs392/incubator/data/LibriSpeech/train-clean-360'
df = pd.read_csv(
    os.path.join(root_dir, 'number_of_frames_per_audio.csv'),
)

In [11]:
# Running total of frames: entry k is the combined frame count of rows 0..k.
cumulative_sum = df['number_of_frames'].cumsum()

In [32]:
# Inspect the frame-count column of the CSV.
df['number_of_frames']

0         150
1         130
2         143
3         130
4         143
         ... 
104009    152
104010    110
104011    149
104012    130
104013    124
Name: number_of_frames, Length: 104014, dtype: int64

In [30]:
# Inspect the running totals; the last entry is the total number of frames.
cumulative_sum

0              150
1              280
2              423
3              553
4              696
            ...   
104009    12202480
104010    12202590
104011    12202739
104012    12202869
104013    12202993
Name: number_of_frames, Length: 104014, dtype: int64

In [15]:
def binarySearch(data, val):
    """Bracket ``val`` inside the ascending, integer-indexable ``data``.

    NOTE: this re-defines the ``binarySearch`` declared alongside
    ``AudioDataset`` earlier in the notebook; the two must stay in sync.

    Returns a two-element list of indices ``[low, high]``:

    * ``[k, k]`` when one of the probed positions holds ``val`` exactly;
    * ``[0, 0]`` when ``val`` is below every element (or ``data`` has a
      single element);
    * ``[i, i + 1]`` when ``val`` falls strictly between two neighbours;
      a ``val`` above every element yields the last two indices;
    * ``[-1, 0]`` for an empty ``data``.
    """
    lo, hi = 0, len(data) - 1

    while lo < hi:
        mid = (lo + hi) // 2
        mid_value = data[mid]

        # Exact hits are probed in a fixed order: lower bound, midpoint,
        # upper bound.
        if data[lo] == val:
            return [lo, lo]
        if mid_value == val:
            return [mid, mid]
        if data[hi] == val:
            return [hi, hi]

        # No exact hit: shrink the window towards ``val``; stop as soon as
        # the midpoint coincides with the bound that would be moved.
        if mid_value > val:
            if mid == hi:
                break
            hi = mid
        else:
            if mid == lo:
                break
            lo = mid

    return [min(lo, hi), max(lo, hi)]

In [33]:
# Bracket the value 281 among the running totals: l is the lower index and
# h the upper index returned by binarySearch (they are equal on an exact hit).
l, h = binarySearch(cumulative_sum, 281)

In [41]:
# File at the LOWER bracket index. With the totals shown above (150, 280,
# 423, ...), 281 lies between entries 1 and 2, so l == 1 is the last file that
# ends before position 281; the file that actually contains it is at index h.
# NOTE(review): confirm that `l` rather than `h` is the row wanted here.
df.iloc[l]['file_name']

'439-123866-0023.npy'