In [1]:
from data.dataloader_v1 import AudioDataset

In [2]:
! pwd

/home/prs392/codes/incubator/non-invertible-audio-feature-generation/development/param/openl3_librispeech/data_loader


In [4]:
import os
import random

import torch
from torch.utils.data import Dataset
import torchaudio.functional as F

import numpy as np
import pandas as pd

import tqdm


def binarySearch(data, val):
    """Search ``data`` for ``val`` by repeatedly halving an index window.

    ``data`` is indexed with the integers ``0 .. len(data) - 1`` and its
    elements are compared with ``==`` and ``>``; the halving only narrows
    towards ``val`` when the values are in ascending order.

    Returns a two-element list ``[low, high]``:

    * ``[i, i]`` when a probed position ``i`` (lower bound, midpoint or
      upper bound of the current window, checked in that order) holds
      ``val``;
    * otherwise the final window, sorted ascending.  For ascending data
      and a ``val`` strictly between the first and last element these are
      the two neighbouring positions with ``data[low] < val < data[high]``;
      a ``val`` below the first element yields ``[0, 0]``;
    * when ``len(data) <= 1`` the loop never runs and the initial bounds
      are returned sorted (``[0, 0]`` for one element, ``[-1, 0]`` for an
      empty input).
    """
    lo, hi = 0, len(data) - 1
    while lo < hi:
        mid = (lo + hi) // 2
        probe = data[mid]

        # Exact matches: lower bound first, then midpoint, then upper bound.
        if data[lo] == val:
            return [lo, lo]
        if probe == val:
            return [mid, mid]
        if data[hi] == val:
            return [hi, hi]

        if probe > val:
            # mid is always < hi while lo < hi, so the window strictly shrinks.
            hi = mid
            continue

        # probe is not greater than val: move the lower bound up, unless the
        # window is already down to two neighbours (mid == lo).
        if lo == mid:
            break
        lo = mid

    return sorted([hi, lo])


class AudioDataset(Dataset):
    """Frame-level dataset pairing an embedding frame with a spectrogram frame.

    ``root_dir`` must contain ``number_of_frames_per_audio.csv`` with the
    columns ``file_name`` and ``number_of_frames``.  Every frame of every
    listed file is one sample; a flat sample index is mapped to
    ``(file, frame-within-file)`` through the cumulative frame count.

    Parameters
    ----------
    root_dir : str
        Directory holding ``number_of_frames_per_audio.csv``.
    transform : optional
        Stored on the instance; not applied by this class.
    num_audios : int, default -1
        Stored on the instance; not used by this class.
    return_amp : bool, default True
        When exactly ``True`` the spectrogram frame is passed through
        ``F.DB_to_amplitude(ref=1, power=0.5)`` before being returned.
    embeddings_dir, spectrograms_dir : str, optional
        Directories holding one ``.npy`` file per CSV row (same file name in
        both directories).
        NOTE(review): the defaults ``<root_dir>/embeddings`` and
        ``<root_dir>/spectrograms`` are an assumption -- the on-disk layout
        is not visible from this notebook; pass the real paths explicitly.
    """

    def __init__(self, root_dir, transform=None, num_audios=-1, return_amp=True,
                 embeddings_dir=None, spectrograms_dir=None):
        self.root_dir = root_dir
        self.transform = transform
        self.num_audios = num_audios
        self.return_amp = return_amp

        # Both directories are read by __getitem__, so they must exist as
        # attributes; see the class docstring about the assumed defaults.
        if embeddings_dir is None:
            embeddings_dir = os.path.join(root_dir, 'embeddings')
        if spectrograms_dir is None:
            spectrograms_dir = os.path.join(root_dir, 'spectrograms')
        self.embeddings_dir = embeddings_dir
        self.spectrograms_dir = spectrograms_dir

        # The CSV lives inside root_dir: join the two parts instead of
        # handing the file name to read_csv as a second positional argument.
        self.df = pd.read_csv(os.path.join(root_dir, 'number_of_frames_per_audio.csv'))
        # cumulative_sum[i] == number of frames in files 0..i (inclusive).
        self.cumulative_sum = self.df['number_of_frames'].cumsum()

    def __len__(self):
        """Total number of frames over all files, as a plain ``int``."""
        frame_ends = self.cumulative_sum.to_numpy()
        return int(frame_ends[-1]) if frame_ends.size else 0

    def __getitem__(self, idx):
        """Return ``(embedding_frame, spectrogram_frame, frame_index)``.

        ``idx`` is a flat frame index in ``[-len(self), len(self))``.
        The spectrogram frame is permuted with ``(2, 0, 1)``, so each stored
        frame must be 3-dimensional.

        Raises
        ------
        IndexError
            If ``idx`` is outside the valid range.
        """
        idx = int(idx)
        total = len(self)
        if idx < 0:
            idx += total
        if not 0 <= idx < total:
            raise IndexError(
                'frame index out of range for dataset of length {}'.format(total)
            )

        frame_ends = self.cumulative_sum.to_numpy()
        # First file whose cumulative frame count exceeds idx; side='right'
        # also skips over files that contribute zero frames.
        file_idx = int(np.searchsorted(frame_ends, idx, side='right'))
        frames_before = int(frame_ends[file_idx - 1]) if file_idx > 0 else 0
        frame_idx = idx - frames_before

        file_name = self.df.iloc[file_idx]['file_name']
        emb = np.load(os.path.join(self.embeddings_dir, file_name))
        spec = np.load(os.path.join(self.spectrograms_dir, file_name))

        emb_tensor = torch.from_numpy(emb[frame_idx])
        spec_tensor = torch.from_numpy(spec[frame_idx]).permute(2, 0, 1)

        if self.return_amp is True:
            spec_tensor = F.DB_to_amplitude(x=spec_tensor, ref=1, power=0.5)

        return emb_tensor, spec_tensor, torch.tensor(frame_idx)



In [7]:
# audio_dataset = AudioDataset(root_dir='/scratch/prs392/incubator/data/LibriSpeech/train-clean-360')

# for i in range(len(audio_dataset)):
#     sample, spec, j = audio_dataset[i]
#     print(sample.shape, spec.shape, j)
    
#     if i == 100:
#         break

In [9]:
# Load the per-file frame-count table from the dataset root.
# NOTE: absolute, machine-specific path; later cells depend on `root_dir` and `df`.
root_dir = '/scratch/prs392/incubator/data/LibriSpeech/train-clean-360'
df = pd.read_csv(os.path.join(root_dir, 'number_of_frames_per_audio.csv'))

In [11]:
# Running total of frames: entry i is the number of frames in rows 0..i of `df`.
cumulative_sum = df['number_of_frames'].cumsum()

In [32]:
# Inspect the per-file frame counts (one row per file).
df['number_of_frames']

0         150
1         130
2         143
3         130
4         143
         ... 
104009    152
104010    110
104011    149
104012    130
104013    124
Name: number_of_frames, Length: 104014, dtype: int64

In [30]:
# Inspect the running frame totals; the last entry is the total number of frames.
cumulative_sum

0              150
1              280
2              423
3              553
4              696
            ...   
104009    12202480
104010    12202590
104011    12202739
104012    12202869
104013    12202993
Name: number_of_frames, Length: 104014, dtype: int64

In [15]:
def binarySearch(data, val):
    """Search ``data`` for ``val`` by repeatedly halving an index window.

    This cell re-defines the function of the same name from the earlier
    cell with the same behaviour; the later definition is the one in effect
    for the cells below.

    ``data`` is indexed with the integers ``0 .. len(data) - 1`` and its
    elements are compared with ``==`` and ``>``; the halving only narrows
    towards ``val`` when the values are in ascending order.

    Returns a two-element list ``[low, high]``:

    * ``[i, i]`` when a probed position ``i`` (lower bound, midpoint or
      upper bound of the current window, checked in that order) holds
      ``val``;
    * otherwise the final window, sorted ascending.  For ascending data
      and a ``val`` strictly between the first and last element these are
      the two neighbouring positions with ``data[low] < val < data[high]``;
      a ``val`` below the first element yields ``[0, 0]``;
    * when ``len(data) <= 1`` the loop never runs and the initial bounds
      are returned sorted (``[0, 0]`` for one element, ``[-1, 0]`` for an
      empty input).
    """
    lo, hi = 0, len(data) - 1
    while lo < hi:
        mid = (lo + hi) // 2
        probe = data[mid]

        # Exact matches: lower bound first, then midpoint, then upper bound.
        if data[lo] == val:
            return [lo, lo]
        if probe == val:
            return [mid, mid]
        if data[hi] == val:
            return [hi, hi]

        if probe > val:
            # mid is always < hi while lo < hi, so the window strictly shrinks.
            hi = mid
            continue

        # probe is not greater than val: move the lower bound up, unless the
        # window is already down to two neighbours (mid == lo).
        if lo == mid:
            break
        lo = mid

    return sorted([hi, lo])

In [33]:
# Bracket the value 281 in the running totals: `l` and `h` are the lower and
# upper positions returned by binarySearch (equal when 281 is an exact entry).
l, h = binarySearch(cumulative_sum, 281)

In [41]:
# Look up the file name at the lower bracket position.
# NOTE(review): with the running totals shown above (150, 280, 423) the value
# 281 lies past row 1 and inside row 2, i.e. `h`; AudioDataset.__getitem__
# uses the high index for this lookup -- confirm that `l` is intended here.
df.iloc[l]['file_name']

'439-123866-0023.npy'