In [1]:
import os
import numpy as np
from datasets import Dataset, DatasetDict, load_from_disk
import librosa as lib

  from .autonotebook import tqdm as notebook_tqdm


## Import data from a WAV file

In [2]:
def load_from_file(file: str):
    """Read an audio file and work out how many samples make up five seconds.

    Args:
        file: Path of the audio file; handed to ``lib.load`` unchanged.

    Returns:
        A ``(sig, t, sr)`` tuple: the signal and sampling rate exactly as
        ``lib.load`` returned them, plus ``t = 5 * sr``, the number of
        samples in a 5-second window.
    """
    loaded = lib.load(file)
    signal, sample_rate = loaded
    window_len = 5 * sample_rate  # samples per 5-second window
    return signal, window_len, sample_rate

### Split the signal into 5-second slices

In [3]:
def slices(sig: list, t: int):
    """Cut a signal into consecutive, non-overlapping windows of ``t`` samples.

    Only full windows are returned; a trailing remainder shorter than ``t``
    is discarded.

    Args:
        sig: The signal to cut; anything that supports ``len()`` and slicing
            (for example a list or a 1-D numpy array).
        t: Window length in samples. Must be positive.

    Returns:
        A list of ``len(sig) // t`` windows, each holding exactly ``t``
        samples, in their original order.

    Raises:
        ValueError: If ``t`` is not positive.
    """
    if t <= 0:
        raise ValueError(f"window length t must be positive, got {t}")
    # The last valid start index is len(sig) - t, so the stop bound has to be
    # one past it. Without the +1 the final full window is lost whenever
    # len(sig) is an exact multiple of t.
    return [sig[start:start + t] for start in range(0, len(sig) - t + 1, t)]

### Convert slices to mel spectrograms

In [4]:
def gen_spectrogramm(li: list, sr: int, n_mels: int = 216, hop_length: int = 512, fmax: float = 8000):
    """Compute one mel spectrogram per audio slice.

    Args:
        li: Audio slices; each one is passed as ``y`` to
            ``lib.feature.melspectrogram``.
        sr: Sampling rate of the slices.
        n_mels: Number of mel bands, i.e. the height of each spectrogram.
        hop_length: Number of samples per time-step in the spectrogram.
        fmax: Value forwarded as ``fmax`` to ``melspectrogram``.

    Returns:
        A list with one spectrogram per slice, in the same order as ``li``.
        An empty ``li`` yields an empty list.
    """
    return [
        lib.feature.melspectrogram(
            y=chunk, sr=sr, n_mels=n_mels, fmax=fmax, hop_length=hop_length
        )
        for chunk in li
    ]

In [5]:
def preprocess_chunks(data: str):
    """Run the whole pipeline on one audio file: load, cut into windows, convert.

    Args:
        data: Path of the audio file to process.

    Returns:
        Whatever ``gen_spectrogramm`` produces for the windows that
        ``slices`` cuts out of the signal loaded by ``load_from_file``.
    """
    signal, window_len, sample_rate = load_from_file(data)
    windows = slices(signal, window_len)
    return gen_spectrogramm(windows, sample_rate)


In [6]:
# Turn the recording into a list of mel spectrograms, one per 5-second slice.
spec = preprocess_chunks("Grego_chant.wav")

In [7]:
# Sanity check: (number of slices, mel bands, time frames).
np.asarray(spec).shape

(720, 216, 216)

In [8]:
# Flatten every 2-D spectrogram into a 1-D vector.
output = [np.reshape(mel, -1) for mel in spec]

In [9]:
# Check the length of one flattened spectrogram.
print(output[0].shape)

(46656,)


In [10]:
# Column-oriented mapping for Dataset.from_dict: a single column "X" holding
# the flattened spectrograms.
buf_dict = {"X": output}

In [11]:
# Wrap the flattened spectrograms in a dataset with a single 'train' split
# and persist it to disk.
train_dataset = Dataset.from_dict(buf_dict)
dataset = DatasetDict({'train': train_dataset})

path = 'data/hugging_face_dataset/'
os.makedirs(path, exist_ok=True)  # create the target directory if missing
dataset.save_to_disk(path)

Saving the dataset (1/1 shards): 100%|██████████| 720/720 [00:00<00:00, 5320.33 examples/s]


In [12]:
# Reload the saved dataset from disk to check that the save round-trips.
dataset_dict = DatasetDict.load_from_disk(path)
print(dataset_dict)
# Last expression of the cell: displays the feature schema of the 'train' split.
dataset_dict['train'].features

DatasetDict({
    train: Dataset({
        features: ['X'],
        num_rows: 720
    })
})


{'X': Sequence(feature=Value(dtype='float32', id=None), length=-1, id=None)}