# Imports

Enable `autoreload` so that edits to imported modules are picked up without restarting the kernel.

In [1]:
# (Re)load the autoreload extension; mode 2 re-imports modules before each
# cell is executed, so edits to project modules take effect immediately.
%reload_ext autoreload
%autoreload 2

Imports

In [2]:
# -------------------------------- torch stuff ------------------------------- #
import torch
from torch.utils.data import Dataset, DataLoader


# ----------------------------------- other ---------------------------------- #
from glob import glob
import soundfile as sf
from scipy.io import wavfile
import json
import os

# ---------------------------------- Custom ---------------------------------- #
from utils.precision_loss import show_sum_error

### Why use scipy.io instead of soundfile

In [3]:
# sf.read returns a (samples, sample_rate) pair; element [0] is the sample
# array, which soundfile hands back as floats rather than the stored integers.
print("soundfile converts directly to float 64")
sf.read("../data/LibriCount/10_85b5ac.wav")[0]

soundfile converts directly to float 64


array([ 0.0944519 ,  0.09963989,  0.13208008, ..., -0.05599976,
       -0.04922485, -0.03912354])

In [4]:
# wavfile.read returns a (sample_rate, samples) pair; element [1] is the
# sample array, kept in the file's own integer format (int16 for this clip).
print("wavfile concervs the original format : int16")
wavfile.read("../data/LibriCount/10_85b5ac.wav")[1]

wavfile concervs the original format : int16


array([ 3095,  3265,  4328, ..., -1835, -1613, -1282], dtype=int16)

### Format

In [7]:
# Short aliases for the torch floating-point dtypes compared in this notebook.
F16, F32, F64 = torch.float16, torch.float32, torch.float64

In [11]:
# Helper from utils.precision_loss; judging by its printed output it reports
# the largest error made when int16 values are converted to each float dtype.
show_sum_error()

Conversion Errors from int16 in range(-32768, 32768) maxed by (max is=) :
	Float 16  8.0
	Float 32  0.0
	Float 64  0.0


# Consts

In [12]:
# Relative path of the directory holding the LibriCount .wav clips and their
# .json annotations; used as the default location by AudioCountGender.
data_dir = "../data/LibriCount"

# Dataset

In [17]:
class AudioCountGender(Dataset):
    """Audio clips paired with per-clip speaker counts split by gender.

    Item ``i`` is a ``(clip, genders)`` pair: ``clip`` holds the samples of the
    ``i``-th ``.wav`` file cast to ``dtype``, and ``genders`` is an integer
    tensor ``[male, female]`` counted from the ``.json`` annotation that has
    the same file name.

    Note that ``torch.float16`` (the default) cannot represent every int16
    sample exactly; pass ``torch.float32`` or wider for a lossless cast.
    """

    def __init__(self, data_dir=data_dir, dtype = F16):
        """Index the ``*.wav`` and ``*.json`` files found directly in ``data_dir``.

        Args:
            data_dir: Directory containing the clips and their annotations.
            dtype: torch dtype the samples are cast to in ``__getitem__``.

        Raises:
            ValueError: If the wav and json files do not pair up one-to-one
                by file name.
        """
        # glob() gives no ordering guarantee, so two separate calls may list
        # the clips and the annotations in different orders. Sorting both
        # keeps sounds[i] and labels[i] on the same recording.
        self.sounds = sorted(glob(os.path.join(data_dir, "*.wav")))
        self.labels = sorted(glob(os.path.join(data_dir, "*.json")))
        if self._stems(self.sounds) != self._stems(self.labels):
            raise ValueError(
                "Every .wav file in {!r} needs a .json file with the same name "
                "(found {} wav / {} json files).".format(
                    data_dir, len(self.sounds), len(self.labels)
                )
            )
        self.dtype = dtype

    @staticmethod
    def _stems(paths):
        """Return the file names in ``paths`` without directory or extension."""
        return [os.path.splitext(os.path.basename(path))[0] for path in paths]

    def __getitem__(self, index):
        """Load clip ``index`` and count its annotated speakers by gender.

        Returns:
            A tuple ``(clip, genders)``: the samples as a tensor of
            ``self.dtype`` and an integer tensor ``[male, female]``.
        """
        # wavfile.read keeps the file's native sample format; the sample rate
        # is not part of the returned item.
        _sample_rate, clip = wavfile.read(self.sounds[index])
        with open(self.labels[index]) as f:
            label = json.load(f)
        # Only "F" counts as female; every other value of "sex" counts as male.
        females = sum(1 for person in label if person["sex"] == "F")
        genders = [len(label) - females, females]  # [Male, Female]
        return torch.tensor(clip, dtype=self.dtype), torch.tensor(genders)

    def __len__(self):
        """Return the number of clips found in the data directory."""
        return len(self.sounds)

In [18]:
# Build the dataset with its defaults (data_dir, float16) and display one
# sample: the clip tensor and its [male, female] speaker counts.
data = AudioCountGender()
data[800]

(tensor([ 3096.,  3264.,  4328.,  ..., -1835., -1613., -1282.],
        dtype=torch.float16),
 tensor([3, 7]))

# Dataloader

In [29]:
# Shuffled mini-batches of 64 items, loaded by 6 worker processes.
dataloader = DataLoader(
    data,
    batch_size=64,
    shuffle=True,
    num_workers=6,
)

In [20]:
from time import time

# One full pass over the dataloader: keep every batch in L, echo each batch
# size followed by "_", then report the elapsed wall-clock time.
L = list()
start = time()
for d in dataloader:
    L.append(d)
    print(len(d[0]), end="_")
end = time()
print()
print("Duration: {:.2f}s".format(end - start))

64_64_64_64_64_64_64_64_64_64_64_64_64_64_64_64_64_64_64_64_64_64_64_64_64_64_64_64_64_64_64_64_64_64_64_64_64_64_64_64_64_64_64_64_64_64_64_64_64_64_64_64_64_64_64_64_64_64_64_64_64_64_64_64_64_64_64_64_64_64_64_64_64_64_64_64_64_64_64_64_64_64_64_64_64_64_64_64_64_24_
Duration: 50.47s
