In [48]:
import scipy.signal as signal
import numpy as np
import scipy.io.wavfile as wav
import sounddevice as sd
import librosa
from pathlib import Path
import pandas
from torch.utils.data import Dataset, DataLoader

# All paths are resolved relative to the current working directory, so the
# notebook has to be started from the project root
# (e.g. c:/Users/<user>/Desktop/BProj).
PROJECT_ROOT = Path.cwd()
DATA_DIR = PROJECT_ROOT / "code" / "experiments" / "data" / "ASVSpoof2019"

# Fixed clip duration used when cropping/padding waveforms.
SAMPLE_RATE = 16000  # samples per second assumed when converting seconds to samples
TARGET_SEC = 4.0  # clip duration in seconds
TARGET_LEN = int(SAMPLE_RATE * TARGET_SEC)  # clip duration in samples

In [None]:
def normalize_duration(y, target_len=None):
    """Crop or zero-pad a 1-D signal so that it has exactly ``target_len`` samples.

    Parameters
    ----------
    y : sequence or numpy.ndarray
        1-D signal; only ``len(y)``, slicing and ``np.pad`` are applied to it.
    target_len : int, optional
        Desired number of samples. When omitted, the module-level
        ``TARGET_LEN`` is used.

    Returns
    -------
    The normalized signal:

    * longer input  -> a random contiguous window of ``target_len`` samples
      (the start index is drawn from NumPy's global RNG);
    * shorter input -> the input followed by trailing zeros (``np.pad``);
    * exact length  -> the input object itself, not a copy.
    """
    if target_len is None:
        target_len = TARGET_LEN

    cur_len = len(y)

    if cur_len > target_len:
        # randint's upper bound is exclusive, so +1 is needed to keep the last
        # valid start (the window that ends on the final sample) reachable.
        start = np.random.randint(0, cur_len - target_len + 1)
        return y[start:start + target_len]

    if cur_len < target_len:
        return np.pad(y, (0, target_len - cur_len), mode='constant')

    return y
    
class ASVSpoofDataset(Dataset):
    """Map-style dataset over a directory of ``.flac`` clips plus a label file.

    Indexing yields ``(x, y)`` where ``x`` is the waveform returned by
    ``librosa.load`` cropped or zero-padded to ``TARGET_LEN`` samples, and ``y``
    is ``1`` when the clip's label is ``'bonafide'`` and ``0`` otherwise.
    """

    # Fixed clip length. SAMPLE_RATE is only used to convert seconds into a
    # sample count; audio is loaded at its native rate and is not resampled.
    SAMPLE_RATE = 16000
    TARGET_SEC = 4.0
    TARGET_LEN = int(SAMPLE_RATE * TARGET_SEC)

    def __init__(self, flac_dir, labels_path):
        """Index the audio files and read the label table.

        Parameters
        ----------
        flac_dir : str or path-like
            Directory that directly contains the ``*.flac`` files.
        labels_path : str or path-like
            Whitespace-separated text file without a header row. Column 1 is
            used as the file name (matched against the ``.flac`` stem) and
            column 4 as the label string.

        Attributes set
        --------------
        flac_dir   : the directory as passed in
        files      : sorted list of ``Path`` objects for every ``*.flac`` file
        labels_df  : the label table as a ``pandas.DataFrame``
        file_names : column 1 of ``labels_df``
        target     : dict mapping file name -> label string
        """
        self.flac_dir = flac_dir
        self.files = sorted(Path(flac_dir).glob("*.flac"))
        self.labels_df = pandas.read_csv(labels_path, sep=r"\s+", header=None)
        self.file_names = self.labels_df[1]
        self.target = dict(zip(self.labels_df[1], self.labels_df[4]))

    def __len__(self):
        """Number of ``.flac`` files found in ``flac_dir``."""
        return len(self.files)

    def __getitem__(self, idx):
        """Load clip ``idx`` and return ``(waveform, label)``.

        Raises
        ------
        KeyError
            If the clip has no entry in the label file. Previously such clips
            were silently assigned label ``0``.
        """
        file_path = self.files[idx]

        # sr=None keeps the file's native sampling rate (no resampling).
        x, _ = librosa.load(file_path, sr=None)
        x = self.normalize_duration(x)

        file_name = file_path.stem
        try:
            target_str = self.target[file_name]
        except KeyError:
            raise KeyError(
                f"no label found for '{file_name}' in the label file"
            ) from None

        y = 1 if target_str == 'bonafide' else 0
        return x, y

    def normalize_duration(self, x):
        """Crop or zero-pad ``x`` to exactly ``self.TARGET_LEN`` samples.

        Longer inputs get a random contiguous window (start drawn from NumPy's
        global RNG), shorter inputs are padded with trailing zeros, and inputs
        of the exact length are returned as-is.
        """
        target_len = self.TARGET_LEN
        cur_len = len(x)

        if cur_len > target_len:
            # randint's upper bound is exclusive, so +1 is needed to keep the
            # last valid start (window ending on the final sample) reachable.
            start = np.random.randint(0, cur_len - target_len + 1)
            return x[start:start + target_len]

        if cur_len < target_len:
            return np.pad(x, (0, target_len - cur_len), mode='constant')

        return x

In [49]:
# Training audio of the LA partition. The directory name uses the same
# "ASVspoof2019" spelling as the protocol paths below; the previous
# "ASVSpoof2019_LA_train" spelling only resolved on case-insensitive file systems.
train_flac_dir = (
    DATA_DIR
    / "LA"
    / "ASVspoof2019_LA_train"
    / "flac"
)
# Protocol (label) file for the training split.
labels_file = DATA_DIR / "LA" / "ASVspoof2019_LA_cm_protocols" / "ASVspoof2019.LA.cm.train.trn.txt"

# Fail fast: globbing a missing directory would otherwise yield an empty dataset.
assert train_flac_dir.is_dir(), f"audio directory not found: {train_flac_dir}"

flac_dataset = ASVSpoofDataset(train_flac_dir, labels_file)


In [None]:
# Shuffled mini-batches of 16 clips, loaded in the main process (num_workers=0)
# with pinned-memory batches requested.
loader = DataLoader(
    flac_dataset,
    batch_size=16,
    shuffle=True,
    num_workers=0,
    pin_memory=True,
)