In [1]:
import numpy as np
import pickle
import librosa
from scipy.spatial.distance import cdist
import python_speech_features
from sklearn.metrics.pairwise import euclidean_distances
import numba
from matplotlib import pyplot as plt
from librosa.sequence import dtw
from matplotlib import gridspec
import time
import os
from tqdm import tqdm
import glob
import shutil

In [5]:
def getMFCC(file_dir):
    """Load an audio file and return its MFCCs stacked with delta and delta-delta features.

    Parameters
    ----------
    file_dir : str
        Path of the audio file. It is loaded with ``librosa.load`` at a target
        sampling rate of 44.1 kHz.

    Returns
    -------
    numpy.ndarray
        2-D array with one row per analysis frame (10 ms step). The columns are
        the static MFCCs, followed by their deltas, followed by their
        delta-deltas.
    """
    y, sr = librosa.load(file_dir, sr=44100)
    # nfft is raised to 2048 because the default 25 ms analysis window is about
    # 1103 samples at 44.1 kHz, which does not fit in the library's default FFT size.
    mfcc = python_speech_features.mfcc(y, sr, winstep=0.01, nfft=2048)
    delta_mfcc = python_speech_features.delta(mfcc, 2)
    # Delta-delta (acceleration) is the delta of the *delta* features. The second
    # argument of `delta` is the context window in frames, not the derivative
    # order, so it is kept the same as for the first-order deltas.
    delta_delta_mfcc = python_speech_features.delta(delta_mfcc, 2)
    return np.hstack((mfcc, delta_mfcc, delta_delta_mfcc))

In [6]:
# Collect the raw audio paths: the clean references, the queries, and one list
# per tampered variant. Each name is bound to the list returned by glob.glob
# for the pattern in the same position.
(
    references,
    queries,
    tampered025,
    tampered05,
    tampered1,
    tampered2,
    tampered4,
) = map(
    glob.glob,
    (
        'daps-mp3/clean/*.wav',
        'daps-mp3/queries/*.wav',
        'daps-mp3/tampered0.25/*.wav',
        'daps-mp3/tampered0.5/*.wav',
        'daps-mp3/tampered1/*.wav',
        'daps-mp3/tampered2/*.wav',
        'daps-mp3/tampered4/*.wav',
    ),
)

In [14]:
# Compute MFCC features for every reference recording and store one array per
# recording under daps-mp3/mfccs-44k/refs/.
os.makedirs('daps-mp3/mfccs-44k/refs/', exist_ok=True)
for ref in tqdm(references):
    # os.path strips the directory and the extension regardless of the path
    # separator in use or the length of the extension.
    refname = os.path.splitext(os.path.basename(ref))[0]
    mfcc = getMFCC(ref)
    # np.save appends the ".npy" suffix to the given file name.
    np.save(f'daps-mp3/mfccs-44k/refs/{refname}', mfcc)

100%|█████████████████████████████████████████| 100/100 [01:25<00:00,  1.17it/s]


In [17]:
# Compute MFCC features for the query set and every tampered variant, writing
# them to daps-mp3/mfccs-44k/<source folder name>/.
os.makedirs('daps-mp3/mfccs-44k/refs/', exist_ok=True)
folders = [queries, tampered025, tampered05, tampered1, tampered2, tampered4]
for folder in folders:
    if not folder:
        # An empty glob result has no first path to take the folder name from.
        continue
    # The output folder mirrors the name of the directory holding the audio files.
    foldername = os.path.basename(os.path.dirname(folder[0]))
    os.makedirs(f'daps-mp3/mfccs-44k/{foldername}/', exist_ok=True)
    for file in tqdm(folder):
        # File name without directory or extension; np.save appends ".npy".
        filename = os.path.splitext(os.path.basename(file))[0]
        mfcc = getMFCC(file)
        np.save(f'daps-mp3/mfccs-44k/{foldername}/{filename}', mfcc)

100%|███████████████████████████████████████| 3000/3000 [07:38<00:00,  6.54it/s]
100%|███████████████████████████████████████| 9000/9000 [26:03<00:00,  5.76it/s]
100%|███████████████████████████████████████| 9000/9000 [23:36<00:00,  6.36it/s]
100%|███████████████████████████████████████| 9000/9000 [23:16<00:00,  6.44it/s]
100%|███████████████████████████████████████| 9000/9000 [23:20<00:00,  6.42it/s]
100%|███████████████████████████████████████| 9000/9000 [23:51<00:00,  6.29it/s]


In [18]:
# Speaker-based split of the reference MFCC files: names starting with
# f1..f5 / m1..m5 form the training list, f6..f10 / m6..m10 the test list.
train_refs, test_refs = [], []
for speaker in range(1, 6):
    for prefix in ('f', 'm'):
        train_refs += glob.glob(f'daps-mp3/mfccs-44k/refs/{prefix}{speaker}_*')
    for prefix in ('f', 'm'):
        test_refs += glob.glob(f'daps-mp3/mfccs-44k/refs/{prefix}{speaker + 5}_*')

# Create both destination folders up front, then copy each list into its folder.
split_targets = (
    (train_refs, 'daps-mp3/train/mfccs-44k/refs/'),
    (test_refs, 'daps-mp3/test/mfccs-44k/refs/'),
)
for _, destination in split_targets:
    os.makedirs(destination, exist_ok=True)
for paths, destination in split_targets:
    for path in paths:
        shutil.copy(path, destination)

NameError: name 'shutil' is not defined

In [None]:
query_folders = ['queries', 'tampered0.25', 'tampered0.5', 'tampered1', 'tampered2', 'tampered4']


def copy_to_split(paths, split):
    """Copy MFCC files into ``daps-mp3/<split>/mfccs-44k/<folder>/``.

    ``<folder>`` is the name of the directory each source file currently lives
    in. A destination directory is created the first time a file is copied
    into it, so folders that receive no file are not created.

    Parameters
    ----------
    paths : iterable of str
        Source file paths to copy.
    split : str
        Name of the split sub-directory, e.g. ``'train'`` or ``'test'``.
    """
    created = set()
    for path in paths:
        folder = os.path.basename(os.path.dirname(path))
        destination = f'daps-mp3/{split}/mfccs-44k/{folder}/'
        if destination not in created:
            os.makedirs(destination, exist_ok=True)
            created.add(destination)
        shutil.copy(path, destination)


# Speaker-based split of the query/tampered MFCC files: names containing
# f1..f5 / m1..m5 followed by "_" go to train, f6..f10 / m6..m10 go to test.
train_lst, test_lst = [], []
for i in range(1, 6):
    for folder in query_folders:
        train_lst.extend(glob.glob(f'daps-mp3/mfccs-44k/{folder}/*f{i}_*'))
        train_lst.extend(glob.glob(f'daps-mp3/mfccs-44k/{folder}/*m{i}_*'))
        test_lst.extend(glob.glob(f'daps-mp3/mfccs-44k/{folder}/*f{i+5}_*'))
        test_lst.extend(glob.glob(f'daps-mp3/mfccs-44k/{folder}/*m{i+5}_*'))

copy_to_split(train_lst, 'train')
copy_to_split(test_lst, 'test')