In [1]:
import librosa
import soundfile as sf
import numpy as np
import os
import shutil
import glob
from sklearn.model_selection import StratifiedKFold

In [2]:
# Audio configuration: tune the sample rate and clip duration to your needs.
sr = 20000                   # target sample rate passed to the audio loader/writer
clip_seconds = 5             # duration of every output clip, in seconds
samples = clip_seconds * sr  # number of samples in one fixed-length clip

In [3]:
# Locate the dataset relative to the working directory: the project root is
# taken to be two levels above the current working directory.
mainDir = os.path.dirname(os.path.dirname(os.getcwd()))
dst_path = os.path.join(mainDir, 'datasets', 'esc10')

# Scratch directory that receives the fixed-length copies of every clip.
master = os.path.join(dst_path, 'master')

# Fail early with an explicit message when the dataset directory is missing,
# instead of surfacing it indirectly through the directory creation below.
if not os.path.isdir(dst_path):
    raise FileNotFoundError('Dataset directory not found: {}'.format(dst_path))

# exist_ok=True keeps the cell idempotent and avoids the check-then-create race.
os.makedirs(master, exist_ok=True)

In [4]:
# Class names of the dataset. The position of a name in this list is used as
# its integer label when the clips are processed.
classes = [
    "chainsaw",
    "clock_tick",
    "crackling_fire",
    "crying_baby",
    "dog",
    "helicopter",
    "rain",
    "rooster",
    "sea_waves",
    "sneezing",
]

In [5]:
# Write a fixed-length copy of every clip into `master`. Each output file name
# gets a "_<class index>" suffix so the label can be recovered from the name.
for idx, label in enumerate(classes):
    print('Processing label: {}'.format(label))

    class_dir = os.path.join(dst_path, label)
    for src_file in sorted(glob.glob(os.path.join(class_dir, '*.wav'))):
        # Build the destination from the file name alone. String replacement on
        # the full path would also rewrite any other ".wav" occurrence in it and
        # would leave the name untagged if the extension's case differed.
        stem, ext = os.path.splitext(os.path.basename(src_file))
        dst_file = os.path.join(master, '{}_{}{}'.format(stem, idx, ext))

        # Load as mono at the target sample rate; only the waveform is needed.
        audio_data = librosa.load(src_file, sr=sr, mono=True)[0]
        length = len(audio_data)
        if length == 0:
            # An empty clip cannot be repeated to fill the target length.
            raise ValueError('Empty audio file: {}'.format(src_file))

        # Repeat the clip until it covers `samples`, then cut to exactly `samples`.
        n = samples // length + 1
        audio_data = np.tile(audio_data, n)[:samples]

        sf.write(dst_file, audio_data, sr)
        

Processing label: chainsaw
Processing label: clock_tick
Processing label: crackling_fire
Processing label: crying_baby
Processing label: dog
Processing label: helicopter
Processing label: rain
Processing label: rooster
Processing label: sea_waves
Processing label: sneezing


In [9]:
# Reload the fixed-length clips from `master`, recovering each label from the
# "_<class index>" suffix of the file name.
X = []
y = []

for processed_file in sorted(glob.glob(os.path.join(master, '*.wav'))):
    # Parse the label from the base name only, and from the LAST underscore:
    # splitting the full path breaks as soon as any parent directory (or the
    # original file name) contains an underscore.
    stem = os.path.splitext(os.path.basename(processed_file))[0]
    y.append(int(stem.rsplit('_', 1)[1]))
    X.append(librosa.load(processed_file, sr=sr, mono=True)[0])

In [10]:
# Convert the collected clips and labels to NumPy arrays and report their shapes.
X = np.array(X)
y = np.array(y)

# X and y are already arrays here, so their shapes can be read directly.
print('X shape: {}'.format(X.shape))
print('y shape: {}'.format(y.shape))

X shape: (400, 100000)
y shape: (400,)


In [12]:
# Partition the clips into 5 stratified folds (shuffled with a fixed seed) and
# keep, for every fold, the clips and labels of its held-out portion.
stratified_kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

ESC10 = {}

for fold, (train_index, test_index) in enumerate(stratified_kfold.split(X, y)):
    fold_key = 'fold{}'.format(fold + 1)

    # Training portion of this fold; only its size is reported below.
    X_train = [X[i] for i in train_index]
    y_train = [y[i] for i in train_index]

    # Held-out portion of this fold; this is what gets stored in the archive.
    X_test = [X[i] for i in test_index]
    y_test = [y[i] for i in test_index]

    print(f"Fold {fold + 1}:")
    print(f"  Training samples: {len(X_train)}")
    print(f"  Testing samples: {len(X_test)}")

    # Per-class count of held-out clips, to confirm the split is stratified.
    print('Testing bin count -  {}'.format(np.bincount(y_test)))

    ESC10[fold_key] = {'sounds': X_test, 'labels': y_test}

# One archive entry per fold; the file is named after the sample rate in kHz.
# NOTE(review): each entry is a dict, so NumPy presumably stores it as a pickled
# object array and loaders would need allow_pickle=True — confirm with consumers.
save_path = os.path.join(dst_path, 'wav{}.npz'.format(sr//1000))
np.savez(save_path, **ESC10)
print('Dataset created')

Fold 1:
  Training samples: 320
  Testing samples: 80
Testing bin count -  [8 8 8 8 8 8 8 8 8 8]
Fold 2:
  Training samples: 320
  Testing samples: 80
Testing bin count -  [8 8 8 8 8 8 8 8 8 8]
Fold 3:
  Training samples: 320
  Testing samples: 80
Testing bin count -  [8 8 8 8 8 8 8 8 8 8]
Fold 4:
  Training samples: 320
  Testing samples: 80
Testing bin count -  [8 8 8 8 8 8 8 8 8 8]
Fold 5:
  Training samples: 320
  Testing samples: 80
Testing bin count -  [8 8 8 8 8 8 8 8 8 8]
Dataset created


In [16]:
# Remove the scratch directory of intermediate clips. The guard makes the cell
# safe to re-run: without it a second run raises because the directory is gone.
if os.path.isdir(master):
    shutil.rmtree(master)