In [74]:
import os, random
from scipy.io import wavfile
from shutil import copyfile
from sphfile import SPHFile
from WaveNetClassifier import WaveNetClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
import numpy as np

In [2]:
# Sanity check: read one recording and display how many samples it contains.
# `fs` receives the first value returned by wavfile.read (the sample rate),
# `data` the second (the sample array).
fs, data = wavfile.read('TRAIN_WAV/DR3/FGRW0/SA1.WAV')
display(len(data))

68199

In [6]:
def copy_files(path, filename, base_dir):
    """Copy ``path/filename`` into ``base_dir/<parent directory name>/``.

    Only the name of the directory that directly contains the file is kept;
    any higher-level directories of ``path`` are dropped, so
    ``TRAIN_WAV/DR3/FGRW0/SA1.WAV`` ends up as ``<base_dir>/FGRW0/SA1.WAV``.

    Args:
        path: Directory that contains the file.
        filename: Name of the file inside ``path``.
        base_dir: Root directory of the copy.

    Returns:
        The path of the newly written copy.

    Raises:
        OSError: If the source cannot be read or the target cannot be written.
    """
    file_path = os.path.join(path, filename)
    # Resolve the parent directory from the absolute path so that files lying
    # directly in a top-level directory (or referenced by a bare filename)
    # still get a real directory name instead of an index error or a
    # duplicated base_dir component.
    parent_name = os.path.basename(os.path.dirname(os.path.abspath(file_path)))
    target_dir = os.path.join(base_dir, parent_name)
    # exist_ok makes repeated calls for the same directory a no-op.
    os.makedirs(target_dir, exist_ok=True)
    new_path = os.path.join(target_dir, filename)
    copyfile(file_path, new_path)
    return new_path

In [7]:
# Split the recordings directory by directory: the first file (in sorted name
# order) of every directory is copied to TEST_WAV_UNIFORM, all remaining files
# of that directory are copied to TRAIN_WAV_UNIFORM.

train_base_dir = 'TRAIN_WAV_UNIFORM'
test_base_dir = 'TEST_WAV_UNIFORM'

for path, dirnames, filenames in os.walk('TRAIN_WAV'):
    # os.walk reports file names in arbitrary (filesystem-dependent) order;
    # sorting makes the held-out file the same on every run and machine.
    for index, filename in enumerate(sorted(filenames)):
        target_base_dir = test_base_dir if index == 0 else train_base_dir
        copy_files(path, filename, target_base_dir)

In [8]:
# Find the length (in samples) of the shortest recording under TRAIN_WAV.
# The running minimum starts as None rather than a hard-coded sentinel, so the
# result is always the length of an actual file and is never silently capped.

uniform_base_dir = 'UNIFORM'
min_length = None

for path, dirnames, filenames in os.walk('TRAIN_WAV'):
    for filename in filenames:
        file_path = os.path.join(path, filename)
        fs, data = wavfile.read(file_path)
        if min_length is None or len(data) < min_length:
            min_length = len(data)

# An empty tree would otherwise leave a meaningless value for later cells.
if min_length is None:
    raise FileNotFoundError("No recordings found under 'TRAIN_WAV'")

display(min_length)

14644

In [65]:
# Read every file below TRAIN_WAV_UNIFORM, keep only its first `min_length`
# samples so that all clips have the same size, and remember the user id,
# which is the name of the directory that holds the file.
# After this cell `user_ids[i]` belongs to `audio_data[i]`.

# Gather all file paths first; the walk order is the order of the result lists.
wav_paths = [
    os.path.join(dir_path, wav_name)
    for dir_path, _subdirs, wav_names in os.walk('TRAIN_WAV_UNIFORM')
    for wav_name in wav_names
]

user_ids, audio_data = [], []

for wav_path in wav_paths:
    fs, data = wavfile.read(wav_path)
    audio_data.append(data[:min_length])
    # Second-to-last path component = directory containing the file.
    user_ids.append(wav_path.split(os.sep)[-2])

[array([ 4,  1, -1, ...,  1,  3,  4], dtype=int16),
 array([   4,    3,    3, ...,   94,  -60, -277], dtype=int16),
 array([18, -6,  4, ...,  1,  1,  6], dtype=int16),
 array([    8,    10,     3, ..., -1475, -1341, -1123], dtype=int16),
 array([   -1,     4,    12, ...,  -797,  -935, -1018], dtype=int16),
 array([-31,   8,  -2, ..., 187, 144,  38], dtype=int16),
 array([   3,    3,   -2, ..., -431, -431, -455], dtype=int16),
 array([  29,   -6,    9, ..., -405, -452, -449], dtype=int16),
 array([    8,    -3,     2, ..., -1550, -1387, -1057], dtype=int16),
 array([ 24,  -6,   5, ..., -33, 136,  14], dtype=int16),
 array([   5,    0,    2, ...,  -64, -109,  -98], dtype=int16),
 array([  22,    8,   11, ..., -146, -148, -146], dtype=int16),
 array([   8,    1,    4, ...,  900, 1187, 1284], dtype=int16),
 array([19, -3,  1, ..., 88, 80, 72], dtype=int16),
 array([  4,  11,  -4, ..., 399, 250, 133], dtype=int16),
 array([-14,  -2,  -7, ...,  76,  16,  66], dtype=int16),
 array([-13,   4, 

In [77]:
# Shuffle the two lists together so that user_ids[i] still belongs to
# audio_data[i] afterwards. A dedicated, seeded generator keeps the resulting
# order reproducible without touching the global `random` state.

SHUFFLE_SEED = 42

combined = list(zip(user_ids, audio_data))
random.Random(SHUFFLE_SEED).shuffle(combined)

# zip(*[]) yields nothing to unpack, so leave empty lists untouched.
if combined:
    user_ids[:], audio_data[:] = zip(*combined)

In [78]:
# Create a one-hot vector for each user id (categorical -> one-hot encoding).
# The encoder is fitted on a 2-D input, so the flat list of ids is converted
# to a NumPy array and reshaped into a single column first.

X_train = audio_data

# The `sparse` keyword was renamed to `sparse_output` in scikit-learn 1.2 and
# removed in 1.4; try the current name and fall back for older releases.
try:
    onehot_encoder = OneHotEncoder(sparse_output = False, categories = 'auto')
except TypeError:
    onehot_encoder = OneHotEncoder(sparse = False, categories = 'auto')
y_train = onehot_encoder.fit_transform(np.array(user_ids).reshape(-1, 1))

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])