In [1]:
import os
import numpy as np
import pandas as pd
from scipy.io import wavfile as wv
from sklearn.preprocessing import normalize

In [8]:
class WaveDataset():
    """Indexable collection of WAV recordings listed in a CSV annotation file.

    The CSV must contain a ``filename`` column; each entry is formatted with
    ``"%04d.wav"`` to locate the recording inside ``data_folder``. When the
    CSV also has a ``label`` column it supplies the per-item labels,
    otherwise every item gets the placeholder label ``-1``.

    Items are returned as ``(data, label)`` pairs, where ``data`` is the
    sample array returned by ``scipy.io.wavfile.read``. Every pair that has
    been read is kept in ``self.cache`` (keyed by the index used), so each
    file is decoded at most once per index.
    """

    def __init__(self, data_folder, annotation_file):
        """Read the annotation CSV and set up an empty item cache."""
        annotations = pd.read_csv(annotation_file)
        has_labels = 'label' in annotations.columns

        self.data_folder = data_folder
        self.filenames = list(annotations['filename'])
        # Unlabelled annotation files get -1 for every item.
        self.labels = list(annotations['label']) if has_labels else [-1] * len(self.filenames)
        self.cache = {}

    def __len__(self):
        """Return the number of annotated items."""
        return len(self.labels)

    def __getitem__(self, index):
        """Return ``(data, label)`` for ``index``, reading the WAV on first access."""
        entry = self.cache.get(index)
        if entry is None:
            wav_path = os.path.join(self.data_folder, "%04d.wav" % self.filenames[index])
            _sample_rate, samples = wv.read(wav_path)
            entry = (samples, self.labels[index])
            self.cache[index] = entry
        return entry

In [3]:
# Training set: WAV files plus the CSV that lists them
train = WaveDataset(
    data_folder="../../input/raw/",
    annotation_file="../../input/train_labels.csv",
)


In [4]:
# Collect the length of every training signal; the maximum of this list
# is used to size the signal matrices built from `train`.
signal = [len(samples) for samples, _ in train]

In [5]:
# Initialize X as a zero matrix: one row per training signal, as wide as
# the longest signal.
X = np.zeros([len(train), np.max(signal)])
y = []
for i, (sig, label) in enumerate(train):
    # Write only the first len(sig) columns so shorter signals stay
    # zero-padded on the right. Assigning to the whole row (X[i] = sig)
    # raises a broadcast ValueError whenever a signal is shorter than the
    # longest one.
    # NOTE(review): assumes each signal is 1-D (mono) - confirm.
    X[i, :len(sig)] = sig
    y.append(label)

y = np.asarray(y)

In [6]:
# Initialize X_norm as a zero matrix with the same layout as the raw
# signal matrix: one row per training signal, as wide as the longest one.
X_norm = np.zeros([len(train), np.max(signal)])
for i, (sig, label) in enumerate(train):
    # normalize() expects a 2-D array, so the signal is reshaped to a single
    # row and the normalized row is taken back out with [0]. With default
    # arguments it rescales each row to unit L2 norm.
    # Only the first len(sig) columns are written so shorter signals stay
    # zero-padded instead of raising a broadcast ValueError.
    X_norm[i, :len(sig)] = normalize(sig.reshape(1, -1))[0]


In [7]:
# Persist the unnormalized training signal matrix as a .npy file
np.save(file='../../input/processed/X.npy', arr=X)

In [8]:
# Persist the normalized training signal matrix as a .npy file
np.save(file='../../input/processed/X_norm.npy', arr=X_norm)

In [9]:
# Persist the training label array as a .npy file
np.save(file='../../input/processed/y.npy', arr=y)

In [11]:
# Submission set: WAV files plus the CSV that lists them.
# NOTE(review): these paths are rooted at "data/", while the training cell
# reads from "../../input/" - confirm which layout is current.
submission = WaveDataset(
    data_folder="data/raw/",
    annotation_file="data/raw/test_files.csv",
)

In [16]:
# Collect the length of every submission signal; the maximum of this list
# is used to size the signal matrices built from `submission`.
signal = [len(samples) for samples, _ in submission]

In [17]:
# Initialize S as a zero matrix: one row per submission signal, as wide as
# the longest signal.
S = np.zeros([len(submission), np.max(signal)])
# Labels are not collected in this cell, so `y` is left untouched.
for i, (sig, label) in enumerate(submission):
    # Write only the first len(sig) columns so shorter signals stay
    # zero-padded on the right. Assigning to the whole row (S[i] = sig)
    # raises a broadcast ValueError whenever a signal is shorter than the
    # longest one.
    # NOTE(review): assumes each signal is 1-D (mono) - confirm.
    S[i, :len(sig)] = sig

In [18]:
# Initialize S_norm as a zero matrix with the same layout as the raw
# submission matrix: one row per signal, as wide as the longest one.
S_norm = np.zeros([len(submission), np.max(signal)])
for i, (sig, label) in enumerate(submission):
    # normalize() expects a 2-D array, so the signal is reshaped to a single
    # row and the normalized row is taken back out with [0]. With default
    # arguments it rescales each row to unit L2 norm.
    # Only the first len(sig) columns are written so shorter signals stay
    # zero-padded instead of raising a broadcast ValueError.
    S_norm[i, :len(sig)] = normalize(sig.reshape(1, -1))[0]


In [19]:
# Persist the unnormalized submission signal matrix as a .npy file
np.save(file='data/processed/submission.npy', arr=S)

In [20]:
# Persist the normalized submission signal matrix as a .npy file
np.save(file='data/processed/submission_norm.npy', arr=S_norm)