In [1]:
import numpy as np 
import pandas as pd 
import re
import os
import glob
import pickle
from sklearn.model_selection import train_test_split 
import librosa

In [2]:
# Root folder of the dataset; every CSV and audio path below is built by
# appending to this string, so it must keep its trailing slash.
INPUT_FOLDER = "input/"
# INPUT_FOLDER = "../input/"  # alternative data location, kept for reference
print(os.listdir(INPUT_FOLDER))  # quick check of what the folder contains

['train_curated.csv', 'train_noisy.csv', 'sample_submission.csv', 'train_noisy', 'train_curated', 'test']


In [3]:
# Metadata tables shipped alongside the audio.
TRAIN_CURATED_PATH = "{}train_curated.csv".format(INPUT_FOLDER)
TRAIN_NOISY_PATH = "{}train_noisy.csv".format(INPUT_FOLDER)
SAMPLE_SUBMISSION_PATH = "{}sample_submission.csv".format(INPUT_FOLDER)

# Audio directories. The trailing slash matters: file names are appended to
# these strings by plain concatenation further down.
TRAIN_CURATED = "{}train_curated/".format(INPUT_FOLDER)
TRAIN_NOISY = "{}train_noisy/".format(INPUT_FOLDER)
TEST = "{}test/".format(INPUT_FOLDER)

# Load the three CSVs in one pass.
train_curated, train_noisy, sample = (
    pd.read_csv(csv_path)
    for csv_path in (TRAIN_CURATED_PATH, TRAIN_NOISY_PATH, SAMPLE_SUBMISSION_PATH)
)

In [4]:
def one_hot(labels, src_dict):
    """Encode comma-separated label strings as a multi-hot matrix.

    Args:
        labels: sized iterable of strings, each a comma-separated list of
            label names (e.g. "Bark,Meow").
        src_dict: mapping from label name to its column index.

    Returns:
        Float array of shape (len(labels), len(src_dict)) holding 1 in every
        column whose label appears in the corresponding string, 0 elsewhere.

    Raises:
        KeyError: if a label name is missing from ``src_dict``.
    """
    encoded = np.zeros((len(labels), len(src_dict)))
    for row, label_str in enumerate(labels):
        # Resolve every name first, then switch all of its columns on at once.
        columns = [src_dict[name] for name in label_str.split(',')]
        encoded[row, columns] = 1
    return encoded

In [5]:
# Every column of the sample submission after the first one is a target label.
target_names = sample.columns[1:]
num_targets = len(target_names)

# Forward (label name -> column index) and inverse (column index -> label name) lookups.
src_dict = {name: idx for idx, name in enumerate(target_names)}
src_dict_inv = dict(enumerate(target_names))

In [6]:
# Spectrogram geometry shared by the preprocessing cells below.
num_freq = 128  # handed to librosa as n_mels (number of mel bands)
len_div = 256  # width, in spectrogram frames, of each fixed-size segment

In [7]:
# Curated set: clip file names and their multi-hot label matrix.
fname_curated = train_curated['fname'].values
labels = one_hot(train_curated['labels'], src_dict)

# Hold out 20% of the curated clips for validation; the seed is fixed so the
# split is the same on every run.
(fname_train, fname_val,
 labels_train, labels_val) = train_test_split(
    fname_curated,
    labels,
    test_size=0.2,
    random_state=0,
)

In [8]:
def _log_mel_segments(wavfile, num_freq, len_div):
    """Load one audio file and cut its scaled log-mel spectrogram into segments.

    Returns an array of shape (num_segments, num_freq, len_div).
    """
    signal, sr = librosa.load(wavfile)
    # Pass the audio by keyword: recent librosa releases reject a positional `y`.
    S = librosa.feature.melspectrogram(y=signal, sr=sr, n_mels=num_freq)
    log_S = librosa.power_to_db(S, ref=np.max)
    # With ref=np.max and librosa's default top_db of 80 the dB values lie in
    # [-80, 0]; this rescales them to [-1, 1].
    scaled = (log_S + 80) / 40 - 1

    n_frames = scaled.shape[1]
    num_div = n_frames // len_div
    # Zero-pad the time axis up to the next multiple of len_div.
    # NOTE: when n_frames is already an exact multiple, a whole extra all-zero
    # segment is appended. This is kept as-is so that training data and the
    # test-time preprocessing elsewhere in this notebook stay consistent.
    num_pad = len_div - n_frames % len_div
    padded = np.hstack([scaled, np.zeros([num_freq, num_pad])])
    return np.array(np.split(padded, num_div + 1, 1))


def process(fnames, labels, pickle_name, folder, num_freq=num_freq, len_div=len_div, div=2000):
    """Turn audio clips into fixed-size log-mel segments and pickle them in batches.

    Files are handled in batches of ``div``. For batch ``k`` the file
    ``preprocessed_dataset/{pickle_name}_{k}.pickle`` receives two consecutive
    pickled objects: ``X`` with shape (n_segments, num_freq, len_div, 1) and
    ``y`` with shape (n_segments, n_labels), where every segment of a clip
    carries that clip's label row. A final batch file is always written for
    the remainder, even when the remainder is empty.

    Args:
        fnames: sequence of file names, relative to ``folder``.
        labels: 2-D array; row ``i`` is the label vector of ``fnames[i]``.
        pickle_name: prefix of the output pickle files.
        folder: directory prefix concatenated directly with each file name
            (so it must end with a path separator).
        num_freq: number of mel bands.
        len_div: number of spectrogram frames per segment.
        div: number of files per batch / output pickle.
    """
    # open(..., 'wb') does not create missing directories.
    os.makedirs('preprocessed_dataset', exist_ok=True)

    labels = np.asarray(labels)
    num_labels = labels.shape[1]  # taken from the data instead of a hard-coded 80
    num_files = len(fnames)
    num_batch = num_files // div

    for k in range(num_batch + 1):
        start = div * k
        stop = min(start + div, num_files)

        # Collect per-clip arrays and concatenate once per batch; stacking onto
        # a growing array inside the loop re-copies everything for each file.
        segments = []
        targets = []
        for idx in range(start, stop):
            clip = _log_mel_segments(folder + fnames[idx], num_freq, len_div)
            segments.append(clip)
            targets.append(np.tile(labels[idx], (len(clip), 1)))

        if segments:
            X = np.concatenate(segments, axis=0)
            y = np.concatenate(targets, axis=0).astype(np.float64)
        else:
            # Empty remainder batch: keep the shapes a loader would expect.
            X = np.zeros([0, num_freq, len_div])
            y = np.zeros([0, num_labels])

        X = X.reshape([-1, num_freq, len_div, 1])
        print('iter No.{} is done.'.format(k))
        with open('preprocessed_dataset/{}_{}.pickle'.format(pickle_name, k), 'wb') as f:
            pickle.dump(X, f)
            pickle.dump(y, f)

In [19]:
# `folder` is a required argument of process(); the training clips come from the
# same curated split as the validation clips, so they live in TRAIN_CURATED.
process(fname_train, labels_train, pickle_name='train_arr', folder=TRAIN_CURATED)

iter No.0 is done.
iter No.1 is done.


In [None]:
# Preprocess the held-out curated clips into 'val_arr_*' pickles.
process(
    fnames=fname_val,
    labels=labels_val,
    pickle_name='val_arr',
    folder=TRAIN_CURATED,
)

In [9]:
# Noisy set: clip file names and their multi-hot label matrix.
fname_noisy = train_noisy['fname'].values
label_noisy = one_hot(train_noisy['labels'], src_dict)

# Same 80/20 split and seed as used for the curated set.
(fname_noisy_train, fname_noisy_val,
 labels_noisy_train, labels_noisy_val) = train_test_split(
    fname_noisy,
    label_noisy,
    test_size=0.2,
    random_state=0,
)

In [10]:
# Preprocess the noisy training clips into 'noisy_train_arr_*' pickles.
process(
    fnames=fname_noisy_train,
    labels=labels_noisy_train,
    pickle_name='noisy_train_arr',
    folder=TRAIN_NOISY,
)

iter No.0 is done.
iter No.1 is done.
iter No.2 is done.
iter No.3 is done.
iter No.4 is done.
iter No.5 is done.
iter No.6 is done.
iter No.7 is done.


In [None]:
# Preprocess the held-out noisy clips into 'noisy_val_arr_*' pickles.
process(
    fnames=fname_noisy_val,
    labels=labels_noisy_val,
    pickle_name='noisy_val_arr',
    folder=TRAIN_NOISY,
)

In [26]:
# Preprocess the test clips in batches of `div` files, one pickle per batch.

# NOTE(review): `file_name` was never defined in this notebook (it only existed
# as leftover kernel state). It is assumed to be the .wav paths under TEST --
# confirm. It is a numpy array because it is indexed with a range below.
file_name = np.array(sorted(glob.glob(TEST + '*.wav')))

# open(..., 'wb') does not create missing directories.
os.makedirs('out', exist_ok=True)

div = 300
num_batch = len(file_name) // div
rest = len(file_name) % div

# Index ranges of every full batch, followed by the (possibly empty) remainder.
pos = [range(div*k, div*(k+1)) for k in range(num_batch)]
pos.append(range(div*num_batch, div*num_batch+rest))

for k in range(num_batch+1):
    # One array of shape (num_segments, num_freq, len_div) per clip; the
    # segment count varies per clip, so this stays a list.
    X_proc_ = []
    for file in file_name[pos[k]]:
        wavfile = file
        y_proc, sr = librosa.load(wavfile)
        # Pass the audio by keyword: recent librosa releases reject a positional `y`.
        S = librosa.feature.melspectrogram(y=y_proc, sr=sr, n_mels=num_freq)
        log_S = librosa.power_to_db(S, ref=np.max)
        # Same rescaling as used for the training data.
        X_proc = (log_S + 80) / 40 - 1

        # Zero-pad the time axis to a multiple of len_div, then cut into
        # equal-width segments.
        num_div = X_proc.shape[1] // len_div
        num_pad = len_div - X_proc.shape[1] % len_div
        redidual_amp = np.zeros([num_freq, num_pad])
        dum = np.hstack([X_proc, redidual_amp])
        X_proc_.append(np.array(np.split(dum, num_div+1,1)))

    print('iter No.{} is done.'.format(k))

    with open('out/test_arr_{}.pickle'.format(k), 'wb') as f:
        pickle.dump(X_proc_, f)

iter No.0 is done.
iter No.1 is done.
iter No.2 is done.
iter No.3 is done.


In [27]:
# Stitch the per-batch test pickles back together, in batch order, into one list.
X_proc_ = []
for part_idx in range(num_batch + 1):
    part_path = 'out/test_arr_{}.pickle'.format(part_idx)
    with open(part_path, 'rb') as part_file:
        X_proc_ += pickle.load(part_file)

# A single file holding two consecutive pickled objects: the features, then the
# file names they were computed from.
with open('test_arr.pickle', 'wb') as merged_file:
    for payload in (X_proc_, file_name):
        pickle.dump(payload, merged_file)

In [29]:
# Sanity check: number of entries (one per test clip) in the merged list.
len(X_proc_)

1120

In [None]:
# Build the per-clip test features in memory.

# NOTE(review): `filename` was never defined in this notebook (it only existed
# as leftover kernel state). It is assumed to be the .wav paths under TEST --
# confirm.
filename = sorted(glob.glob(TEST + '*.wav'))

X_test_list = []

for file in filename:
    wavfile = file
    y_proc, sr = librosa.load(wavfile)
    # Pass the audio by keyword: recent librosa releases reject a positional `y`.
    S = librosa.feature.melspectrogram(y=y_proc, sr=sr, n_mels=num_freq)
    log_S = librosa.power_to_db(S, ref=np.max)
    # Same rescaling as used for the training data.
    X_proc = (log_S + 80) / 40 - 1

    # Zero-pad the time axis to a multiple of len_div, then cut into
    # equal-width segments; each clip yields (num_segments, num_freq, len_div).
    num_div = X_proc.shape[1] // len_div
    num_pad = len_div - X_proc.shape[1] % len_div
    redidual_amp = np.zeros([num_freq, num_pad])
    dum = np.hstack([X_proc, redidual_amp])
    X_test_list.append(np.array(np.split(dum, num_div+1,1)))

In [None]:
# NOTE(review): `model` is not defined anywhere in this notebook; it must
# already exist in the kernel for this cell to run.
# Each clip's score vector is the average of its per-segment predictions.
pred_list = [
    model.predict(X_test.reshape([-1, num_freq, len_div, 1])).sum(axis=0) / len(X_test)
    for X_test in X_test_list
]
y_pred = np.array(pred_list)

In [None]:
# Label columns of the submission: every sample-submission column after the first.
sound_names = sample.columns[1:]

In [None]:
# Keep only the bare file name of each test path.
# os.path.basename honours the platform's path separator, whereas
# split('/') leaves the directory in place when paths use another separator.
names = [os.path.basename(f) for f in filename]

In [None]:
# File-name column and per-label prediction columns of the submission table.
se_file = pd.Series(data=names, name='fname')
label = pd.DataFrame(data=y_pred, columns=sound_names)

In [None]:
# Put the file names and the prediction columns side by side.
sub_df = pd.concat((se_file, label), axis='columns')

In [None]:
# Write the submission file; index=False leaves the DataFrame index out of the CSV.
sub_df.to_csv('submission.csv', index=False)