In [83]:
import math
import os

import keras
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
from keras import backend as K
from keras.callbacks import ReduceLROnPlateau
from keras.layers import Input, Dense, Flatten, Lambda, Dropout, Activation, LSTM, GRU, \
        TimeDistributed, Convolution1D, MaxPooling1D, Convolution2D, MaxPooling2D, \
        BatchNormalization, GlobalAveragePooling1D, GlobalMaxPooling1D, concatenate, \
        ZeroPadding2D, Reshape, merge, AtrousConvolution1D, Add, Multiply
from keras.layers.advanced_activations import ELU
from keras.layers.local import LocallyConnected1D
from keras.models import Model
from keras.models import load_model
from keras.optimizers import Adam, RMSprop
from keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
%matplotlib inline
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

In [57]:
def get_split_need_data(X, y):
    """Expand per-audio feature collections into one sample per segment.

    Every element of ``X`` is iterated to obtain its segments; each segment
    becomes an individual sample and is paired with the label stored at the
    same position in ``y``.

    Args:
        X: iterable of per-audio feature collections, each iterable over its
            segments.
        y: labels, indexable by the position of the audio in ``X``.

    Returns:
        A tuple ``(features, labels)``: ``features`` is a float32 array built
        from all segments in order, ``labels`` is an array holding one label
        per segment.
    """
    flat_features = []
    flat_labels = []
    for position, segments in enumerate(X):
        track_label = y[position]
        segment_list = list(segments)
        flat_features.extend(segment_list)
        # Repeat the track label once for every segment taken from that track.
        flat_labels.extend([track_label] * len(segment_list))
    features = np.array(flat_features, dtype=np.float32)
    labels = np.array(flat_labels)
    return features, labels

In [17]:
def get_crnn_model_koz4k(input_shape, n_conv_blocks, n_conv_filters, conv_filter_size, n_lstm_blocks, n_lstm_units,
                         n_classes=10, dropout_rate=0.5):
    """Build a convolutional-recurrent classifier (Conv1D blocks -> LSTMs -> per-step softmax -> mean).

    Args:
        input_shape: shape of one sample without the batch axis
            (assumed ``(steps, features)`` as required by Conv1D -- TODO confirm with caller).
        n_conv_blocks: number of Conv1D + ReLU + MaxPooling1D(2) blocks.
        n_conv_filters: number of filters in every convolution.
        conv_filter_size: kernel size of every convolution.
        n_lstm_blocks: number of stacked LSTM layers (all return full sequences).
        n_lstm_units: number of units in every LSTM layer.
        n_classes: size of the softmax output (defaults to 10, the value that
            was previously hard-coded).
        dropout_rate: rate used by both Dropout layers (defaults to 0.5, the
            value that was previously hard-coded).

    Returns:
        An uncompiled ``keras.models.Model`` whose output is the per-step class
        distribution averaged over the step axis.
    """
    model_input = Input(input_shape, name='input')
    layer = model_input
    # convolution blocks
    for i in range(n_conv_blocks):
        layer = Convolution1D(
                filters=n_conv_filters,
                kernel_size=conv_filter_size,
                strides=1,
                name='convolution_' + str(i + 1)
            )(layer)
        layer = Activation('relu')(layer)
        layer = MaxPooling1D(pool_size=2)(layer)

    layer = Dropout(dropout_rate)(layer)
    # lstm blocks
    for j in range(n_lstm_blocks):
        layer = LSTM(n_lstm_units,
                     return_sequences=True,
                     name='lstm_' + str(j + 1)
            )(layer)

    layer = Dropout(dropout_rate)(layer)
    # Per-step class scores followed by a per-step softmax.
    layer = TimeDistributed(Dense(n_classes))(layer)
    layer = Activation('softmax', name='output_realtime')(layer)
    # Average the per-step distributions over the step axis (axis 1). The
    # built-in pooling layer computes the same mean as the former Lambda but
    # does not embed a Python lambda in the model, so it saves/loads cleanly.
    model_output = GlobalAveragePooling1D(name='output_merged')(layer)
    model = Model(model_input, model_output)
    return model

In [25]:
def get_cnn_model_koz4k(input_shape, n_conv_blocks, n_conv_filters, conv_filter_size,
                        n_classes=10, dropout_rate=0.5):
    """Build a 1-D convolutional classifier with combined global average/max pooling.

    Args:
        input_shape: shape of one sample without the batch axis
            (assumed ``(steps, features)`` as required by Conv1D -- TODO confirm with caller).
        n_conv_blocks: number of Conv1D(stride 2) + ReLU + MaxPooling1D(2) blocks.
        n_conv_filters: number of filters in every convolution.
        conv_filter_size: kernel size of every convolution.
        n_classes: size of the softmax output (defaults to 10, the value that
            was previously hard-coded).
        dropout_rate: rate of the Dropout applied before the dense layer
            (defaults to 0.5, the value that was previously hard-coded).

    Returns:
        An uncompiled ``keras.models.Model`` producing a softmax over ``n_classes``.
    """
    model_input = Input(input_shape, name='input')
    layer = model_input
    # convolution blocks
    for i in range(n_conv_blocks):
        layer = Convolution1D(
                filters=n_conv_filters,
                kernel_size=conv_filter_size,
                strides=2,
                name='convolution_' + str(i + 1)
            )(layer)
        layer = Activation('relu')(layer)
        layer = MaxPooling1D(pool_size=2)(layer)
    # global pooling blocks: summarise the step axis by both its mean and its max
    averagePool = GlobalAveragePooling1D()(layer)
    maxPool = GlobalMaxPooling1D()(layer)
    layer = concatenate([averagePool, maxPool])
    layer = Dropout(rate=dropout_rate)(layer)
    # dense layers
    layer = Dense(n_classes)(layer)
    model_output = Activation('softmax')(layer)
    model = Model(model_input, model_output)
    return model

In [96]:
def wavenet_block(n_conv_filters, conv_filter_size, dilation_rate):
    """Create a gated, dilated causal convolution block with a residual connection.

    Args:
        n_conv_filters: number of filters in every convolution of the block.
        conv_filter_size: kernel size of the two dilated causal convolutions.
        dilation_rate: dilation rate of the two dilated causal convolutions.

    Returns:
        A function ``f(input_)`` that applies the block to a tensor and returns
        ``(out, skip_out)``: ``out`` is ``skip_out`` plus the block input
        (residual path) and ``skip_out`` is the output of the 1x1 convolution.
        The residual sum requires ``input_`` to have the same shape as
        ``skip_out`` (i.e. ``n_conv_filters`` channels).
    """
    def f(input_):
        residual = input_

        # "Filter" branch: dilated causal conv -> batch norm -> tanh.
        tanh_out = Convolution1D(n_conv_filters, conv_filter_size, padding='causal',
                                 dilation_rate=dilation_rate)(input_)
        tanh_out = BatchNormalization(axis=-1)(tanh_out)
        tanh_out = Activation('tanh')(tanh_out)

        # "Gate" branch: same convolution shape, squashed with a sigmoid.
        sigmoid_out = Convolution1D(n_conv_filters, conv_filter_size, padding='causal',
                                    dilation_rate=dilation_rate)(input_)
        sigmoid_out = BatchNormalization(axis=-1)(sigmoid_out)
        sigmoid_out = Activation('sigmoid')(sigmoid_out)

        # Element-wise gating. Uses the Keras 2 merge layers instead of the
        # legacy `merge(..., mode='mul')` function, which is deprecated.
        merged = Multiply()([tanh_out, sigmoid_out])
        skip_out = Convolution1D(n_conv_filters, 1, activation='relu')(merged)
        out = Add()([skip_out, residual])
        return out, skip_out
    return f

In [104]:
def get_wavenet_model(input_shape, n_wavenet_block, n_conv_filter, conv_filter_size, max_dilation_rate,
                      n_classes=10):
    """Build a WaveNet-style classifier from stacked gated dilated-convolution blocks.

    Args:
        input_shape: shape of one sample without the batch axis
            (assumed ``(steps, features)`` as required by Conv1D -- TODO confirm with caller).
        n_wavenet_block: number of stacks of dilated blocks.
        n_conv_filter: number of filters used by every convolution.
        conv_filter_size: kernel size of the dilated convolutions.
        max_dilation_rate: controls the number of dilation levels per stack:
            ``int(log2(max_dilation_rate))`` blocks with rates ``1, 2, 4, ...``.
            NOTE(review): the largest rate actually used is therefore
            ``2 ** (int(log2(max_dilation_rate)) - 1)`` (e.g. 8 for 16), not
            ``max_dilation_rate`` itself -- kept as-is to preserve the
            existing architecture; confirm whether this is intended.
        n_classes: size of the softmax output (defaults to 10, the value that
            was previously hard-coded).

    Returns:
        An uncompiled ``keras.models.Model`` producing a softmax over ``n_classes``.

    Raises:
        ValueError: if the arguments would produce no dilated block at all
            (``n_wavenet_block < 1`` or ``max_dilation_rate < 2``).
    """
    n_dilation_levels = int(math.log2(max_dilation_rate))
    if n_wavenet_block < 1 or n_dilation_levels < 1:
        raise ValueError(
            'get_wavenet_model needs at least one dilated block: got '
            'n_wavenet_block=%r, max_dilation_rate=%r' % (n_wavenet_block, max_dilation_rate))

    model_input = Input(shape=input_shape, name='input')
    # 1x1 convolution projects the input to n_conv_filter channels so that the
    # residual additions inside the blocks are shape-compatible.
    x = Convolution1D(n_conv_filter, 1, padding='same')(model_input)
    x = BatchNormalization(axis=-1)(x)
    x = Activation('tanh')(x)
    skip = []
    for _ in range(n_wavenet_block):
        for r in range(n_dilation_levels):
            x, s = wavenet_block(n_conv_filter, conv_filter_size, 2**r)(x)
            x = Dropout(0.5)(x)
            skip.append(s)

    # Sum all skip connections. Uses the Keras 2 `Add` layer instead of the
    # legacy `merge(..., mode='sum')`; `Add` needs at least two inputs, so a
    # single skip tensor is used directly.
    x = Add()(skip) if len(skip) > 1 else skip[0]
    x = BatchNormalization(axis=-1)(x)
    x = Activation('relu')(x)

    x = Convolution1D(n_conv_filter, 1, padding='same')(x)
    x = BatchNormalization(axis=-1)(x)
    x = Activation('relu')(x)

    x = Convolution1D(n_conv_filter, 1, padding='same')(x)

    x = Flatten()(x)
    x = Dropout(0.5)(x)
    model_output = Dense(n_classes, activation='softmax')(x)
    model = Model(model_input, model_output)
    return model

In [78]:
# Load the pre-computed per-track segment features and the one-hot labels.
# NOTE(review): the variable is named "stft" but the file name says "melspec" -- confirm which it is.
X_stft_split = np.load('GTZAN/split_features/melspec_featur_split_1024_3s.npy')
y = np.load('GTZAN/onehot_labels.npy')

# Fixed seed so the train/validation split (and every result derived from it)
# is the same on each Restart & Run All.
SPLIT_SEED = 42

# Split at track level first, then expand tracks into segments, so all
# segments of one track end up on the same side of the split.
X_train, X_val, y_train, y_val = train_test_split(
    X_stft_split, y, test_size=0.2, stratify=y, random_state=SPLIT_SEED)
print(X_train.shape, X_val.shape)

# One sample per segment; each segment inherits the label of its track.
X_train, y_train = get_split_need_data(X_train, y_train)
X_val, y_val = get_split_need_data(X_val, y_val)
print(X_train.shape, X_val.shape)

(800, 19, 128, 128) (200, 19, 128, 128)
(15200, 128, 128) (3800, 128, 128)


In [None]:
# Build the WaveNet-style classifier; the input shape is taken from the last
# two axes of the segment-level training array.
model = get_wavenet_model(
    input_shape=(X_train.shape[1], X_train.shape[2]),
    n_wavenet_block=2,
    n_conv_filter=64,
    conv_filter_size=3,
    max_dilation_rate=16,
)
model.summary()

# Shrink the learning rate by 5x after 3 epochs without improvement.
# NOTE(review): this watches "loss" (training loss), not "val_loss" -- confirm that is intended.
lr_change = ReduceLROnPlateau(
    monitor="loss",
    factor=0.2,
    patience=3,
    min_lr=0.0,
)
opt = Adam(lr=1e-4)

model.compile(
    loss='categorical_crossentropy',
    optimizer=opt,
    metrics=['accuracy'],
)

model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    verbose=1,
    epochs=100,
    batch_size=64,
    callbacks=[lr_change],
)

  from ipykernel import kernelapp as app
  name=name)
  del sys.path[0]


____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
input (InputLayer)               (None, 128, 128)      0                                            
____________________________________________________________________________________________________
conv1d_329 (Conv1D)              (None, 128, 64)       8256        input[0][0]                      
____________________________________________________________________________________________________
batch_normalization_231 (BatchNo (None, 128, 64)       256         conv1d_329[0][0]                 
____________________________________________________________________________________________________
activation_301 (Activation)      (None, 128, 64)       0           batch_normalization_231[0][0]    
___________________________________________________________________________________________

Train on 15200 samples, validate on 3800 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100

In [7]:
# Leftover scratch cell: prints a placeholder string without a trailing newline.
# NOTE(review): the saved output for this cell is a SyntaxError, presumably from a
# kernel where `print` is a statement (Python 2) -- confirm, and consider deleting the cell.
print("jdjdjdj", end="")

SyntaxError: invalid syntax (<ipython-input-7-ba68e5f52599>, line 1)