In [25]:
import os, sys
import glob
import json
import tensorflow as tf
import numpy as np
import pandas as pd
import librosa
from tqdm import tqdm
import random

import keras
from keras.layers.core import Dense, Flatten
from keras.layers import Input, BatchNormalization, Dropout, Conv2D, MaxPooling2D, ZeroPadding2D, Activation
from keras.activations import relu 
from keras.models import Model as KModel
import keras.backend as K
from keras.losses import kullback_leibler_divergence




## Load Weights

In [2]:
G_name = '../model/sound8.npy'
param_G = np.load(G_name, encoding='latin1').item()

## Create Model in Keras

In [3]:
def keras_conv_2d(prev_layer, in_ch, out_ch, k_h=1,
                 k_w=1, d_h=1, d_w=1,p_h=0, p_w=0, pad='valid',
                 name_scope='conv1', weight_dict=None, eps=1e-5, bn_act=True):
    if pad=='valid':
        padded_input = ZeroPadding2D((p_h, p_w))(prev_layer)
    else:
        padded_input = prev_layer
    
    if weight_dict is not None:
        weights = weight_dict[name_scope]
    
    conv = Conv2D(out_ch, (k_h,k_w),
               strides=(d_h, d_w))
    # Need to pass input through so the layer knows its shape. 
    convOut = conv(padded_input)
    if weight_dict is not None:
        conv.set_weights([weights['weights'], weights['biases']])

    # Break if we don't need to add activation or BatchNorm. 
    if not bn_act:
        return convOut
    
    bn = BatchNormalization(epsilon=eps)
    bnOut = bn(convOut)
    
    if weight_dict is not None:
        bn.set_weights([weights[k] for k in ['gamma','beta','mean','var']])
    act = Activation('relu')
    rOut = act(bnOut)
    
    return rOut


def keras_maxpool(prev, k_h=1, k_w=1, d_h=1, d_w=1):
    return MaxPooling2D(pool_size=(k_h,k_w), strides=(d_h,d_w))(prev)

In [5]:
def create_sn_places_only(param_G=None, n_class=401):
    inp = Input(shape=(None, 1, 1))
    x1 = keras_conv_2d(inp, 1, 16, k_h=64, d_h=2, p_h=32, name_scope='conv1', weight_dict=param_G)
    x2 = keras_maxpool(x1, k_h=8, d_h=8)
    x3 = keras_conv_2d(x2, 16, 32, k_h=32, d_h=2, p_h=16, name_scope='conv2', weight_dict=param_G)
    x4 = keras_maxpool(x3, k_h=8, d_h=8)
    x5 = keras_conv_2d(x4, 32, 64, k_h=16, d_h=2, p_h=8, name_scope='conv3',weight_dict=param_G)
    x6 = keras_conv_2d(x5, 64, 128, k_h=8, d_h=2, p_h=4, name_scope='conv4',weight_dict=param_G)
    x7 = keras_conv_2d(x6, 128, 256, k_h=4, d_h=2, p_h=2, name_scope='conv5',weight_dict=param_G)
    x8 = keras_maxpool(x7, k_h=4, d_h=4)
    x9 = keras_conv_2d(x8, 256, 512, k_h=4, d_h=2, p_h=2, name_scope='conv6',weight_dict=param_G)
    x = keras_conv_2d(x9, 512, 1024, k_h=4, d_h=2, p_h=2, name_scope='conv7',weight_dict=param_G)
    if n_class == 401:
        places = keras_conv_2d(x, 1024, n_class, k_h=8, d_h=2,name_scope='conv8_2',weight_dict=param_G,bn_act=False)
    else:
        places = keras_conv_2d(x, 1024, n_class, k_h=8, d_h=2,name_scope='conv8_2',bn_act=False)
    places = Activation('softmax')(places)
    placesModel = KModel(inputs=inp, outputs=places)
    return placesModel

In [6]:
model = create_sn_places_only(param_G=param_G, n_class=365)

In [7]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, None, 1, 1)        0         
_________________________________________________________________
zero_padding2d_1 (ZeroPaddin (None, None, 1, 1)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, None, 1, 16)       1040      
_________________________________________________________________
batch_normalization_1 (Batch (None, None, 1, 16)       64        
_________________________________________________________________
activation_1 (Activation)    (None, None, 1, 16)       0         
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, None, 1, 16)       0         
_________________________________________________________________
zero_padding2d_2 (ZeroPaddin (None, None, 1, 16)       0         
__________

## Test

In [111]:
def kld(p, q):
    """Calculates Kullback–Leibler divergence"""
    print q.shape
    p = p
    q = q
    q = np.mean(q, axis=1)
    q = np.reshape(q, (1, q.shape[0], 1, q.shape[-1]))
    print p.shape, q.shape
    return np.sum(p * np.log(p / q), axis=(p.ndim - 1))

def softmax(x):
    return np.exp(x) / np.exp(x).sum(axis=-1)

In [81]:
x, sr = librosa.load("./test_data/rank_1.mp3")
x = np.reshape(x, (1, x.shape[0], 1, 1))
y_pred = model.predict(x)

In [82]:
y_true_sample = np.random.rand(365)
y_true_sample = softmax(np.reshape(y_true_sample, (1, 1, 1, 365)))

In [112]:
a = np.array([1, 1, 1, 0, 0,0,0,0,0,0])
a = softmax(np.reshape(a, (1, 1, 1, a.shape[0])) + K.epsilon())
b = np.array([0, 1, 1, 1, 0,0,0,0,0,0])
b = softmax(np.reshape(b, (1, 1, 1, b.shape[0]))+ K.epsilon())
print a
print b
d = kld(a,  b)
print d

[[[[ 0.17936718  0.17936718  0.17936718  0.0659855   0.0659855   0.0659855
     0.0659855   0.0659855   0.0659855   0.0659855 ]]]]
[[[[ 0.0659855   0.17936718  0.17936718  0.17936718  0.0659855   0.0659855
     0.0659855   0.0659855   0.0659855   0.0659855 ]]]]
(1, 1, 1, 10)
(1, 1, 1, 10) (1, 1, 1, 10)
[[[ 0.11338168]]]


In [80]:
for i in range(y_pred.shape[1]):
    p = y_pred[0,i,0]
    q = softmax(y_true_sample[0,0,0])
    d = kld(p, q)
    print d

0.0425030439048
0.0422714633533
0.0422885770774
0.0422996535501
0.0422878140762
0.0422977245349
0.0422968018702
0.0422992727587
0.0422799736301
0.042299607573
0.0422915991529
0.0422910618671
0.042303640829
0.0422901042019
0.0422917335082
0.0422814482037
0.0422826121768
0.0422910264096
0.0422859851033
0.0422850708369
0.0422844375668
0.0422732628041
0.0422938269409
0.0422964009542
0.0423403709286


In [17]:
np.sum(y_pred)

25.0

## Train Example

In [27]:
batch_size = 2
epoch_size = 2000
sample_per_epoch = 1000


[A

In [91]:
def custom_kld(y_true, y_pred):
    y_true = K.clip(y_true, K.epsilon(), 1)
    y_pred = K.mean(K.clip(y_pred, K.epsilon(), 1), axis=1)
    return K.sum(y_true * K.log(y_true / y_pred), axis=-1)

In [29]:
movie_list = []
model.compile(loss=custom_kullback_leibler_divergence, optimizer='adam')
for i in range(epoch_size):
    pbar = tqdm(total=sample_per_epoch)
    loss_val = 0
    for j in range(sample_per_epoch):
        movie_batch = random.sample(movie_list, batch_size)
        x =[]
        y_true = []
        tmp_loss_val = model.train_on_batch(x, y_true)
        print tmp_loss_val
        break
        pbar.update(1)
        loss_val = loss_val + tmp_loss_val
    pbar.close()
    break
    print str(i) +"Epoch Mean Loss :", loss_val / float(sample_per_epoch)




  0%|          | 0/1000 [00:00<?, ?it/s][A[A[A

ValueError: sample larger than population

In [113]:
model.compile(loss=custom_kld, optimizer='adam')
x, sr = librosa.load("./test_data/rank_1.mp3")
x = np.reshape(x, (1, x.shape[0], 1, 1))
y_true_sample = np.random.rand(365)
y_true_sample = softmax(np.reshape(y_true_sample, (1, 1, 1, 365)))
tmp_loss_val = model.train_on_batch(x, y_true_sample)
print tmp_loss_val

ValueError: Cannot feed value of shape (1, 1, 365) for Tensor u'activation_8_target_31:0', which has shape '(?, ?, ?, ?)'