# Forked Neural Network

In [1]:
## Standard python libraries
import numpy as np
import time
import sys
import matplotlib.pylab as plt
import functools
%matplotlib inline

## Magnolia data iteration
sys.path.append('../../')
from src.features.mixer import FeatureMixer
from src.features.wav_iterator import batcher
from src.features.supervised_iterator import SupervisedIterator, SupervisedMixer

In [2]:
import tensorflow as tf
# No session is opened here. The model-construction cell further down calls
# tf.reset_default_graph() and then opens its own session; a session created
# in this cell would never be used, would never be closed, and would still be
# alive when the default graph is reset.
print(tf.__version__)

1.1.0-rc2


## Set up the data

In [3]:
# Feature files read by the mixer (absolute paths on the shared data volume)
libritrain = "/local_data/teams/magnolia/processed_train-clean-100.h5"
libridev = "/local_data/teams/magnolia/libri-dev.h5"

# Batch geometry: examples per batch, and the per-example shape handed to the
# mixer through its `shape=` argument
datashape = (16, 257)
batchsize = 64

# NOTE: this value is reassigned (to 300) in the cell that builds the model
embedding_size = 600

## Create a supervised mixer and batcher



In [4]:
# Build a mixer over three copies of the training file; get_batch() is unpacked
# into three values below (X, Y, I).
mixer = SupervisedMixer([libritrain,libritrain,libritrain], shape=datashape, 
                     mix_method='add', diffseed=True, return_key=True)
# Time one batch with a monotonic wall-clock timer. time.clock() is deprecated
# since Python 3.3, removed in 3.8, and reports CPU time (not elapsed time) on
# Unix, so it under-reports anything that waits on disk I/O.
tbeg = time.perf_counter()
X, Y, I = mixer.get_batch(batchsize)
tend = time.perf_counter()
# NOTE(review): the message says "libridev" but the sources above are
# `libritrain`; the text is left unchanged to match the recorded output.
print('Supervised feature mixer with 3 libridev sources timed at ', (tend-tbeg), 'sec')

Supervised feature mixer with 3 libridev sources timed at  0.4762789999999999 sec


## NEURAL NETWORK

The loss function takes as input the variable `Vlast` for the last layer ($V_{last}$, where a vector in $V_{last}$ is $v_{l}$). (That's the first couple of lines, where one just makes a tensorflow variable `Vlasttf`.)

The actual cost function is the *word2vec* objective function, where samples are positively and negatively sampled and then mixed. Let $A$ be a matrix of "attractors", so to speak. (We'll not use that terminology later on.) Then a positively sampled vector $a_p$ and a few negatively sampled ones $a_{n_1}$ and $a_{n_2}$ are all columns in $A$. The loss over a batch $B$ is denoted `tfbatchlo`, and is specified as:

$$ \mathcal{L}(v_l) = \log \sigma ( v_l^T a_p) + \sum_j \log \sigma( -1 \cdot v_l^T a_{n_j} )$$

## Forked neural network model

In [5]:
def scope(function):
    """Turn a graph-building method into a lazily evaluated, cached property.

    The first time the property is read on an instance, `function(self)` is
    executed inside `tf.device("/gpu:0")` and a `tf.variable_scope` named after
    the method, and the result is stored on the instance as
    `_cache_<method name>`. Later reads return the stored value without
    calling `function` again.
    """
    name = function.__name__
    cache_slot = '_cache_' + name

    def cached_getter(self):
        # Fast path: this instance already built its piece of the graph.
        if hasattr(self, cache_slot):
            return getattr(self, cache_slot)

        with tf.device("/gpu:0"), tf.variable_scope(name):
            setattr(self, cache_slot, function(self))
        return getattr(self, cache_slot)

    # Keep the wrapped method's name/docstring, then expose it as a property.
    return property(functools.wraps(function)(cached_getter))

class ClusterModel:
    """BLSTM embedding network with a sampled-label, word2vec-style cost.

    The graph is assembled lazily by the `@scope` properties `network`,
    `cost` and `optimizer`. `__init__` reads each of them once, so the whole
    graph (and every variable) exists as soon as the object is constructed.
    """
    def __init__(self, X, Y, F, I, layer_size, embedding_size, num_labels):
        """Store the inputs and build the graph.

        X              -- input tensor fed to the BLSTM stack; its first two
                          dynamic dimensions are reused when reshaping the
                          embedding (assumed [batch, time, F] -- TODO confirm)
        Y              -- rank-4 target tensor multiplied into the similarities
                          (assumed [batch, labels per example, time, F] with
                          +/-1 entries -- TODO confirm against the mixer)
        F              -- size of the feature axis
        I              -- integer label indices used to select vectors from
                          `Vclass` (assumed [batch, labels per example])
        layer_size     -- width of each BLSTM output (forward + backward).
                          Must be even: each direction gets layer_size//2
                          units and the following conv layer expects exactly
                          layer_size input channels.
        embedding_size -- length of each embedding vector
        num_labels     -- number of labels that own a vector in `Vclass`
        """
        # One trainable vector per (label, feature bin), stored as
        # [embedding_size, num_labels, F].
        self.Vclass = tf.Variable(tf.random_normal( [embedding_size, num_labels, F] ), dtype=tf.float32)

        self.X = X
        self.Y = Y

        self.F = F
        self.I = I

        self.layer_size = layer_size
        self.embedding_size = embedding_size

        # Bare attribute reads: each one triggers the cached @scope property
        # and thereby adds that part of the graph.
        self.network
        self.cost
        self.optimizer

    def weight_variable(self,shape):
        """Return a new variable drawn from a truncated normal with stddev sqrt(2/shape[0])."""
        # NOTE(review): for the conv filter built in `network` (shape
        # [1, layer_size, ...]) shape[0] is the filter width, i.e. 1, not the
        # number of input channels -- confirm this scale is intended.
        initial = tf.truncated_normal(shape, stddev=tf.sqrt(2.0/shape[0]))
        return tf.Variable(initial)

    def conv1d(self,x, W):
        """1-D convolution of `x` with filter `W`, stride 1, 'SAME' padding."""
        return tf.nn.conv1d(x, W, stride=1, padding='SAME')

    def conv1d_layer(self,in_layer,shape):
        """Apply a 1-D convolution with a fresh filter of `shape` plus a bias.

        The bias has one entry per output channel (shape[-1]) and is created
        by the same random initialiser as the filter.
        """
        weights = self.weight_variable(shape)
        biases = self.weight_variable([shape[-1]])

        return self.conv1d(in_layer,weights) + biases

    def BLSTM(self, X, size, scope):
        """Bidirectional LSTM layer built from two independent LSTMs.

        X     -- input tensor; axis 1 is treated as the sequence axis
        size  -- total output width; each direction uses size//2 units
        scope -- suffix that keeps the variable scopes of different layers apart

        Returns the forward and (re-reversed) backward outputs concatenated
        along the last axis.
        """
        forward_input = X
        # The backward LSTM reads the sequence axis in reverse order.
        backward_input = tf.reverse(X, [1])

        with tf.variable_scope('forward_' + scope):
            forward_lstm = tf.contrib.rnn.BasicLSTMCell(size//2)
            forward_out, _ = tf.nn.dynamic_rnn(forward_lstm, forward_input, dtype=tf.float32)

        with tf.variable_scope('backward_' + scope):
            backward_lstm = tf.contrib.rnn.BasicLSTMCell(size//2)
            backward_out, _ = tf.nn.dynamic_rnn(backward_lstm, backward_input, dtype=tf.float32)

        # Flip the backward outputs back so both halves are aligned step by step.
        return tf.concat([forward_out[:,:,:], backward_out[:,::-1,:]], 2)

    @scope
    def network(self):
        """Embedding tensor of shape [dim0(X), dim1(X), F, embedding_size].

        Two stacked BLSTM layers are followed by a width-1 convolution that
        emits F*embedding_size channels per step; these are reshaped to one
        embedding vector per feature bin and L2-normalised along the last axis.
        """
        shape = tf.shape(self.X)

        BLSTM_1 = self.BLSTM(self.X, self.layer_size, 'one')
        BLSTM_2 = self.BLSTM(BLSTM_1, self.layer_size, 'two')

        feedforward = self.conv1d_layer(BLSTM_2,[1,self.layer_size,self.embedding_size*self.F])

        embedding = tf.reshape(feedforward,[shape[0],shape[1],self.F,self.embedding_size])
        embedding = tf.nn.l2_normalize(embedding,3)

        return embedding

    @scope
    def cost(self):
        """Scalar training cost.

        For every example, each label index in `I` selects one vector per
        feature bin from `Vclass`. Its dot product with the network embedding
        is weighted by `Y` and passed through -log(sigmoid(-.)). The result is
        summed over axis 1 (the selected labels), averaged over axis 0 (the
        batch) and finally averaged over the remaining axes.
        """
        # Symbolic shapes; only Yshape is used below.
        Xshape=tf.shape(self.X)
        Yshape=tf.shape(self.Y)

        # things that are necessary for the cost function
        Vin = self.network
        I = tf.expand_dims( self.I, axis=2 )
        Y = self.Y
        Vclass = self.Vclass

        # Prints the symbolic shape op itself, not concrete dimensions.
        print(tf.shape(Vin))

        # gather the appropriate vectors: Vclass is viewed as
        # [num_labels, F, embedding_size] and indexed by label id
        Vout = tf.gather_nd( tf.transpose(Vclass, perm=[1,2,0]), I )

        # Broadcasted Vi and Vo: a singleton axis is inserted so the product
        # below pairs every selected label vector with every embedding
        Vinbroad = tf.reshape( Vin, [Yshape[0], 1, Yshape[2], Yshape[3], self.embedding_size])
        Voutbroad= tf.reshape( Vout, [Yshape[0], Yshape[1], 1, Yshape[3], self.embedding_size] )

        # Correlate all the vectors (dot product over the embedding axis)
        similarity = tf.reduce_sum(Vinbroad * Voutbroad, 4)

        # -log(sigmoid(-z)) is exactly softplus(z). The softplus form cannot
        # produce log(0) = inf when the sigmoid underflows for large |z|.
        # NOTE(review): the sign inside the sigmoid is unchanged from the
        # previous formulation; confirm it matches the +/-1 convention of Y.
        lossfxn = tf.nn.softplus( Y * similarity )

        # Sum correlations over positive and negative correlations
        lossfxn = tf.reduce_sum( lossfxn, 1 )

        # Average over all the batches
        lossfxn = tf.reduce_mean( lossfxn, 0)

        # To do: put weight by pre-emphasis or gradient confidence
        lossfxn = tf.reduce_mean( lossfxn )

        return lossfxn

    @scope
    def optimizer(self):
        """Training op: one Adam step (default hyper-parameters) on `cost`."""
        opt = tf.train.AdamOptimizer()
        cost = self.cost
        return opt.minimize(cost)

In [None]:
# Release any session left open by an earlier cell or by a previous run of
# this cell. tf.reset_default_graph() is documented as undefined behaviour
# while a session is still active.
try:
    sess.close()
except NameError:
    pass
tf.reset_default_graph()

# Model hyper-parameters
F = 257
layer_size=50
embedding_size=300

# Graph inputs. From here on X, Y and I name placeholders rather than arrays.
X = tf.placeholder("float", [None,None,F])
Y = tf.placeholder("float", [None, None,None,F])
I = tf.placeholder(dtype=tf.int32)

num_labels=251

# Constructing the model builds the whole graph (network, cost, optimizer).
model = ClusterModel(X, Y, F, I, layer_size, embedding_size, num_labels)

sess = tf.Session()
sess.run(tf.global_variables_initializer())

iterations = []
costs = []

print("Initialized")

Tensor("cost/Shape_2:0", shape=(4,), dtype=int32, device=/device:GPU:0)
Initialized


In [None]:
# Cost after every optimisation step, in order.
costs = []
for iteration in range(1000):
    # Use the batch size and per-example shape from the configuration cell
    # instead of repeating the literals 64 and (16, 257) here.
    Xdata, Ydata, Idata = mixer.get_batch(batchsize, out_TF=None)
    Ydata = Ydata.reshape( [Ydata.shape[0], Ydata.shape[1]] + list(datashape) )

    # One optimiser step; the cost is fetched in the same run call. The
    # network is fed abs(Xdata).
    optloss, cost = sess.run([model.optimizer, model.cost], feed_dict={X: abs(Xdata), Y:Ydata, I:Idata})
    costs.append(cost)
    # '\r' rewrites the same output line; flush so each update is shown promptly.
    sys.stdout.write('\rCost function = '+str(cost))
    sys.stdout.flush()

Cost function = 1.90061