<a href="https://colab.research.google.com/github/ShakilAhmedSumon/speech-cpc/blob/main/CPC.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [17]:
from tensorflow.keras.utils import Sequence
from random import shuffle
import pandas as pd
import numpy as np
import librosa
import os
import logging
import warnings
from random import shuffle
import numpy as np

from keras.layers import Conv1D, BatchNormalization, LeakyReLU, Flatten, Dense, GRU, TimeDistributed, Input, Lambda
from keras.layers import Dot, Lambda
from keras.models import Model
from tensorflow.keras.optimizers import SGD, Adam
from keras import backend as K
from keras.backend import expand_dims
from keras.callbacks import TensorBoard, ModelCheckpoint
import tensorflow as tf
import os
import datetime

In [4]:
def setup_logging(fname, level=logging.DEBUG):
    """
    Configure the root logger with a file handler and a console handler.

    The function is safe to call repeatedly (e.g. when the notebook cell is
    re-run): handlers installed by a previous call are removed and closed
    first, so every record is emitted only once per destination. Handlers
    attached to the root logger by other code are left untouched.

    :param fname: name of log file
    :param level: log level of the root logger and of the file handler
    :return: the configured root logger
    """
    formatter = logging.Formatter('[%(levelname)s]%(asctime)s:%(name)s:%(message)s')
    logger = logging.getLogger()
    logger.setLevel(level)

    # Remove what an earlier call installed; otherwise handlers pile up.
    for stale in list(logger.handlers):
        if getattr(stale, '_from_setup_logging', False):
            logger.removeHandler(stale)
            stale.close()

    # File Handler
    fh = logging.FileHandler(fname)
    fh.setLevel(level)
    fh.setFormatter(formatter)
    fh._from_setup_logging = True
    logger.addHandler(fh)

    # Stream Handler: the console only receives warnings and above
    ch = logging.StreamHandler()
    ch.setLevel(logging.WARNING)
    ch.setFormatter(formatter)
    ch._from_setup_logging = True
    logger.addHandler(ch)

    return logger

In [30]:
class ContrastiveDataGenerator(Sequence):
    """
    Batch generator that cuts a random window out of each audio file and splits
    it into ``context_samples`` consecutive chunks followed by
    ``contrastive_samples`` chunks, each ``chunk_size`` samples long.
    """

    def __init__(self, data_pth='../data', batch_size=10, shuffle=True, seed=42, categories=list(), normalize=True,
                 fs=16000, chunk_size=4096, context_samples=5, contrastive_samples=1):
        """
        Constructor

        :param data_pth: folder holding ``train_curated.csv``; only read when ``categories`` is non-empty
        :param batch_size: number of examples per batch
        :param shuffle: reshuffle the file list at the end of every epoch
        :param seed: seed of the generator's private random state (epoch shuffling and crop offsets)
        :param categories: labels to keep; when empty, every entry of the current working directory is used
        :param normalize: stored on the instance; the generator itself does not use it
        :param fs: sampling rate handed to ``librosa.load``
        :param chunk_size: number of audio samples per chunk
        :param context_samples: number of consecutive chunks forming the context
        :param contrastive_samples: number of chunks that directly follow the context
        """
        self.it = 0
        self.shuffle = shuffle
        self.data_pth = data_pth
        self.normalize = normalize
        self.fs = fs
        self.batch_size = batch_size
        self.seed = seed
        self.context_samples = int(context_samples)
        self.contrastive_samples = int(contrastive_samples)
        self.chunk_size = int(chunk_size)
        # Private random state so that `seed` really controls shuffling and crop offsets.
        self._rng = np.random.RandomState(seed)

        if len(categories) == 0:
            # No label filter: take every entry of the current working directory.
            self.file_list = os.listdir()
        else:
            # Label filter: needs the metadata table (columns `fname` and `labels`).
            meta = pd.read_csv(os.path.join(data_pth, 'train_curated.csv'))
            self.file_list = meta.query('labels in @categories').fname.tolist()
        self.list_sz = len(self.file_list)
        self.max_it = int(np.ceil(self.list_sz / self.batch_size))

    def __len__(self):
        """
        Number of batches per epoch
        :return: ceil(number of files / batch size)
        """
        return self.max_it

    def on_epoch_end(self):
        """
        Performs at the end of each epoch: reshuffles the file list in place,
        but only when the generator was created with ``shuffle=True``.
        :return:
        """
        if self.shuffle:
            self._rng.shuffle(self.file_list)

    def __getitem__(self, item):
        """
        Return one batch
        :param item: batch index
        :return: ``([context, contrastive], labels)`` as built by the data generation routine
        """
        return self.__data_generation(item)

    def __data_generation(self, it):
        """
        Data generator

        Walks the file list starting at ``it * batch_size`` (wrapping around),
        skips files shorter than the required window and crops one random
        window from each remaining file until the batch is full.

        :param it: batch index
        :return: tuple ``([context, contrastive], labels)`` where context has shape
                 (batch, context_samples, chunk_size, 1), contrastive has shape
                 (batch, contrastive_samples, chunk_size, 1) and labels is an identity
                 matrix of shape (batch, batch, 1)
        :raises ValueError: if there are no files, or no file is long enough for one window
        """
        if self.list_sz == 0:
            raise ValueError('No files available to build a batch')

        # Modulo keeps the start index valid even for an out-of-range batch index.
        pos = (it * self.batch_size) % self.list_sz
        frames = (self.contrastive_samples + self.context_samples) * self.chunk_size

        context_batch = np.zeros([self.batch_size, self.context_samples, self.chunk_size])
        contrastive_batch = np.zeros([self.batch_size, self.contrastive_samples, self.chunk_size])

        filled = 0
        rejected_in_a_row = 0
        while filled < self.batch_size:
            fname = self.file_list[pos]
            pos = (pos + 1) % self.list_sz
            signal, _ = librosa.load(fname, sr=self.fs)
            spare = signal.shape[0] - frames
            if spare < 0:
                logging.getLogger(__name__).info(' File {:s} is too short'.format(fname))
                rejected_in_a_row += 1
                # A full pass without a usable file would otherwise loop forever.
                if rejected_in_a_row >= self.list_sz:
                    raise ValueError('No file is long enough to provide {:d} samples'.format(frames))
                continue
            rejected_in_a_row = 0

            # `spare + 1` candidate offsets: also valid when the file is exactly `frames` long.
            start = self._rng.randint(spare + 1)
            chunks = signal[start:(frames + start)].reshape((-1, self.chunk_size), order='C')
            context_batch[filled, :, :] = chunks[:self.context_samples, :]
            contrastive_batch[filled, :, :] = \
                chunks[self.context_samples:self.context_samples + self.contrastive_samples, :]
            filled += 1

        # Row i of the identity marks column i, i.e. the chunk cut from the same window.
        labels = np.identity(self.batch_size)[:, :, np.newaxis]
        return ([context_batch[:, :, :, np.newaxis], contrastive_batch[:, :, :, np.newaxis]], labels)

In [6]:
# Mount Google Drive at /content/drive (requires the Google Colab runtime).
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [11]:
# Show the entries of the current working directory (shell command).
!ls

174-50561-0000.flac  174-50561-0017.flac  84-121123-0014.flac
174-50561-0001.flac  174-50561-0018.flac  84-121123-0015.flac
174-50561-0002.flac  174-50561-0019.flac  84-121123-0016.flac
174-50561-0003.flac  84-121123-0000.flac  84-121123-0017.flac
174-50561-0004.flac  84-121123-0001.flac  84-121123-0018.flac
174-50561-0005.flac  84-121123-0002.flac  84-121123-0019.flac
174-50561-0006.flac  84-121123-0003.flac  84-121123-0020.flac
174-50561-0007.flac  84-121123-0004.flac  84-121123-0021.flac
174-50561-0008.flac  84-121123-0005.flac  84-121123-0022.flac
174-50561-0009.flac  84-121123-0006.flac  84-121123-0023.flac
174-50561-0010.flac  84-121123-0007.flac  84-121123-0024.flac
174-50561-0011.flac  84-121123-0008.flac  84-121123-0025.flac
174-50561-0012.flac  84-121123-0009.flac  84-121123-0026.flac
174-50561-0013.flac  84-121123-0010.flac  84-121123-0027.flac
174-50561-0014.flac  84-121123-0011.flac  84-121123-0028.flac
174-50561-0015.flac  84-121123-0012.flac
174-50561-0016.flac  84-12112

In [14]:
# Names of all entries in the current working directory.
# NOTE(review): no definition visible in this notebook reads this variable — confirm it is still needed.
file_list = os.listdir()

In [18]:
def get_encoder(x, emb_size):
    """
    Create encoder

    Five strided stages of Conv1D -> LeakyReLU -> BatchNormalization, followed
    by Flatten and a ReLU Dense projection. Each stage lives in its own name
    scope ``embedding_level_1`` ... ``embedding_level_5``.

    :param x: input tensor fed to the first Conv1D layer
    :param emb_size: number of units of the final Dense layer
    :return: output tensor of the final Dense layer
    """
    # (filters, strides) of each convolutional stage; all stages use kernel_size=3.
    conv_stages = [(10, 5), (8, 4), (4, 2), (4, 2), (4, 2)]

    with tf.name_scope('Encoder'):
        for level, (filters, strides) in enumerate(conv_stages, start=1):
            with tf.name_scope('embedding_level_{:d}'.format(level)):
                x = Conv1D(filters=filters, strides=strides, kernel_size=3)(x)
                x = LeakyReLU()(x)
                x = BatchNormalization()(x)

        with tf.name_scope('embedding_dense'):
            x = Flatten()(x)
            x = Dense(units=emb_size, activation='relu')(x)
    return x

In [19]:
def network_autoregressive(x, code_size):
    """
    Integrate information along the sequence with a single GRU layer.

    :param x: sequence tensor passed to the GRU
    :param code_size: number of GRU units
    :return: output of the GRU layer named ``autoregressive_context``
             (``return_sequences=False``)
    """
    context_gru = GRU(units=code_size, return_sequences=False, name='autoregressive_context')
    return context_gru(x)


In [20]:
def loss_fn(y_true, y_pred):
    """
    Contrastive loss function (eq. 4 from the original article)
    # https://datascience.stackexchange.com/questions/25029/custom-loss-function-with-additional-parameter-in-keras

    Computes ``-log(sum(dot(y_true, y_pred)) / (sum(y_pred) + epsilon))`` with both
    sums taken over axis 1, and multiplies the result by 1e4.

    :param y_true: labels (0, 1), where 0 means the sample was drawn from noisy distribution; 1 means the sample was
    drawn from the target distribution.
    :param y_pred: density ratio (f value from the original article)
    :return: the scaled negative log ratio
    """
    with tf.name_scope('custom_loss_function'):
        numerator = K.sum(K.dot(y_true, y_pred), axis=1)
        # epsilon keeps the denominator away from zero
        denominator = K.sum(y_pred, axis=1) + K.epsilon()
        neg_log_ratio = -K.log(numerator / denominator)
    scaled_loss = neg_log_ratio*1e4
    return scaled_loss

In [21]:
def get_model(chunk_size, context_samples=100, contrastive_samples=10, emd_size=512, gru_size=256):
    """
    Build the (uncompiled) CPC model.

    A shared encoder embeds every chunk. The context chunks are summarised by
    a GRU, the contrastive chunks are projected by a bias-free Dense layer
    ``W``, and the model outputs ``exp`` of the dot product between each
    projected contrastive embedding and the context vector.

    :param chunk_size: number of samples per chunk (encoder input length)
    :param context_samples: number of chunks in the ``context_data`` input
    :param contrastive_samples: number of chunks in the ``contrastive_data`` input
    :param emd_size: size of the encoder embedding
    :param gru_size: number of GRU units; also the number of units of the projection ``W``
    :return: Keras model with inputs ``[context_data, contrastive_data]``
    """
    # NOTE(review): forces training mode globally; confirm this is still wanted/supported by the TF version in use.
    K.set_learning_phase(1)

    # Define encoder model
    encoder_input = Input(shape=[chunk_size, 1])
    encoder_model = Model(encoder_input, get_encoder(encoder_input, emb_size=emd_size), name='encoder')
    encoder_model.summary()

    # Define rest of the model
    x_input = Input(shape=[context_samples, chunk_size, 1], name='context_data')
    y_input = Input(shape=[contrastive_samples, chunk_size, 1], name='contrastive_data')

    # Workaround context: embed every context chunk, summarise with the GRU,
    # then append a trailing axis so the context can be used in the dot product.
    x_encoded = TimeDistributed(encoder_model, name='Historical_embeddings')(x_input)
    context = network_autoregressive(x_encoded, gru_size)
    context = Lambda(lambda x: expand_dims(x, axis=-1), name='transpose_context')(context)

    # Make predictions for the next predict_terms timesteps
    z = TimeDistributed(encoder_model, name='Contrastive_embeddings')(y_input)
    # Equation 3
    z2 = Dense(units=gru_size, name='W', use_bias=False)(z)
    z2 = Lambda(lambda x: K.permute_dimensions(x, (0, 2, 1)), name='transpose')(z2)
    # Use the Dot layer directly instead of instantiating it inside a Lambda closure;
    # the contraction runs over axis 1 of both operands.
    d = Dot(axes=1, name='multiplication')([z2, context])

    f = Lambda(lambda x: K.exp(x), name='exponent')(d)

    # Model
    cpc_model = Model(inputs=[x_input, y_input], outputs=f)
    cpc_model.summary()
    return cpc_model

In [27]:
def train():
    """
    Build the CPC model and the data generator, then train for 10 epochs.

    TensorBoard logs go to ``./logs/cpc_<timestamp>``; the best model
    (lowest training loss) is saved to ``models/cpc1.hdf5``.

    :return:
    """
    tmr = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M")
    # params
    K.set_learning_phase(1)
    chunk_size = 4096
    context_samples = 5
    contrastive_samples = 1
    emd_size = 512
    batch_size = 8

    params = {'model_name': 'cpc1'}
    params.update({'checkpointer': {'verbose': 1,
                                    'save_best_only': True,
                                    'mode': 'min',
                                    'monitor': 'loss'}})

    model_params = {'chunk_size': chunk_size,
                    'context_samples': context_samples,
                    'contrastive_samples': contrastive_samples,
                    'emd_size': emd_size}

    # Empty selection: the generator applies no label filter.
    categories = ()
    gen_params = {'categories': categories,
                  'data_pth': '/',
                  'batch_size': batch_size,
                  'shuffle': True,
                  'seed': 42,
                  'chunk_size': chunk_size,
                  'context_samples': context_samples,
                  'contrastive_samples': contrastive_samples}

    output_folder = 'models'
    # Make sure the checkpoint directory exists before the first save.
    os.makedirs(output_folder, exist_ok=True)
    tensorboard = TensorBoard(log_dir='./logs/' + 'cpc' + '_' + tmr,
                              write_graph=True)
    checkpointer = ModelCheckpoint(filepath=os.path.join(output_folder, params.get('model_name')+'.hdf5'),
                                   **params['checkpointer'])

    callbacks = [tensorboard, checkpointer]
    model = get_model(**model_params)

    data_gen = ContrastiveDataGenerator(**gen_params)

    # `learning_rate` replaces the deprecated `lr` keyword.
    model.compile(loss=loss_fn, optimizer=Adam(learning_rate=1e-5))
    # `fit` accepts Sequence generators directly; `fit_generator` is deprecated.
    model.fit(data_gen, epochs=10, callbacks=callbacks)

In [31]:
# Run the training routine defined in train().
train()



Model: "encoder"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_5 (InputLayer)        [(None, 4096, 1)]         0         
                                                                 
 conv1d_20 (Conv1D)          (None, 819, 10)           40        
                                                                 
 leaky_re_lu_20 (LeakyReLU)  (None, 819, 10)           0         
                                                                 
 batch_normalization_20 (Bat  (None, 819, 10)          40        
 chNormalization)                                                
                                                                 
 conv1d_21 (Conv1D)          (None, 205, 8)            248       
                                                                 
 leaky_re_lu_21 (LeakyReLU)  (None, 205, 8)            0         
                                                           

  super(Adam, self).__init__(name, **kwargs)


Epoch 1/10
Epoch 00001: loss improved from inf to 0.00119, saving model to models/cpc1.hdf5
Epoch 2/10
Epoch 00002: loss improved from 0.00119 to 0.00109, saving model to models/cpc1.hdf5
Epoch 3/10
Epoch 00003: loss did not improve from 0.00109
Epoch 4/10
Epoch 00004: loss improved from 0.00109 to 0.00078, saving model to models/cpc1.hdf5
Epoch 5/10
Epoch 00005: loss improved from 0.00078 to 0.00067, saving model to models/cpc1.hdf5
Epoch 6/10
Epoch 00006: loss improved from 0.00067 to 0.00061, saving model to models/cpc1.hdf5
Epoch 7/10
Epoch 00007: loss did not improve from 0.00061
Epoch 8/10
Epoch 00008: loss did not improve from 0.00061
Epoch 9/10
Epoch 00009: loss did not improve from 0.00061
Epoch 10/10
Epoch 00010: loss did not improve from 0.00061
