In [1]:
import tensorflow as tf
from tensorflow.keras.layers import  LSTM, Dense, Flatten, Reshape
from tensorflow.keras.datasets import mnist
import numpy as np
import os
import _pickle as pickle
import argparse
import scipy.io.wavfile as wav
from python_speech_features import delta
from python_speech_features import mfcc
from tensorflow.keras.models import Model
import math
from tqdm import tqdm
from scipy.spatial.distance import cosine

gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
  try:
    tf.config.experimental.set_virtual_device_configuration(
        gpus[0],[tf.config.experimental.VirtualDeviceConfiguration(memory_limit=5120)])
  except RuntimeError as e:
    print(e)
tf.compat.v1.logging.set_verbosity(40)

2023-03-10 16:22:23.497655: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-03-10 16:22:27.934572: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: :/usr/local/cuda/lib64
2023-03-10 16:22:27.935345: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: :/usr/local/cuda/lib64


In [2]:
class Dataset(object):
    """Creat data class."""

    def __init__(self, wav_path, max_length=None):
        """Initialize dataset."""

        self.feature_dim = 39
        data, words = self.get_data(wav_path)
        self.data = data
        self.words = words
        uwords = np.unique(words)
        word2id = {v: k for k, v in enumerate(uwords)}
        self.id2word = {k: v for k, v in enumerate(uwords)}
        ids = [word2id[w] for w in words]

        self.ids = np.array(ids, dtype=np.int32)

        self.num_examples = len(self.ids)
        if max_length == None:
            self.max_length = np.max([len(self.data[x]) for x in range(0, self.num_examples-1)])
        else:
            self.max_length = max_length
    def get_data(self, wav_path):
        data = []
        labels = []
        for elt in os.listdir(wav_path):
            if ".wav" in elt:
                (rate, signal) = wav.read(wav_path+elt)
                mfcc_static = mfcc(signal, rate)
                mfcc_deltas = delta(mfcc_static, 2)
                mfcc_delta_deltas = delta(mfcc_deltas, 2)

                features = np.hstack([mfcc_static, mfcc_deltas, mfcc_delta_deltas])
                features = (features - np.mean(features, axis=0)) / np.std(features, axis=0)

                data.append(features)
                labels.append(elt)
                data_len = len(labels)

        return data, labels

    def batch(self, batch_size, max_length=None):
        self.tot_batch = self.num_examples//batch_size-1
        for batch_ind in range(0, self.num_examples, batch_size):
            temp_tens = []
            temp_label = []
            if batch_ind+batch_size>self.num_examples:
                end= batch_ind+batch_size- self.num_examples
            else:
                end = batch_ind+batch_size
            for data_ind in range(batch_ind, end):
#                 if batch_ind+batch_size<= len(self.data):
                
#                 else:
#                     max_length = np.max([len(self.data[x]) for x in range(batch_ind, len(self.data)-1)])
                paddings = tf.constant([[0, self.max_length-len(self.data[data_ind])], [0, 0]])
                padded = tf.pad(self.data[data_ind], paddings, "CONSTANT") 
                temp_tens.append(padded)
                temp_label.append(self.words[data_ind])
#             yield (tf.convert_to_tensor(temp_tens, dtype=tf.float32), tf.convert_to_tensor(temp_label, dtype=object))
            if len(temp_tens)>1:
                yield (tf.reshape(temp_tens, shape=(len(temp_tens),self.max_length, 39)))
    def batch_pairs(self, batch_size, max_length=None):
        self.tot_batch = self.num_examples//batch_size-1
        x_inp = []
        y_inp = []
        final_x = []
        final_y = []
        for ind_x, tens_x in enumerate(self.data):
            for ind_y, tens_y in enumerate(self.data[ind_x:]):
                if self.words[ind_x] == self.words[ind_y]:
                    x_inp.append(tens_x)
                    y_inp.append(tens_y)
        for batch_ind in range(0, len(x_inp), batch_size):
            temp_tens = []
            temp_label = []
            if batch_ind+batch_size>len(x_inp):
                end= batch_ind+batch_size- len(x_inp)
            else:
                end = batch_ind+batch_size
            for data_ind in range(batch_ind, end):

                paddings = tf.constant([[0, self.max_length-len(x_inp[data_ind])], [0, 0]])
                padded_x = tf.pad(x_inp[data_ind], paddings, "CONSTANT") 
                final_x.append(padded_x)
                paddings = tf.constant([[0, self.max_length-len(y_inp[data_ind])], [0, 0]])
                padded_y = tf.pad(y_inp[data_ind], paddings, "CONSTANT") 
                final_y.append(padded_y)
            if len(final_x)>1:
                yield (tf.reshape(final_x, shape=(len(final_x),self.max_length, 39)),tf.reshape(final_y, shape=(len(final_y),self.max_length, 39)))

    
    def get_batch_nb(self):
        return self.tot_batch
    
    def get_max_len(self):
        return self.max_length
    
    def batch_2(self):
        final = []
        
        for data_ind in range(0, self.num_examples):

            max_length = np.max([len(self.data[x]) for x in range(0, self.num_examples-1)])

            paddings = tf.constant([[0, max_length-len(self.data[data_ind])], [0, 0]])
            
            padded = tf.pad(self.data[data_ind], paddings, "CONSTANT") 
            
            final.append(padded)

        x_inp = np.asarray([x for x in final if np.mean(x) !=0 and not math.isnan(np.mean(x))])
        return x_inp

In [3]:
train_dataset = Dataset("/home/getalp/leferrae/post_doc/corpora/cv-corpus-12.0-2022-12-07/pt/trainWords/")
print(train_dataset.get_max_len())
# test_dataset = Dataset("/home/getalp/leferrae/post_doc/corpora/cv-corpus-12.0-2022-12-07/pt/devWords/")

# train_dataset = Dataset("/home/getalp/leferrae/post_doc/corpora/guinee_casa/train/")
# test_dataset = Dataset("/home/getalp/leferrae/post_doc/corpora/guinee_casa/dev/")

193


In [4]:
test_dataset = Dataset("/home/getalp/leferrae/post_doc/corpora/cv-corpus-12.0-2022-12-07/pt/devWords/", max_length = train_dataset.get_max_len())

In [5]:
print(train_dataset.batch_2().shape)

2023-03-08 16:17:00.743781: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-03-08 16:17:02.429296: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1613] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 5120 MB memory:  -> device: 0, name: Quadro P6000, pci bus id: 0000:04:00.0, compute capability: 6.1
2023-03-08 16:17:02.432873: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1613] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 10493 MB memory:  -> device: 1, name: Quadro P6000, pci bus id: 0000:82:00.0, compute capability: 6.1


(3150, 193, 39)


In [5]:
class Autoencoder(Model):
  def __init__(self, latent_dim):
    super(Autoencoder, self).__init__()
    self.latent_dim = latent_dim   
    self.encoder = tf.keras.Sequential([
      LSTM(latent_dim, activation='relu', return_sequences=True),
      LSTM(latent_dim, activation='relu', return_sequences=True),
      LSTM(latent_dim, activation='relu'),
    ])
    self.decoder = tf.keras.Sequential([
      Dense(7527, activation='sigmoid'),
      Reshape((193, 39))
    ])

  def call(self, x):
    encoded = self.encoder(x)
    decoded = self.decoder(encoded)
    return decoded
  def make_emb(self, x):
    encoded = self.encoder(x)
    decoded = self.decoder(encoded)
    return encoded


In [25]:
latent_dim = 64 

class Autoencoder(Model):
  def __init__(self, latent_dim):
    super(Autoencoder, self).__init__()
    self.latent_dim = latent_dim   
    self.encoder = tf.keras.Sequential([
      Flatten(),
      Dense(latent_dim, activation='relu', return_sequences=True),
    ])
    self.decoder = tf.keras.Sequential([
      Dense(7527, activation='sigmoid', return_sequences=True),
      Reshape((193, 39))
    ])

  def call(self, x):
    encoded = self.encoder(x)
    decoded = self.decoder(encoded)
    return decoded

autoencoder = Autoencoder(latent_dim)

TypeError: ('Keyword argument not understood:', 'return_sequences')

In [14]:
loss = tf.keras.losses.MeanSquaredError()
def grad(model, inputs, outputs):
    with tf.GradientTape() as tape:
        out = model(inputs)
        loss_value = loss(outputs, out)
    return loss_value, tape.gradient(loss_value, model.trainable_variables), inputs, out

In [7]:
autoencoder = Autoencoder(64)

optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001)
# autoencoder.save_weights("./models/AE_{}_ckpt/ae_rand_model.ckpt".format(rep), overwrite=True)
global_step = tf.Variable(0)
n_epochs = 15

2023-03-10 16:23:26.216344: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-03-10 16:23:29.197759: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1613] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 5120 MB memory:  -> device: 0, name: Quadro P6000, pci bus id: 0000:04:00.0, compute capability: 6.1
2023-03-10 16:23:29.201146: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1613] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 11055 MB memory:  -> device: 1, name: Quadro P6000, pci bus id: 0000:82:00.0, compute capability: 6.1


In [8]:
autoencoder.compile(optimizer=optimizer, loss=tf.keras.losses.MeanSquaredError())
autoencoder.build((None,193, 39))
autoencoder.summary()
print(np.array(train_dataset.batch_2()).shape)
# autoencoder.fit(train_dataset.batch_2(), train_dataset.batch_2(),
#             epochs=10,
#             shuffle=True,
#             validation_data=(test_dataset.batch_2(), test_dataset.batch_2()))



Model: "autoencoder"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 sequential (Sequential)     (None, 64)                92672     
                                                                 
 sequential_1 (Sequential)   (None, 193, 39)           489255    
                                                                 
Total params: 581,927
Trainable params: 581,927
Non-trainable params: 0
_________________________________________________________________
(3150, 193, 39)


In [13]:
autoencoder.fit(train_dataset.batch_2(), train_dataset.batch_2(),
            epochs=10,
            shuffle=True,
            validation_data=(test_dataset.batch_2(), test_dataset.batch_2()))



Epoch 1/10


2023-03-07 09:15:48.117096: I tensorflow/compiler/xla/service/service.cc:173] XLA service 0x7fb4800192e0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2023-03-07 09:15:48.117152: I tensorflow/compiler/xla/service/service.cc:181]   StreamExecutor device (0): Quadro P6000, Compute Capability 6.1
2023-03-07 09:15:48.117166: I tensorflow/compiler/xla/service/service.cc:181]   StreamExecutor device (1): Quadro P6000, Compute Capability 6.1
2023-03-07 09:15:48.123862: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2023-03-07 09:15:48.386383: I tensorflow/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2023-03-07 09:15:48.460664: I tensorflow/compiler/jit/xla_compilation_cache.cc:477] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7fb5481c8100>

In [9]:
import sys

def print_progress_bar(index, total, label):
    n_bar = 50  # Progress bar width
    progress = index / total
    sys.stdout.write('\r')
    sys.stdout.write(f"[{'=' * int(n_bar * progress):{n_bar}s}] {int(100 * progress)}%  {label}")
    sys.stdout.flush()

In [92]:
def eval_QbE(model, max_length):
    queries = {}
    test = {}
    results = {}
    rootTest = "/home/getalp/leferrae/post_doc/classifier_zing/wordsTest/"
    rootQueries = "/home/getalp/leferrae/post_doc/corpora/guinee_casa/queries/"
    temp_emb = []
    temp_labels = []
    for elt in tqdm(os.listdir(rootQueries)):
        if ".wav" in elt:
            (rate, signal) = wav.read(rootQueries+elt)
            mfcc_static = mfcc(signal, rate)
            mfcc_deltas = delta(mfcc_static, 2)
            mfcc_delta_deltas = delta(mfcc_deltas, 2)
            features = np.hstack([mfcc_static, mfcc_deltas, mfcc_delta_deltas])
            features = (features - np.mean(features, axis=0)) / np.std(features, axis=0)
            if len(features)>max_length:
                tooMuch = len(features)-max_length
                startTrunc = tooMuch//2
                endTruc = tooMuch//2+tooMuch%2
                padded = tf.constant(features[startTrunc:-endTrunc])
            else:
                paddings = tf.constant([[0, max_length-len(features)], [0, 0]])
                padded = tf.pad(features, paddings, "CONSTANT") 
            temp_emb.append(padded)
            temp_labels.append(elt.replace(".wav", ""))
            
    embeddings = model.make_emb(tf.reshape(temp_emb, shape=(len(temp_emb),max_length, 39)))
                                
    for i, emb in enumerate(embeddings):
        queries[temp_labels[i]] = emb
    
    temp_emb = []
    temp_labels = []
    for elt in tqdm(os.listdir(rootTest)):
        (rate, signal) = wav.read(rootTest+elt)
        mfcc_static = mfcc(signal, rate)
        mfcc_deltas = delta(mfcc_static, 2)
        mfcc_delta_deltas = delta(mfcc_deltas, 2)
        temp_labels.append(elt)
        features = np.hstack([mfcc_static, mfcc_deltas, mfcc_delta_deltas])
        features = (features - np.mean(features, axis=0)) / np.std(features, axis=0)
        if len(features)>max_length:
            tooMuch = len(features)-max_length
            startTrunc = tooMuch//2
            endTrunc = tooMuch//2+tooMuch%2
            padded = tf.constant(features[startTrunc:-endTrunc])
        else:
            paddings = tf.constant([[0, max_length-len(features)], [0, 0]])
            padded = tf.pad(features, paddings, "CONSTANT") 
        temp_emb.append(padded)
#     embeddings = model.make_emb(tf.reshape(temp_emb, shape=(len(temp_emb),max_length, 39)))
    embeddings = model.make_emb(tf.reshape(temp_emb, shape=(len(temp_emb),max_length, 39)))
    for i, emb in enumerate(embeddings):
        elt = temp_labels[i]
        queries[elt.replace(".wav", "")] = emb
        name = elt.split("_")[0]
        test[elt.replace(".wav", "")] = {"embedding" : emb, "label" : name}
    for testWord in test:
        scores = []
        for query in queries:
            score = cosine(test[testWord]["embedding"][0], queries[query][0])
            scores.append((query, score))
        scores.sort(key=lambda x : x[1])
        results[testWord] = scores
    top1 = 0
    top2 = 0
    top5 = 0
    tot = 0
    for elt in results:
        tot+=1
        name = elt.split("_")[0]
        list2 = [x[0] for x in results[elt][:2]]
        list5 = [x[0] for x in results[elt][:5]]

        if name == results[elt][0][0]:
            top1+=1
        if name in list2:
            top2+=1
        if name in list5:
            top5+=1
    print("\n",top1/tot*100, top2/tot*100, top5/tot*100)
    


In [94]:
for epoch in range(10):
    print("\nEpoch: ", epoch)

    batch_id = 0
    val_losses=[]
    
    for index, x in enumerate(test_dataset.batch(batch_size=300)):
        loss_value, grads, inputs, reconstruction = grad(autoencoder, x, x)
        print_progress_bar(index, test_dataset.get_batch_nb(), loss_value)
        val_losses.append(loss_value)
    print("\nvalidation loss : {}".format(np.mean(val_losses)))
    for index, x in enumerate(train_dataset.batch(batch_size=32)):
        
        loss_value, grads, inputs, reconstruction = grad(autoencoder, x, x)
        optimizer.apply_gradients(zip(grads, autoencoder.trainable_variables),
                            global_step)
        print_progress_bar(index, train_dataset.get_batch_nb()+1, loss_value)
        batch_id +=1
    eval_QbE(autoencoder, train_dataset.get_max_len())




Epoch:  0
validation loss : 0.5757984519004822

100%|██████████████████████████████████████████| 21/21 [00:00<00:00, 206.80it/s]
100%|████████████████████████████████████████| 140/140 [00:00<00:00, 217.86it/s]



 2.142857142857143 2.142857142857143 5.0

Epoch:  1
validation loss : 0.5712370276451111

100%|██████████████████████████████████████████| 21/21 [00:00<00:00, 220.23it/s]
100%|████████████████████████████████████████| 140/140 [00:00<00:00, 270.05it/s]



 2.142857142857143 2.142857142857143 5.0

Epoch:  2
validation loss : 0.5667847394943237

100%|██████████████████████████████████████████| 21/21 [00:00<00:00, 228.73it/s]
100%|████████████████████████████████████████| 140/140 [00:00<00:00, 247.45it/s]



 2.142857142857143 2.142857142857143 5.0

Epoch:  3
validation loss : 0.5624415278434753

100%|██████████████████████████████████████████| 21/21 [00:00<00:00, 228.47it/s]
100%|████████████████████████████████████████| 140/140 [00:00<00:00, 215.52it/s]



 2.142857142857143 2.142857142857143 5.0

Epoch:  4
validation loss : 0.5582073330879211

100%|██████████████████████████████████████████| 21/21 [00:00<00:00, 226.84it/s]
100%|████████████████████████████████████████| 140/140 [00:00<00:00, 223.21it/s]



 2.142857142857143 2.142857142857143 5.0

Epoch:  5
validation loss : 0.5540812611579895

KeyboardInterrupt: 

In [17]:
for epoch in range(10):
    print("\nEpoch: ", epoch)

    batch_id = 0

    for index, tup_d in enumerate(train_dataset.batch_pairs(batch_size=16)):
        x,y = tup_d[0], tup_d[1]

        loss_value, grads, inputs, reconstruction = grad(autoencoder, x, y)
        optimizer.apply_gradients(zip(grads, autoencoder.trainable_variables),
                            global_step)
        print_progress_bar(index, train_dataset.get_batch_nb()+1, loss_value)
        batch_id +=1
    eval_QbE(autoencoder, train_dataset.get_max_len())


Epoch:  0
Object was never used (type <class 'tensorflow.python.ops.tensor_array_ops.TensorArray'>):
<tensorflow.python.ops.tensor_array_ops.TensorArray object at 0x7f1bac43caf0>
If you want to mark it as used call its "mark_used()" method.
It was originally created here:
  File "/home/getalp/leferrae/miniconda3/envs/py38/lib/python3.8/site-packages/keras/backend.py", line 5133, in <genexpr>
    ta.write(ta_index_to_write, out)  File "/home/getalp/leferrae/miniconda3/envs/py38/lib/python3.8/site-packages/tensorflow/python/util/tf_should_use.py", line 243, in wrapped

2023-03-10 16:39:16.640972: W tensorflow/tsl/framework/bfc_allocator.cc:479] Allocator (GPU_0_bfc) ran out of memory trying to allocate 37.21MiB (rounded to 39020032)requested by op RealDiv
If the cause is memory fragmentation maybe the environment variable 'TF_GPU_ALLOCATOR=cuda_malloc_async' will improve the situation. 
Current allocation summary follows.
Current allocation summary follows.
2023-03-10 16:39:16.643321: I tensorflow/tsl/framework/bfc_allocator.cc:1034] BFCAllocator dump for GPU_0_bfc
2023-03-10 16:39:16.643440: I tensorflow/tsl/framework/bfc_allocator.cc:1041] Bin (256): 	Total Chunks: 23, Chunks in use: 23. 5.8KiB allocated for chunks. 5.8KiB in use in bin. 104B client-requested in use in bin.
2023-03-10 16:39:16.643459: I tensorflow/tsl/framework/bfc_allocator.cc:1041] Bin (512): 	Total Chunks: 0, Chunks in use: 0. 0B allocated for chunks. 0B in use in bin. 0B client-requested in use in bin.
2023-03-10 16:39:16.643477: I tensorflow/tsl/framework/bfc_allocator.cc:1041

ResourceExhaustedError: {{function_node __wrapped__RealDiv_device_/job:localhost/replica:0/task:0/device:GPU:0}} failed to allocate memory [Op:RealDiv]