In [None]:
# Show which GPU (if any) is attached to this runtime.
!nvidia-smi
from google.colab import drive # run this only on colab
# Mount Google Drive so the project folder can be reached from the runtime.
drive.mount('/content/gdrive')
# All relative paths used further down ("cellar/...", "#shared_train/...") are resolved from here.
%cd /content/gdrive/MyDrive/Jellyfish/code_and_data/
%ls
# Select TensorFlow 2.x (Colab magic).
%tensorflow_version 2.x

Thu Dec 17 08:09:25 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 455.45.01    Driver Version: 418.67       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   40C    P8    10W /  70W |      0MiB / 15079MiB |      0%      Default |
|                               |                      |                 ERR! |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

# Main code starts here

In [None]:
import os
import librosa
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, Model, optimizers, callbacks

## Make the Dataset

In [None]:
# Load the (file name, class name) table of the shared training data.
files_n_classes = np.loadtxt(
    fname="#shared_train/labels_train.csv", delimiter=",", dtype='S', skiprows=1
).astype(str)
# Keep only the part of every file name that precedes the first '.'.
for row in files_n_classes:
    row[0] = row[0].split('.')[0]

unique_classes, counts = np.unique(files_n_classes[:, 1], return_counts=True)

# Load the per-split file tables; the source files are kept separate per split
# (train / dev / test) to avoid data repetition between the sets.
train_files_n_classes = np.loadtxt("cellar/train_files_n_classes.txt", dtype=str)
dev_files_n_classes = np.loadtxt("cellar/dev_files_n_classes.txt", dtype=str)
test_files_n_classes = np.loadtxt("cellar/test_files_n_classes.txt", dtype=str)

# Number of sequences generated per split (each sequence is built by
# concatenating, and optionally cropping, the given files).
num_train = 1024
num_dev = 128
num_test = 128
def generate_rnd_seq(choices, files_n_classes, min_classes, max_classes, use_rndcrop=False):
    """
    Build one synthetic sequence by concatenating the spectrograms of randomly drawn events.

    choices: 1-D array of class names events are drawn from
    files_n_classes: array of shape (n_files, 2) with columns (file name, class name)
    min_classes: minimum number of distinct classes the drawn sequence must contain
    max_classes: number of events drawn for the sequence
    use_rndcrop: when True every spectrogram is cropped to a random time window of at
        least min(6, n_frames) frames before concatenation
    returns: (spec_seq, noncrepeat_seq, raw_seq, history)
        spec_seq: the event spectrograms concatenated along axis 1
        noncrepeat_seq: class names with consecutive repeats collapsed
        raw_seq: class name of every drawn event
        history: dict recording the chosen file, crop start and frame count per event,
            plus both label sequences
    raises: ValueError if the class-count constraints cannot be met or a drawn class
        has no example file
    """
    n_choices = len(choices)
    if max_classes < 1:
        raise ValueError("max_classes must be at least 1")
    if min_classes > min(max_classes, n_choices):
        # The original loop would never terminate for such a request.
        raise ValueError(
            f"cannot draw {min_classes} distinct classes from {n_choices} choices "
            f"in a sequence of {max_classes} events")

    history = {'rnd_file':[], 'crop_start':[], 'n_timesteps':[]}

    # Always draw at least once (also when min_classes <= 0), then redraw until
    # enough distinct classes are present.
    while True:
        raw_seq = np.random.choice(np.arange(n_choices), size=max_classes)
        if len(np.unique(raw_seq)) >= min_classes:
            break

    file_names = files_n_classes[:, 0]
    file_classes = files_n_classes[:, 1]
    seq_spec = []
    for event_idx in raw_seq:
        event_type = choices[event_idx]
        # A boolean mask keeps the candidates 1-D even when a class has a single
        # file (np.squeeze would collapse that case to a 0-d array).
        event_example_files = file_names[file_classes == event_type]
        if len(event_example_files) == 0:
            raise ValueError(f"no example file available for class '{event_type}'")
        rnd_file = np.random.choice(event_example_files)
        spec = np.load(f"cellar/spectograms/{rnd_file}.npy")
        n_frames = spec.shape[1]
        if use_rndcrop:
            # Clips shorter than 6 frames are allowed: the lower bound is clamped.
            n_timesteps = np.random.randint(min(6, n_frames), n_frames + 1)
            crop_start = np.random.randint(0, n_frames - n_timesteps + 1)
            seq_spec.append(spec[:, crop_start: crop_start + n_timesteps])
        else:
            crop_start = 0
            n_timesteps = n_frames
            seq_spec.append(spec)
        history['rnd_file'].append(rnd_file)
        history['crop_start'].append(crop_start)
        history['n_timesteps'].append(n_timesteps)

    # Keep an event only when it differs from the one right before it.
    seq = raw_seq[np.insert(np.diff(raw_seq).astype(bool), 0, True)]
    noncrepeat_seq = [choices[idx] for idx in seq]
    raw_seq = [choices[idx] for idx in raw_seq]
    spec_seq = np.concatenate(seq_spec, 1)
    history['raw_seq'] = raw_seq
    history['noncrepeat_seq'] = noncrepeat_seq
    return spec_seq, np.asarray(noncrepeat_seq), np.asarray(raw_seq), history

def mkdirs(string):
    """
    Create the output folders of one dataset split under cellar/task2_sequences.

    string: split name (e.g. "train"), used as the leaf folder of every output category
    """
    for category in ("seq_spectograms", "labels_noncrepeat", "labels_raw", "history"):
        # exist_ok avoids the check-then-create race and makes the call idempotent.
        os.makedirs(f"cellar/task2_sequences/{category}/{string}", exist_ok=True)

def makeit(string, num, unique_classes, files_n_classes):
    """
    Generate `num` random sequences for one split, write them to disk and return the split table.

    string: split name; selects the output sub-folders and the "<string>_set.npy" table file
    num: number of sequences to generate
    unique_classes: class names; the position of a name is its integer label
    files_n_classes: (file name, class name) table the events are drawn from
    returns: object array of shape (num, 5) whose columns are
        [spectrogram path, collapsed integer labels, raw integer labels,
         collapsed class names, raw class names]
    """
    base = "cellar/task2_sequences"
    class_map = {uniq: i for i, uniq in enumerate(unique_classes)}
    # The rows hold sequences of different lengths, so the table has to be an
    # explicit object array: letting np.asarray infer a dtype from ragged rows
    # raises ValueError on NumPy >= 1.24.
    dataset = np.empty((num, 5), dtype=object)
    for i in range(num):
        spec_seq, noncrepeat_seq, raw_seq, hist = generate_rnd_seq(unique_classes, files_n_classes, 1, 5)
        noncrepeat_idx_seq = [class_map[event_name] for event_name in noncrepeat_seq]
        raw_idx_seq = [class_map[event_name] for event_name in raw_seq]
        spec_path = f"{base}/seq_spectograms/{string}/{i}.npy"
        np.save(spec_path, spec_seq)
        np.savetxt(f"{base}/labels_noncrepeat/{string}/{i}.txt", noncrepeat_seq, fmt="%s")
        np.savetxt(f"{base}/labels_raw/{string}/{i}.txt", raw_seq, fmt="%s")
        np.savetxt(f"{base}/history/{string}/{i}.txt", [hist], fmt='%s')
        row = [spec_path, noncrepeat_idx_seq, raw_idx_seq, noncrepeat_seq, raw_seq]
        # Cell-by-cell assignment stores each sequence as a single object.
        for col, item in enumerate(row):
            dataset[i, col] = item
    np.save(f"{base}/{string}_set.npy", dataset)
    print(f" made {string}")
    return dataset

# Reuse the cached split tables when all three are present; otherwise
# regenerate every split from scratch.
if all(os.path.isfile(table_path) for table_path in (
        "cellar/task2_sequences/train_set.npy",
        "cellar/task2_sequences/dev_set.npy",
        "cellar/task2_sequences/test_set.npy")):
    # The tables are object arrays, hence allow_pickle=True.
    train_set = np.load("cellar/task2_sequences/train_set.npy", allow_pickle=True)
    dev_set = np.load("cellar/task2_sequences/dev_set.npy", allow_pickle=True)
    test_set = np.load("cellar/task2_sequences/test_set.npy", allow_pickle=True)
else:
    print("making datasets")
    mkdirs("train")
    mkdirs("dev")
    mkdirs("test")
    train_set = makeit("train", num_train, unique_classes, train_files_n_classes)
    dev_set = makeit("dev", num_dev, unique_classes, dev_files_n_classes)
    test_set = makeit("test", num_test, unique_classes, test_files_n_classes)
    

## TRAINING

### model for training

In [None]:
# Integer used for the CTC blank; the data generators also use it to pad label sequences.
# NOTE(review): presumably equal to len(unique_classes), i.e. the last unit of the
# (len(unique_classes)+1)-way softmax output — confirm that there are exactly 10 classes.
blank_label = 10 # label denoting blank
# Number of features per time frame expected by the model input.
# NOTE(review): 52 looks like extract_SIF's output height for 513-bin spectrograms
# ((513-13)//10 + 1 = 51 bands plus one energy row) — confirm against the stored spectrograms.
freq_dim = 52

In [None]:
# ctc loss layer
class ctclosslayer(layers.Layer):
    """
    Pass-through layer that attaches the CTC loss and an edit-distance metric to the model.

    call(logits, labels) returns `logits` unchanged. As side effects it registers
    the batch-mean CTC cost via add_loss and the batch-mean normalized edit
    distance between the greedy CTC decoding and the labels via add_metric
    (metric name "edit_distance").

    logits: per-frame class scores of shape (batch, time, classes); in this
        notebook the softmax output of the 'output' layer is passed in
    labels: integer label matrix of shape (batch, label_width); entries equal to
        the module-level `blank_label` are treated as padding by the metric
    """
    def __init__(self, trainable=True, name=None, dtype=None, dynamic=False, **kwargs):
        super(ctclosslayer, self).__init__(trainable=trainable, name=name, dtype=dtype, dynamic=dynamic, **kwargs)

    def call(self, logits, labels, **kwargs):
        # The first two time steps are excluded from both the loss and the metric.
        considered_logits = tf.identity(logits[:, 2:, :]) 

        batch_size = tf.shape(labels)[0]
        # Every sample is given the full (padded) label width and the full number
        # of remaining time steps as its length.
        label_length = tf.shape(labels)[1] * tf.ones((batch_size, 1), tf.int32)
        logit_length = tf.shape(considered_logits)[1] * tf.ones((batch_size, 1), tf.int32)
        ctcloss = tf.keras.backend.ctc_batch_cost(labels, considered_logits, logit_length, label_length)
        self.add_loss(tf.reduce_mean(ctcloss))

        # Padding entries become -1 before the edit-distance comparison.
        # NOTE(review): this presumably lines up with the -1 fill that ctc_decode
        # uses for its own padding — confirm.
        decoded_labels = tf.where(tf.equal(labels, blank_label), -1, labels)
        # Squeeze only the trailing axis: a bare tf.squeeze would also drop the
        # batch axis when the batch holds a single sample, leaving a scalar where
        # ctc_decode needs one length per sample.
        decode_length = tf.squeeze(logit_length, axis=-1)
        # Greedy decoding, truncated to the label width.
        decoded_logits = tf.keras.backend.ctc_decode(considered_logits, decode_length)[0][0][:, :tf.shape(labels)[1]]
        editd = tf.edit_distance(tf.sparse.from_dense(decoded_logits), tf.sparse.from_dense(tf.cast(decoded_labels, tf.int64)), normalize=True)
        self.add_metric(tf.reduce_mean(editd), aggregation='mean', name="edit_distance")

        return logits

In [None]:
# model for training
# Inputs: a feature map (time, freq_dim, 1 channel) and the integer label
# sequence, which is only consumed by the CTC loss layer at the end.
sif_input = layers.Input((100, freq_dim, 1), name= 'sif_input')
target_labels = layers.Input((None,), dtype=tf.int32, name= 'target_labels')

# Four identical stages: 3x3 conv -> batch norm -> max-pool -> dropout.
# Every pooling window is (1, k), so only the second (frequency) axis shrinks
# and the number of time steps is left untouched by the conv stack.
conv1 = layers.Conv2D(128, (3,3), activation='relu', padding='same')(sif_input)
bnconv11 = layers.BatchNormalization()(conv1)
mp1 = layers.MaxPooling2D((1,3))(bnconv11)
mp1 = layers.Dropout(0.5)(mp1)

conv2 = layers.Conv2D(128, (3,3), activation='relu', padding='same')(mp1)
bnconv2 = layers.BatchNormalization()(conv2)
mp2 = layers.MaxPooling2D((1,2))(bnconv2)
mp2 = layers.Dropout(0.5)(mp2)

conv3 = layers.Conv2D(128, (3,3), activation='relu', padding='same')(mp2)
bnconv3 = layers.BatchNormalization()(conv3)
mp3 = layers.MaxPooling2D((1,2))(bnconv3)
mp3 = layers.Dropout(0.5)(mp3)

conv4 = layers.Conv2D(128, (3,3), activation='relu', padding='same')(mp3)
bnconv4 = layers.BatchNormalization()(conv4)
mp4 = layers.MaxPooling2D((1,2))(bnconv4)
mp4 = layers.Dropout(0.5)(mp4)

# Merge the remaining frequency and channel axes into one feature vector per
# time step, then shorten the time axis with a strided 1-D max-pool.
conv_output = layers.Reshape((-1, mp4.shape[2]*mp4.shape[3]))(mp4)
conv_output = layers.MaxPool1D(pool_size=25, strides=15)(conv_output)

# Two stacked bidirectional LSTMs, each emitting one vector per pooled step.
lstm_seq = layers.Bidirectional(layers.LSTM(64, return_sequences=True))(conv_output)
lstm_seq = layers.Dropout(0.5)(lstm_seq)
lstm_seq = layers.Bidirectional(layers.LSTM(64, return_sequences=True))(lstm_seq)
lstm_seq = layers.Dropout(0.5)(lstm_seq)

# One softmax unit per class plus one extra unit.
# NOTE(review): the extra unit is presumably the CTC blank — confirm it matches blank_label.
out_seq = layers.Dense(len(unique_classes)+1, activation='softmax', name='output')(lstm_seq)
# Pass-through layer that registers the CTC loss and the edit-distance metric.
out_seq = ctclosslayer()(out_seq, target_labels)

model = Model([sif_input, target_labels], out_seq)


Instructions for updating:
Create a `tf.sparse.SparseTensor` and use `tf.sparse.to_dense` instead.


In [None]:
# No loss argument is passed: the loss is registered inside the model by ctclosslayer (add_loss).
# `learning_rate` is the supported keyword; `lr` is a deprecated alias that
# newer Keras releases no longer accept.
model.compile(optimizers.Adam(learning_rate=0.005, clipnorm=5.0))

In [None]:
# Print the layer-by-layer structure of the training model.
model.summary()

Model: "functional_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
sif_input (InputLayer)          [(None, 100, 52, 1)] 0                                            
__________________________________________________________________________________________________
conv2d (Conv2D)                 (None, 100, 52, 128) 1280        sif_input[0][0]                  
__________________________________________________________________________________________________
batch_normalization (BatchNorma (None, 100, 52, 128) 512         conv2d[0][0]                     
__________________________________________________________________________________________________
max_pooling2d (MaxPooling2D)    (None, 100, 17, 128) 0           batch_normalization[0][0]        
_______________________________________________________________________________________

# train the model

### functions for extracting features

In [None]:
def extract_SIF(raw_spec, W=13, frequency_stride=10):
    """
    Band-averaged, floor-subtracted spectrogram feature with an appended energy row.

    raw_spec: spectogram of shape (Fbins, time)
    Fbins: number of freq bins
    W: number of adjacent frequency bins averaged into one band
    frequency_stride: hop, in bins, between the starts of consecutive bands
    returns: array of shape (F + 1, time) with F = (Fbins - W)//frequency_stride + 1;
        rows 0..F-1 are the bands after subtracting the per-frame minimum,
        the last row is the per-frame sum of those bands
    raises: ValueError if raw_spec has fewer than W frequency bins
    """
    n_bins, n_frames = raw_spec.shape[0], raw_spec.shape[1]
    if n_bins < W:
        # Without this check the failure surfaces later as an obscure
        # empty-reduction / negative-dimension ValueError.
        raise ValueError(
            f"raw_spec has {n_bins} frequency bins, at least W={W} are required")

    # frequency downsampling into F bins
    F = (n_bins - W)//frequency_stride + 1
    SIF = np.zeros((F, n_frames))
    for band in range(F):
        start = band*frequency_stride
        SIF[band] = np.mean(raw_spec[start:start+W], 0)
    # denoise: remove, per time frame, the smallest band value
    SIF_dn = SIF - np.min(SIF, axis=0)
    # augment SIF_dn: append the per-frame (time-domain) energy as an extra row
    energy_shorttime = np.sum(SIF_dn, axis=0)
    SIF_aug = np.concatenate(
        [SIF_dn, np.expand_dims(energy_shorttime, 0)], axis= 0)

    return SIF_aug


def extract_mbe(spec, sr=44100, n_fft=1024, n_mels=40):
    """
    Log mel band energies of a spectrogram.

    spec: spectrogram that is projected with the mel filterbank (np.dot(mel_basis, spec))
    sr, n_fft, n_mels: parameters forwarded to librosa.filters.mel
    returns: natural log of the mel-projected spectrogram, offset by 1e-8
    """
    filterbank = librosa.filters.mel(sr=sr, n_fft=n_fft, n_mels=n_mels)
    band_energies = np.dot(filterbank, spec)
    # The small offset keeps the logarithm finite for empty bands.
    return np.log(band_energies + 1e-8)


def extract_melspec(spec, n_mels = 64):
    """
    Mel spectrogram computed from the element-wise square of `spec`.

    spec: spectrogram; it is squared before being handed to librosa
    n_mels: number of mel bands
    returns: output of librosa.feature.melspectrogram for the squared input at sr=44100
    """
    return librosa.feature.melspectrogram(S=spec**2, sr=44100, n_mels= n_mels)


### data generator class

In [None]:
# the SIF generator for task 2
class task2_SIF_generator():
    """
    Stateful batch generator over stored sequence spectrograms.

    Each batch is a 1-tuple holding a dict with the keys 'sif_input' (features,
    zero-padded along time to the longest item of the batch) and 'target_labels'.
    The read position is kept in `self.count`, so one instance should feed a
    single consumer at a time; call reset() to start over.
    """
    def __init__(self, spec_files, labels, blank_label, batch_size, not_infinite, feature='sif', pad_mode='constant', pad_labels=False):
        """
        spec_files: filepaths
        labels: list or np array
        blank_label: value used to pad labels when pad_mode is 'constant'
        batch_size: number of items per batch
        not_infinite: set True is generator should terminate
        feature in ['sif', 'mbe', 'melspec']; any other value falls back to 'melspec'
        pad_mode: np.pad mode used for label padding
        pad_labels: when True, labels of a batch are padded to a common length
            and returned as an int32 array; otherwise a list of arrays is returned
        """
        self.count = 0
        self.spec_files = spec_files
        self.maxcount = len(spec_files)
        self.labels = labels
        self.blank_label = blank_label
        self.batch_size = batch_size
        self.not_infinite = not_infinite
        self.continue_generation = True
        self.pad_labels = pad_labels
        self.pmode = pad_mode
        self.feature = feature
        # Kept as an attribute so existing code calling `pad_fn(label, pad_len)` still works.
        self.pad_fn = self._pad_label

    def _pad_label(self, label, pad_len):
        """Append `pad_len` entries to `label` using the configured pad mode."""
        if self.pmode == 'constant':
            return np.pad(label, [0, pad_len], mode='constant', constant_values=self.blank_label)
        # `constant_values` is only accepted by mode='constant'; passing it with
        # any other mode makes np.pad raise, so it is left out here.
        return np.pad(label, [0, pad_len], mode=self.pmode)

    def _extract_feature(self, raw_spec):
        """Apply the feature extractor selected by `self.feature`."""
        if self.feature == 'mbe':
            return extract_mbe(raw_spec)
        if self.feature == 'sif':
            return extract_SIF(raw_spec)
        return extract_melspec(raw_spec)

    def make_batch(self):
        """
        Load the next `batch_size` items (wrapping around the file list) and
        return them as `({'sif_input': ..., 'target_labels': ...},)`.

        When `not_infinite` is set, the batch is cut short after the last file
        and `continue_generation` is switched off.
        """
        self.count = self.count % self.maxcount
        MAX = self.count + self.batch_size

        sifs = []
        batch_labels = []
        max_spec_length = 0
        max_label_len = 0
        for i in range(self.count, MAX):

            idx = i % self.maxcount
            # NOTE(review): allow_pickle=True executes pickled content; only load trusted files.
            raw_spec = np.load(self.spec_files[idx], allow_pickle=True)
            max_spec_length = max(max_spec_length, raw_spec.shape[1])
            max_label_len = max(max_label_len, len(self.labels[idx]))

            batch_labels.append(np.asarray(self.labels[idx], np.int32))
            sifs.append(self._extract_feature(raw_spec))

            # terminate for non-infinite (the last file is still part of this batch)
            if self.not_infinite and i >= self.maxcount-1: 
                self.continue_generation = False
                break
            
        self.count += self.batch_size
        
        # Zero-pad every feature map along time, then transpose to (time, features).
        for i in range(len(sifs)):
            sif = sifs[i]
            pad_len = max_spec_length - sif.shape[1]
            sifs[i] = np.pad(sif, ((0,0),(0,pad_len))).T

        if self.pad_labels:
            for i in range(len(batch_labels)):
                label = batch_labels[i]
                pad_len = max_label_len - len(label)
                batch_labels[i] = self.pad_fn(label, pad_len)
            batch_labels = np.asarray(batch_labels, np.int32)

        sifs = np.asarray(sifs, np.float32)

        # A 1-tuple on purpose: it matches the `({...},)` structure declared for
        # tf.data.Dataset.from_generator.
        return ({'sif_input':sifs, 'target_labels':batch_labels},)
        

    def generator(self):
        '''
        Yield batches produced by make_batch() for as long as
        `continue_generation` is True (forever unless `not_infinite` is set).
        '''
        while self.continue_generation:
            yield self.make_batch()

    def reset(self):
        """Rewind to the first file and re-enable generation."""
        self.count = 0
        self.continue_generation = True



### Build the train, dev and test data generators

In [None]:
# Data generators that load batches; the labels (column 1 of each split table)
# are the integer sequences with consecutive repeats removed.
batch_size = 32
feature = 'sif'

# Settings shared by the three generators.
_shared_generator_args = dict(
    blank_label=blank_label,
    feature=feature,
    pad_mode='constant',  # 'edge'
    batch_size=batch_size,
    pad_labels=True,
)

# Train and dev generators loop forever; the test generator stops after one pass.
train_datgenerator = task2_SIF_generator(
    spec_files=train_set[:,0], labels=train_set[:,1], not_infinite=False, **_shared_generator_args)
dev_datgenerator = task2_SIF_generator(
    spec_files=dev_set[:,0], labels=dev_set[:,1], not_infinite=False, **_shared_generator_args)
test_datgenerator = task2_SIF_generator(
    spec_files=test_set[:,0], labels=test_set[:,1], not_infinite=True, **_shared_generator_args)

# Element signature of one batch: a 1-tuple holding the input dict.
_batch_types = ({'sif_input':tf.float32, 'target_labels':tf.int32},)
_batch_shapes = ({'sif_input':tf.TensorShape((None, None, freq_dim)),
                  'target_labels':tf.TensorShape((None, None))},)

train_datgenerator_tf = tf.data.Dataset.from_generator(
    train_datgenerator.generator, _batch_types, output_shapes=_batch_shapes
).prefetch(tf.data.experimental.AUTOTUNE)

dev_datgenerator_tf = tf.data.Dataset.from_generator(
    dev_datgenerator.generator, _batch_types, output_shapes=_batch_shapes
).prefetch(tf.data.experimental.AUTOTUNE)


In [None]:
# Identifier of this run; it selects the sub-folder under cellar/run/task2.
tf_log_num = "sif/1.1" 
if not os.path.exists(f"cellar/run/task2/{tf_log_num}/ckeckpoints/best_val"):
    # Fresh run: make sure the run folder exists and store a diagram of the model.
    if not os.path.exists(f'cellar/run/task2/{tf_log_num}'):
        os.makedirs(f'cellar/run/task2/{tf_log_num}')
    tf.keras.utils.plot_model(model, to_file=f'cellar/run/task2/{tf_log_num}/model.png', show_shapes=True)
else:
    # A best-validation checkpoint already exists: it replaces the freshly built model.
    print("log dir exists")
    model = tf.keras.models.load_model(f"cellar/run/task2/{tf_log_num}/ckeckpoints/best_val")
    model.summary()

log dir exists
Model: "functional_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
sif_input (InputLayer)          [(None, 100, 52, 1)] 0                                            
__________________________________________________________________________________________________
conv2d (Conv2D)                 (None, 100, 52, 128) 1280        sif_input[0][0]                  
__________________________________________________________________________________________________
batch_normalization (BatchNorma (None, 100, 52, 128) 512         conv2d[0][0]                     
__________________________________________________________________________________________________
max_pooling2d (MaxPooling2D)    (None, 100, 17, 128) 0           batch_normalization[0][0]        
________________________________________________________________________

In [None]:
# Train the model with the CTC loss; change tf_log_num for every new model.
print("batch_size: ", batch_size, "lr: ", model.optimizer.learning_rate.numpy())

training_callbacks = [
    callbacks.TensorBoard(
        log_dir=f'cellar/run/task2/{tf_log_num}/logs',
        histogram_freq=8,
        write_images=True),
    # Two checkpoints: best validation edit distance and best training edit distance.
    callbacks.ModelCheckpoint(
        f"cellar/run/task2/{tf_log_num}/ckeckpoints/best_val",
        monitor="val_edit_distance", verbose=1, save_best_only=True, period=10),
    callbacks.ModelCheckpoint(
        f"cellar/run/task2/{tf_log_num}/ckeckpoints/best_train",
        monitor='edit_distance', verbose=1, save_best_only=True, period=10),
    # Scale the learning rate by 0.1 when the training loss stops improving.
    callbacks.ReduceLROnPlateau(monitor='loss', factor=0.1, verbose=1, min_delta=0.001),
    # Report the current learning rate at the end of every epoch.
    callbacks.LambdaCallback(
        on_epoch_end=lambda epoch, logs: tf.print(f" - lr: {model.optimizer.learning_rate.numpy()} - ")),
    # callbacks.EarlyStopping(patience=10, restore_best_weights=True),
]

history = model.fit(
    train_datgenerator_tf,
    epochs=500,
    validation_data=dev_datgenerator_tf,
    steps_per_epoch=len(train_set)//batch_size,
    validation_steps=len(dev_set)//batch_size,
    callbacks=training_callbacks,
)


In [None]:
# save model and save model weights
# Both files go into the run folder selected by tf_log_num.
# NOTE(review): the model contains the custom ctclosslayer; reloading the .h5 file
# presumably needs it passed via custom_objects — confirm before relying on it.
model.save(filepath=f"cellar/run/task2/{tf_log_num}/model_save.h5")
model.save_weights(filepath=f"cellar/run/task2/{tf_log_num}/model_weights.h5")

In [None]:
# evaluate on test set
# The test generator was built with not_infinite=True, so one pass ends by itself;
# reset() rewinds it so that re-running this cell starts from the first file again.
test_datgenerator.reset()
# Returns [loss, edit_distance] for the test split.
model.evaluate(test_datgenerator.generator())



[6.809807300567627, 0.33059895038604736]

In [None]:
# evaluate on dev set
# The dev generator normally loops forever; make it finite for exactly one pass.
dev_datgenerator.not_infinite = True
dev_datgenerator.reset()
try:
    model.evaluate(dev_datgenerator.generator())
finally:
    # Restore the infinite mode even if evaluation fails, and rewind: the finished
    # pass leaves continue_generation switched off, which would otherwise make
    # dev_datgenerator_tf yield nothing on the next model.fit call.
    dev_datgenerator.not_infinite = False
    dev_datgenerator.reset()



In [None]:
# evaluate on train set
# The train generator normally loops forever; make it finite for exactly one pass.
train_datgenerator.not_infinite = True
train_datgenerator.reset()
try:
    model.evaluate(train_datgenerator.generator())
finally:
    # Restore the infinite mode even if evaluation fails, and rewind: the finished
    # pass leaves continue_generation switched off, which would otherwise make
    # train_datgenerator_tf yield nothing on the next model.fit call.
    train_datgenerator.not_infinite = False
    train_datgenerator.reset()



## Predict function for inference

In [None]:
# model for predicting
# Sub-model from the feature input to the softmax 'output' layer; the label
# input and the CTC loss layer are left out, so no labels are needed at inference.
predict_model = Model(model.get_layer('sif_input').input, model.get_layer('output').output)

# Integer label -> "-<class name>"; the leading '-' acts as the separator when
# the names are joined. -1 maps to the empty string.
# NOTE(review): -1 is presumably the filler value ctc_decode puts in unused positions — confirm.
class_map = {i: f"-{uniq}" for i, uniq in enumerate(unique_classes)}
class_map[-1] = ""

def Predict(spec, class_map):
    """
    Predict the event sequence of one spectrogram.

    spec: spectrogram handed to extract_SIF
    class_map: dict from decoded integer label to the string emitted for it
    returns: the mapped strings joined together, with leading/trailing '-' removed

    NOTE(review): the feature is always extract_SIF (independent of the
    `feature` setting used for training) and all time steps are decoded, while
    the training loss skips the first two — confirm both are intended.
    """
    features = extract_SIF(spec).T
    batch = tf.expand_dims(features, 0)
    probabilities = predict_model(batch, training= False)
    n_steps = tf.shape(probabilities)[1]
    # Beam-search decoding (greedy=False); keep the best path of the single batch item.
    best_path = tf.keras.backend.ctc_decode(probabilities, [n_steps], greedy=False)[0][0]
    pieces = [class_map[i] for i in best_path[0].numpy()]
    return "".join(pieces).strip("-")

In [None]:
# Sanity check on one generated test sequence: column 0 of a split-table row is the
# spectrogram path, column 3 the reference class names with consecutive repeats removed.
i=11 # example predict on a sample from test set
print("pred: ", Predict(np.load(test_set[i][0]), class_map), "\ngndt: ", "".join([f"{c} " for c in test_set[i][3]]))

pred:  jackhammer-drilling-gun_shot-dog_bark-street_music 
gndt:  jackhammer drilling gun_shot dog_bark street_music 


# Predict on all spectrogram files in the `test_feat_path` folder

In [None]:
# CSV file the predictions are written to (one "<file id>,<prediction>" row per input).
est_save_path = "task2_labels_test.csv"
# est_save_path = "cellar/temp/task2/est.csv"

# Folder holding the spectrogram files to run the prediction on.
test_feat_path= "test_task2/feats"
# test_feat_path= "#shared_train/sample_test_task2/feats"
# test_feat_path= "#shared_train/sample_test_task2/feats"

In [None]:
# make the predictions and save as a csv file
predictions = []
class_map = {i: f"-{uniq}" for i, uniq in enumerate(unique_classes)}
class_map[-1] = ""
# os.listdir returns entries in arbitrary order; sorting makes the CSV row order reproducible.
for f in sorted(os.listdir(test_feat_path)):
    # Skip anything that is not a stored spectrogram (e.g. checkpoint folders).
    if not f.endswith(".npy"):
        continue
    spec = np.load(f"{test_feat_path}/{f}")
    predictions.append([f.split('.')[0], Predict(spec, class_map)])
# Create the folder the CSV is actually written to (nothing to do for the current directory).
est_save_dir = os.path.dirname(est_save_path)
if est_save_dir:
    os.makedirs(est_save_dir, exist_ok=True)
np.savetxt(est_save_path, predictions, delimiter =",",  fmt ='%s') 

Instructions for updating:
Create a `tf.sparse.SparseTensor` and use `tf.sparse.to_dense` instead.


# for running the score function from utils

In [None]:
# !pip install python-Levenshtein

In [None]:
import utils # the utils.py given for evaluation

In [None]:
# Reference labels that the saved predictions are scored against.
# NOTE(review): these labels belong to "#shared_train/sample_test_task2"; they only match
# the predictions when test_feat_path points at that sample set's feats folder — confirm.
ground_truth_labels_path = "#shared_train/sample_test_task2/labels.csv"

In [None]:
# Overall score first, then a per-file breakdown of prediction vs. ground truth.
print("score:", utils.evals(ground_truth_labels_path, est_save_path, 2))
p = utils.read_csv(est_save_path)
t = utils.read_csv(ground_truth_labels_path)
for k in sorted(p.keys()):
    ed, sc = utils.editDistance(t[k], p[k])
    # Blank run as wide as the key, so the pred/gndt lines line up under the file id.
    indent = " " * len(f"{k}")
    print(f"{k}:", "editD: ", ed, "score: %.3f"%sc,
          "\n", indent, " pred: ", p[k],
          "\n", indent, " gndt: ", t[k])

score: 0.825
a00001: editD:  1 score: 0.750 
         pred:  dog_bark-jackhammer-engine_idling 
         gndt:  siren-dog_bark-jackhammer-engine_idling
a00002: editD:  1 score: 0.667 
         pred:  car_horn-gun_shot 
         gndt:  car_horn-drilling-gun_shot
a00003: editD:  1 score: 0.500 
         pred:  drilling 
         gndt:  drilling-siren
a00004: editD:  0 score: 1.000 
         pred:  jackhammer-street_music-drilling-dog_bark 
         gndt:  jackhammer-street_music-drilling-dog_bark
a00005: editD:  0 score: 1.000 
         pred:  jackhammer-children_playing-street_music 
         gndt:  jackhammer-children_playing-street_music
a00006: editD:  0 score: 1.000 
         pred:  jackhammer 
         gndt:  jackhammer
a00007: editD:  0 score: 1.000 
         pred:  engine_idling-street_music 
         gndt:  engine_idling-street_music
a00008: editD:  2 score: 0.333 
         pred:  air_conditioner-dog_bark 
         gndt:  children_playing-car_horn-dog_bark
a00009: editD:  0 scor