# Preparations

## Prepare Google Drive

In [None]:
# Mount the user's Google Drive inside the Colab VM. The mount point is kept in a
# constant because the checkpoint path is derived from it further down.
from google.colab import drive
GOOGLE_DRIVE_MOUNT = "/content/gdrive"
drive.mount(GOOGLE_DRIVE_MOUNT)

In [None]:
% cd ..
! cp -rf '/content/gdrive/My Drive/colab/Frame_Classification' .
% cd Frame_Classification

In [None]:
# Third-party packages that are not imported from the project folder itself.
# NOTE(review): versions are unpinned; the nlpaug calls used below (naa.PitchAug,
# naa.NoiseAug(...).substitute) may depend on a specific release - confirm and pin.
! pip install wavio
! pip install soundfile
! pip install nlpaug
! pip install keras-metrics

## Imports

In [None]:
import os
import numpy as np
from core import read_meta_yaml
import wave
import contextlib
import wavio
from scipy import signal
import matplotlib.pyplot as plt
from itertools import groupby
import librosa
import nlpaug.flow as naf
import nlpaug.augmenter.spectrogram as nas
import nlpaug.augmenter.audio as naa

## Env Variables

In [None]:
# Training split.
# events folder: its entries are listed to obtain the class labels, and clips are
#                read from <events folder>/<class name>/<audio_filename>.
# bgs folder:    every file in it is opened as a background WAV recording.
# *_meta_folder: despite the name this is the path of a YAML file handed to
#                read_meta_yaml, not a directory.
train_events_folder = './audio_data/train/events'
train_bgs_folder = './audio_data/train/bgs/audio'
train_events_meta_folder = './audio_data/train/cv_setup/events_evaltest.yaml'

# Validation split, same layout as the training split.
val_events_folder = './audio_data/val/events'
val_bgs_folder = './audio_data/val/bgs/audio'
val_events_meta_folder = './audio_data/val/cv_setup/events_evaltest.yaml'

In [None]:
# Length, in seconds, of the audio excerpt cut out for every sample.
WINDOW_WIDTH = 5e-1  # s
# Only used to derive SPECTROGRAM_WIDTH below; it is not passed to signal.spectrogram.
# NOTE(review): presumably meant to mirror the spectrogram's effective hop - confirm.
HOP_LENGTH = 225
# Used to convert seconds to sample indices and passed to signal.spectrogram.
# Files are not resampled, so this assumes every WAV is 44.1 kHz - TODO confirm.
SAMPLE_RATE = 44100
# Height (first axis) of the network input; presumably the number of frequency bins
# signal.spectrogram returns with its default settings - verify.
SPECTROGRAM_HEIGH = 129
# Width (second axis) of the network input, i.e. time frames per excerpt.
SPECTROGRAM_WIDTH = int(WINDOW_WIDTH * SAMPLE_RATE / HOP_LENGTH)
# mask_factor handed to the nlpaug frequency/time masking augmenters.
MASK_FACTOR = 40
# How many masked variants are generated from each augmented spectrogram.
NMB_OF_GENERATED_IMG_PER_IMG = 1
# Not referenced anywhere in the visible notebook; presumably a decision threshold
# applied to predicted probabilities at inference time - confirm.
PROB_TRESHOLD = 0.4

## Data Generators

In [None]:
# One class per sub-folder of the training events directory, plus the 'bg' class that
# the generators serve from the background recordings.
# os.listdir returns entries in arbitrary order, so the names are sorted to keep the
# label -> index mapping identical between sessions (otherwise a resumed checkpoint
# can silently end up with permuted classes). Non-directories are skipped so a stray
# file cannot become a class.
class_labels = sorted(
    entry for entry in os.listdir(train_events_folder)
    if os.path.isdir(os.path.join(train_events_folder, entry))
) + ['bg']
class_labels

In [None]:
# Number of output classes and the label -> integer index lookup used for one-hot encoding.
NUM_CLASSES = len(class_labels)
class_to_idx = dict(zip(class_labels, range(NUM_CLASSES)))
class_to_idx

In [None]:
# Inverse lookup: integer index -> label.
idx_to_class = {index: label for label, index in class_to_idx.items()}
idx_to_class

In [None]:
def get_duration(wav_file_name):
    """Return the playing time of a WAV file in seconds.

    The value is the frame count divided by the frame rate, both read from the
    file header via the standard-library ``wave`` module.
    """
    with wave.open(wav_file_name, 'r') as reader:
        return reader.getnframes() / float(reader.getframerate())

In [None]:
def get_one_class_generator(class_name, phase = 'train', debug=False):
    """Endlessly yield randomly placed WINDOW_WIDTH-second excerpts of one class.

    Parameters
    ----------
    class_name : str
        ``'bg'`` serves every file of the background folder; any other value is
        used as a key into the mapping returned by ``read_meta_yaml``.
    phase : str
        ``'train'`` selects the training folders, anything else the validation ones.
    debug : bool
        When true, print a message each time the full file list has been served.

    Yields
    ------
    tuple
        ``(class_name, wav_path, onset, offset)`` with onset/offset as Python
        floats in seconds and ``offset - onset == WINDOW_WIDTH``.

    Raises
    ------
    ValueError
        If a complete pass finds no recording/segment of at least WINDOW_WIDTH
        seconds. Without this check the generator would spin forever.
    """
    if phase == 'train':
        bgs_folder = train_bgs_folder
        events_folder = train_events_folder
        events_meta_folder = train_events_meta_folder
    else:
        bgs_folder = val_bgs_folder
        events_folder = val_events_folder
        events_meta_folder = val_events_meta_folder

    def _candidates():
        # One pass over the sources of this class as (path, usable start, usable end).
        if class_name == 'bg':
            for file in os.listdir(bgs_folder):
                path = bgs_folder + '/' + file
                yield path, 0.0, get_duration(path)
        else:
            for file in read_meta_yaml(events_meta_folder)[class_name]:
                path = events_folder + '/' + class_name + '/' + file['audio_filename']
                yield path, file['segment'][0], file['segment'][1]

    while True:
        produced = False
        for path, start, end in _candidates():
            if end - start < WINDOW_WIDTH:
                # too short to hold a full window
                continue

            # np.random.random() returns a scalar; calling float() on the 1-element
            # array produced by np.random.random(1) is deprecated in NumPy >= 1.25.
            onset = float(start + np.random.random() * (end - start - WINDOW_WIDTH))
            produced = True
            yield class_name, path, onset, onset + WINDOW_WIDTH

        if not produced:
            raise ValueError(
                "no '{}' recording for phase '{}' is at least {} s long".format(
                    class_name, phase, WINDOW_WIDTH))

        if debug:
            print(class_name, "is done, starting from the beginning...")

In [None]:
def raw_batch_generator(batch_size, phase = 'train', debug=False):
    """Endlessly yield lists of ``batch_size`` excerpt descriptors.

    One infinite per-class generator is created for every entry of ``class_labels``.
    For each slot of a batch a class is picked uniformly at random and the next
    item of that class' generator is taken.
    """
    per_class = [get_one_class_generator(label, phase, debug) for label in class_labels]
    while True:
        picks = np.random.randint(0, len(per_class), size=batch_size)
        yield [next(per_class[pick]) for pick in picks]

In [None]:
def create_spectrogram_from_wav_file(wavfile_path, onset, offset):
    """Return the spectrogram of the [onset, offset] excerpt (seconds) of a WAV file.

    Multi-channel audio is mixed down by summing the channels. If the requested
    excerpt runs past the end of the file, a window of
    ``int(SAMPLE_RATE * WINDOW_WIDTH)`` samples is built instead by reading from
    sample 0 and wrapping around (``onset`` is not used in that case).
    """
    waveform = wavio.read(wavfile_path).data
    # second axis is indexed as channels: sum them, or just drop the axis for mono
    waveform = np.sum(waveform, axis = 1) if waveform.shape[1] > 1 else waveform.reshape((-1,))

    total_samples = waveform.shape[0]
    if offset*SAMPLE_RATE > total_samples:
        window_length = int(SAMPLE_RATE * WINDOW_WIDTH)
        samples = waveform[np.arange(window_length) % total_samples]
    else:
        samples = waveform[int(onset*SAMPLE_RATE):int(offset*SAMPLE_RATE)]

    # only the power matrix is needed; the frequency and time axes are discarded
    _, _, spectrogram = signal.spectrogram(samples, SAMPLE_RATE)
    return spectrogram

def create_spectrogram_from__aug_wav_file(wavfile_path, onset, offset):
    """Return the spectrogram of a randomly augmented excerpt of a WAV file.

    The [onset, offset] excerpt (seconds) is cut from the float32 waveform
    (channels summed; wrap-around from sample 0 when the excerpt runs past the end
    of the file), passed through pitch, speed and shift augmenters with freshly
    drawn random factors, zero-padded or truncated back to
    ``int(SAMPLE_RATE * WINDOW_WIDTH)`` samples, given noise, and finally turned
    into a spectrogram.
    """
    waveform = wavio.read(wavfile_path).data.astype('float32')
    if waveform.shape[1] > 1:
        waveform = np.sum(waveform, axis = 1)
    else:
        waveform = waveform.reshape((-1,))

    window_length = int(SAMPLE_RATE * WINDOW_WIDTH)
    first_sample = int(onset*SAMPLE_RATE)
    last_sample = int(offset*SAMPLE_RATE)
    total_samples = waveform.shape[0]

    if last_sample > total_samples:
        samples = waveform[np.arange(window_length) % total_samples]
    else:
        samples = waveform[first_sample:last_sample]

    # The augmenters are built in the same order as before so the random factors
    # are drawn in the same sequence: pitch first, then speed.
    pitch = naa.PitchAug(sampling_rate=SAMPLE_RATE, pitch_factor=(np.random.random() - 0.5) * 10)
    speed = naa.SpeedAug(speed_factor=(np.random.random()*0.4 + 0.8))
    shift = naa.ShiftAug(sampling_rate=SAMPLE_RATE, shift_max=samples.shape[0] / (10 * SAMPLE_RATE))
    samples = naf.Sequential([pitch, speed, shift]).augment(samples)

    # the augmenters may change the length; bring it back to exactly one window
    if window_length > samples.shape[0]:
        padded = np.zeros(window_length)
        padded[:samples.shape[0]] = samples
        samples = padded
    else:
        samples = samples[:window_length]

    noise = naa.NoiseAug(np.random.random()*50)
    samples = noise.substitute(samples)

    _, _, spectrogram = signal.spectrogram(samples, SAMPLE_RATE)
    return spectrogram

In [None]:
def data_and_labels_generator(batch_size, phase = 'train'):
    """Endlessly yield ``(spectrograms, labels)`` batches without augmentation.

    ``spectrograms`` is the per-excerpt spectrograms stacked along a new first
    axis; ``labels`` is the list of class names in the same order.
    """
    for raw_batch in raw_batch_generator(batch_size, phase):
        spectrograms = [
            create_spectrogram_from_wav_file(path, onset, offset)
            for _, path, onset, offset in raw_batch
        ]
        labels = [label for label, _, _, _ in raw_batch]
        yield np.stack(spectrograms, axis=0), labels
        
# Spectrogram-level augmentation shared by the augmenting generator below:
# two frequency-masking passes followed by two time-masking passes, each
# configured with MASK_FACTOR.
augmentor = naf.Sequential([
            nas.FrequencyMaskingAug(mask_factor=MASK_FACTOR),
            nas.FrequencyMaskingAug(mask_factor=MASK_FACTOR),
            nas.TimeMaskingAug(mask_factor=MASK_FACTOR), 
            nas.TimeMaskingAug(mask_factor=MASK_FACTOR)])
        
def data_and_labels_generator_with_augmentation(batch_size, phase = 'train'):
    """Endlessly yield ``(spectrograms, labels)`` batches with augmentation.

    Every excerpt is augmented at waveform level once, then masked
    NMB_OF_GENERATED_IMG_PER_IMG times by ``augmentor``, so a yielded batch holds
    ``batch_size * NMB_OF_GENERATED_IMG_PER_IMG`` items.
    """
    for raw_batch in raw_batch_generator(batch_size, phase):
        spectrograms, labels = [], []
        for label, path, onset, offset in raw_batch:
            base = create_spectrogram_from__aug_wav_file(path, onset, offset)
            spectrograms.extend(
                augmentor.augment(base) for _ in range(NMB_OF_GENERATED_IMG_PER_IMG)
            )
            labels.extend([label] * NMB_OF_GENERATED_IMG_PER_IMG)
        yield np.stack(spectrograms, axis=0), labels

## Training preparations

In [None]:
import tensorflow as tf
import keras
from keras import backend as K

In [None]:
# reset graph when you change architecture!
def reset_tf_session():
    """Close the current default TF session, clear the Keras graph and start anew.

    Returns the newly created interactive session, which is also registered as
    the Keras backend session. GPU memory growth is enabled on it.
    """
    previous = tf.get_default_session()
    if previous is not None:
        previous.close()

    # drop the existing graph before a new session is attached to it
    K.clear_session()

    session_config = tf.ConfigProto()
    session_config.gpu_options.allow_growth = True
    fresh_session = tf.InteractiveSession(config=session_config)
    K.set_session(fresh_session)
    return fresh_session

In [None]:
def _scale_log_spectrograms(spectrograms):
    """Log-compress a batch of spectrograms and min-max scale it to [-1, 1].

    The minimum and maximum are taken over the whole batch. A trailing channel
    axis is appended to the result. A batch with no dynamic range (max == min)
    maps to -1 everywhere instead of producing NaNs from a division by zero.
    """
    log_power = np.log10(spectrograms.astype('float32') + 1e-21)  # offset avoids log10(0)
    lowest = np.min(log_power)
    span = np.max(log_power) - lowest
    if span == 0:
        span = 1
    scaled = (log_power - lowest) * 2 / span - 1
    return np.expand_dims(scaled, -1)


def _as_model_batches(raw_batches):
    """Turn ``(spectrograms, label names)`` batches into ``(inputs, one-hot labels)``."""
    for spectrograms, label_names in raw_batches:
        data = _scale_log_spectrograms(spectrograms)
        labels = keras.utils.to_categorical(list(map(class_to_idx.get, label_names)), NUM_CLASSES)
        yield data, labels


def train_iterator(batch_size):
    """Endless stream of un-augmented training batches ready for the network."""
    yield from _as_model_batches(data_and_labels_generator(batch_size, phase = 'train'))


def train_iterator_with_augmentation(batch_size):
    """Endless stream of augmented training batches ready for the network."""
    yield from _as_model_batches(
        data_and_labels_generator_with_augmentation(batch_size, phase = 'train'))


def val_iterator(batch_size):
    """Endless stream of un-augmented validation batches ready for the network."""
    yield from _as_model_batches(data_and_labels_generator(batch_size, phase = 'val'))


def val_iterator_with_augmentation(batch_size):
    """Endless stream of augmented validation batches ready for the network."""
    yield from _as_model_batches(
        data_and_labels_generator_with_augmentation(batch_size, phase = 'val'))

In [None]:
# import necessary building blocks
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Activation, Dropout, GlobalAveragePooling2D, \
    BatchNormalization

In [None]:
from keras.metrics import categorical_crossentropy, categorical_accuracy
from keras_metrics import precision, recall, f1_score

In [None]:
# for saving the model after every epoch
from keras.models import save_model

class ModelSaveCallback(keras.callbacks.Callback):
    """Keras callback that saves the model at the end of every epoch.

    ``file_name`` is a ``str.format`` template; the epoch number passed by Keras
    is substituted into it to build the path of each checkpoint.
    """

    def __init__(self, file_name):
        super().__init__()
        self.file_name = file_name

    def on_epoch_end(self, epoch, logs=None):
        checkpoint_path = self.file_name.format(epoch)
        save_model(self.model, checkpoint_path)
        print("Model saved in {}".format(checkpoint_path))

In [None]:
# Root of the mounted Drive: the first non-hidden entry under the mount point.
# os.listdir returns entries in arbitrary order, so they are sorted to make the
# choice deterministic when the mount contains more than one visible folder.
GOOGLE_DRIVE_ROOT = GOOGLE_DRIVE_MOUNT + "/" + sorted(
    entry for entry in os.listdir(GOOGLE_DRIVE_MOUNT) if not entry.startswith('.'))[0]
print(GOOGLE_DRIVE_ROOT)

# will save checkpoints to Google Drive
CHECKPOINT_TEMPLATE = GOOGLE_DRIVE_ROOT + "/colab/Frame_Classification/model_{}"
print(CHECKPOINT_TEMPLATE)

# Architecture

In [None]:
def make_model2():
    """Build the (uncompiled) convolutional classifier.

    The network is a stack of Conv2D -> BatchNormalization -> ReLU units with
    'same' padding, followed by global average pooling, a 1024-unit dense layer
    (also batch-normalised + ReLU) and a softmax over NUM_CLASSES. The input
    shape is (SPECTROGRAM_HEIGH, SPECTROGRAM_WIDTH, 1).
    """
    # (filters, square kernel size, stride) of every convolutional unit, in order
    conv_units = [
        (32, 3, 2), (32, 3, 1),
        (64, 3, 2), (64, 1, 1), (64, 1, 1),
        (128, 3, 2), (128, 1, 1),
        (256, 3, 1), (256, 1, 1), (256, 3, 1), (256, 1, 1),
        (256, 3, 1), (256, 1, 1), (256, 3, 1), (256, 1, 1),
        (256, 3, 2), (256, 1, 1),
        (512, 3, 1), (512, 1, 1), (512, 3, 1), (512, 1, 1), (512, 3, 1), (512, 1, 1),
        (512, 3, 2), (512, 1, 1),
        (1024, 3, 2), (1024, 1, 1),
    ]

    model = Sequential()

    for position, (filters, kernel, stride) in enumerate(conv_units):
        # only the very first layer declares the input shape
        first_layer_kwargs = (
            {'input_shape': (SPECTROGRAM_HEIGH, SPECTROGRAM_WIDTH, 1)} if position == 0 else {}
        )
        model.add(Conv2D(filters=filters, kernel_size=(kernel, kernel), strides=stride,
                         padding='same', **first_layer_kwargs))
        model.add(BatchNormalization())
        model.add(Activation('relu'))

    model.add(GlobalAveragePooling2D())

    model.add(Dense(1024))
    model.add(BatchNormalization())
    model.add(Activation('relu'))

    model.add(Dense(NUM_CLASSES, activation="softmax"))

    return model

In [None]:
# describe model
s = reset_tf_session()  # clear default graph
# make_model2 is the model builder defined in this notebook; the previously
# referenced make_model1 is not defined anywhere visible and raised NameError.
model = make_model2()
model.summary()

In [None]:
# Training schedule: BATCH_SIZE raw excerpts per step, STEPS_PER_EPOCH generator
# batches per epoch, training stops once epoch index EPOCHS is reached.
BATCH_SIZE = 256
STEPS_PER_EPOCH = 100
EPOCHS = 100

s = reset_tf_session()  # clear default graph
model = make_model2()  # define our model

# prepare model for fitting (loss, optimizer, etc)
model.compile(
    loss='categorical_crossentropy',
    optimizer=keras.optimizers.Adadelta(lr=1.0, rho=0.95, epsilon=1e-08, decay=0.0),
    metrics=[categorical_accuracy, precision(), recall(), f1_score()]
)
# Passed as initial_epoch to fit_generator in the training cell.
# NOTE(review): the model above is freshly built and no checkpoint is loaded in
# this cell, so starting at epoch 20 looks like a leftover from a resumed run -
# confirm, or load the matching weights / reset to 0.
last_finished_epoch = 20

# Training

In [None]:
 model.fit_generator(
    # training batches come from the augmenting generator
    train_iterator_with_augmentation(BATCH_SIZE), 
    steps_per_epoch=STEPS_PER_EPOCH,
    epochs=EPOCHS,
    # a checkpoint is written to Google Drive after every epoch
    callbacks=[ModelSaveCallback(CHECKPOINT_TEMPLATE)],
    verbose=1,
    # epoch counter starts here rather than at 0
    initial_epoch=last_finished_epoch,
    # validation also goes through the augmenting generator, one batch per epoch
    validation_data = val_iterator_with_augmentation(BATCH_SIZE),
    validation_steps = 1
)