# Train Model

This Colab notebook demonstrates how to extract AudioSet embeddings using a VGGish deep neural network (DNN).

Based on the directions at: https://github.com/tensorflow/models/tree/master/research/audioset

In [0]:
# Print the runtime's CPU details (lscpu) and NVIDIA GPU status (nvidia-smi)
# so the hardware the notebook ran on is recorded in the cell output.
!lscpu
!nvidia-smi

Architecture:        x86_64
CPU op-mode(s):      32-bit, 64-bit
Byte Order:          Little Endian
CPU(s):              2
On-line CPU(s) list: 0,1
Thread(s) per core:  2
Core(s) per socket:  1
Socket(s):           1
NUMA node(s):        1
Vendor ID:           GenuineIntel
CPU family:          6
Model:               79
Model name:          Intel(R) Xeon(R) CPU @ 2.20GHz
Stepping:            0
CPU MHz:             2200.000
BogoMIPS:            4400.00
Hypervisor vendor:   KVM
Virtualization type: full
L1d cache:           32K
L1i cache:           32K
L2 cache:            256K
L3 cache:            56320K
NUMA node0 CPU(s):   0,1
Flags:               fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_sin

In [0]:
#Google drive access
import os
from google.colab import drive

# Use absolute paths so this cell is idempotent: the original relative
# root_path only resolved while the working directory was still /content,
# so re-running the cell after the chdir below raised FileNotFoundError.
mount_point = '/content/gdrive'
drive.mount(mount_point, force_remount = True)

#Directory
# All later relative paths (packed_features/, workspace/, trained_models/, lib/)
# are resolved against this project directory.
root_path = os.path.join(mount_point, 'My Drive/SoundEventDetection/modelTraining')
os.chdir(root_path)

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/gdrive


In [0]:
%%capture
#Install necessary software
!pip install six
!pip install h5py
!pip install pydub
!pip install numpy
!pip install scipy
!pip install keras
!pip install future
!pip install resampy
!pip install ipython
!pip install soundfile
!pip install pysoundfile
!pip install scikit-learn
!apt-get install libsndfile1

!pip install python==3.6
!pip install matplotlib
!pip install cudnn==7.1.2
!pip install cudatoolkit==9
!pip install tensorflow-gpu==1.12.0

#Install: cuda-repo-ubuntu1604-9-0-local_9.0.176-1_amd64-deb
!dpkg -i cuda-repo-ubuntu1604-9-0-local_9.0.176-1_amd64-deb
!apt-key add /var/cuda-repo-9-0-local/7fa2af80.pub
!apt-get update
!apt-get install cuda=9.0.176-1
#OR
#!wget https://developer.nvidia.com/compute/cuda/9.0/Prod/local_installers/cuda-repo-ubuntu1604-9-0-local_9.0.176-1_amd64-deb

#VGGish model checkpoint, in TensorFlow checkpoint format.
import os 
os.chdir("trained_models")
!wget https://storage.googleapis.com/audioset/vggish_model.ckpt
os.chdir("..")

In [0]:
%%capture
#Important DO NOT DELETE
#import six
import sys
#import h5py
#import math
#import glob
#import h5py
#import time
import numpy as np
import pandas as pd
import soundfile as sf
import tensorflow as tf
import matplotlib.pyplot as plt
#from pydub.playback import play
#from pydub import AudioSegment
#from scipy.io import wavfile
#from scipy.io.wavfile import write

# Insert "<sys.path[0]>/../" at index 1 of the module search path.
# NOTE(review): presumably needed so project modules outside the working
# directory can be imported — confirm it is still required for `lib`.
# `os` is not imported in this cell; it relies on the `import os` executed
# in an earlier cell.
sys.path.insert(1, os.path.join(sys.path[0], '../'))

#ML imports
#from keras.layers import Input, Dense, BatchNormalization, Dropout, Activation, Concatenate
from keras.layers import Lambda
#from keras.optimizers import Adam
from keras.models import load_model
from keras.models import Model
import keras.backend as K

#External .py scripts
from lib import mel_features
from lib import vggish_input
from lib import vggish_params
from lib import vggish_postprocess
from lib import vggish_slim
from lib import utilities
from lib import data_generator
from lib.train_functions import evaluateCore, trainCore, average_pooling, max_pooling, attention_pooling, pooling_shape, train, writeToFile

In [0]:
#Download dataset
!wget --load-cookies /tmp/cookies.txt "https://docs.google.com/uc?export=download&confirm=$(wget --quiet --save-cookies /tmp/cookies.txt --keep-session-cookies --no-check-certificate 'https://docs.google.com/uc?export=download&id=0B49XSFgf-0yVQk01eG92RHg4WTA' -O- | sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\1\n/p')&id=0B49XSFgf-0yVQk01eG92RHg4WTA" -O packed_features.zip && rm -rf /tmp/cookies.txt
!unzip packed_features.zip

/content/gdrive/My Drive/SoundEventDetection/modelTraining
--2020-04-01 22:11:27--  https://docs.google.com/uc?export=download&confirm=FH1g&id=0B49XSFgf-0yVQk01eG92RHg4WTA
Resolving docs.google.com (docs.google.com)... 172.217.5.206, 2607:f8b0:4007:80d::200e
Connecting to docs.google.com (docs.google.com)|172.217.5.206|:443... connected.
HTTP request sent, awaiting response... 302 Moved Temporarily
Location: https://doc-14-4s-docs.googleusercontent.com/docs/securesc/5hghd0s5edaf2sgjtqq3ao09vlonffac/okt7g8d8v6m445hdg1qdretgr6ij33hk/1585779075000/05072233986702819728/08326851818180176792Z/0B49XSFgf-0yVQk01eG92RHg4WTA?e=download [following]
--2020-04-01 22:11:27--  https://doc-14-4s-docs.googleusercontent.com/docs/securesc/5hghd0s5edaf2sgjtqq3ao09vlonffac/okt7g8d8v6m445hdg1qdretgr6ij33hk/1585779075000/05072233986702819728/08326851818180176792Z/0B49XSFgf-0yVQk01eG92RHg4WTA?e=download
Resolving doc-14-4s-docs.googleusercontent.com (doc-14-4s-docs.googleusercontent.com)... 216.58.217.193, 26

In [0]:
#Set args
# Run configuration; the same dict is later passed to train() and
# utilities.get_avg_stats().
args = dict(
  data_dir="packed_features/",
  workspace="workspace/",
  mini_data=False,
  # Options: 'no_balance', 'balance_in_batch'
  balance_type="balance_in_batch",
  # Options: 'decision_level_max_pooling', 'decision_level_average_pooling',
  # 'decision_level_single_attention', 'decision_level_multi_attention',
  # 'feature_level_attention'
  model_type='decision_level_single_attention',
  learning_rate=1e-3,
)

# The run name is added after the dict exists, as a separate step.
args.update(filename=utilities.get_filename("work/"))

#Logs
# Log directory layout: <workspace>/logs/<filename>
workspace_dir = args["workspace"]
run_name = args["filename"]
logs_dir = os.path.join(workspace_dir, 'logs', run_name)
utilities.create_folder(logs_dir)
logging = utilities.create_logging(logs_dir, filemode='w')

In [0]:
#Train Model
def trainModel(run_training=True):
  """Run training, or the get_avg_stats path over a fixed iteration range.

  The original body branched on a hard-coded `if True:`, which made the
  second path unreachable without editing the function. The flag exposes
  both paths; the default keeps the previous behaviour.

  Args:
    run_training: When True (default), call train(args, 0). When False,
      set "bgn_iteration", "fin_iteration" and "interval_iteration" on the
      notebook-level `args` dict and call utilities.get_avg_stats(args).
  """
  if run_training:
    # NOTE(review): meaning of the second argument (0) is defined by
    # lib.train_functions.train — confirm there.
    train(args, 0)
  else:
    # These keys are written into the shared `args` dict (mutated in place).
    args["bgn_iteration"] = 10000
    args["fin_iteration"] = 50001
    args["interval_iteration"] = 5000
    utilities.get_avg_stats(args)

trainModel()

Model: "model_3"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_3 (InputLayer)            (None, 10, 128)      0                                            
__________________________________________________________________________________________________
dense_11 (Dense)                (None, 10, 1024)     132096      input_3[0][0]                    
__________________________________________________________________________________________________
batch_normalization_7 (BatchNor (None, 10, 1024)     4096        dense_11[0][0]                   
__________________________________________________________________________________________________
activation_7 (Activation)       (None, 10, 1024)     0           batch_normalization_7[0][0]      
____________________________________________________________________________________________