In [1]:
## Import packages
import os
import csv
import random
import math
import numpy as np
import pandas as pd
from scipy.io import wavfile

import tensorflow as tf
from tensorflow import gfile
from tensorflow import logging

import vggish_input
import vggish_postprocess
import vggish_params
import vggish_slim

  from ._conv import register_converters as _register_converters


In [2]:
flags = tf.app.flags
FLAGS = flags.FLAGS
tf.app.flags.DEFINE_string('f', '', 'kernel')

flags.DEFINE_string(
    'audio_embedding_feature_name', 'audio_embedding',
    'Path to the VGGish checkpoint file.')

## De volgende bestanden komen uit het eerdere mapje 'models'. Nu heb ik ze in yt8m gezet.
flags.DEFINE_string(
    'pca_params', 'models/vggish_pca_params.npz',
    'Path to the VGGish PCA parameters file.')

flags.DEFINE_string(
    'checkpoint', 'models/vggish_model.ckpt',
    'Path to the VGGish checkpoint file.')

In [13]:
# label = "gunshots"
# label = 'bush_wenger'
# label = "elephants_wenger"
# label = 'elephants_angela'
label = 'background_angela'

In [14]:
## Map waarin de wav-files staan
#wavfile_path = label
#wavfile_path = 'elephants/angela_elephants'
wavfile_path = 'elephants/angela_background'
#wavfile_path = 'elephants/wenger_3717'
#wavfile_path = 'elephants/wenger_bush'
## Map waar de nieuwe tf-records terecht komen
tfrecord_path = "tfrecords/added_data/"

In [15]:
## Houd aantal samples bij
nr_9mm = 0
nr_556 = 0
#elephants_wenger = 0
#bush_wenger = 0
#elephants_angela = 0
background_angela = 0

## Proportie train/test
pct_train = 0.6

In [6]:
## Voor alle wav-files in een map, gebeurt het volgende:

## Eerst wordt het wav-file ingelezen en de bijbehorende label(s) opgezocht in een csv-bestand
## Als video-id wordt een willekeurig id'tje gegeven (in dit geval allemaal dezelfde)

## Dan parsen we wav-file naar embeddings: 
# (dit gebeurt door vggish_input.wavfile_to_example aan te roepen)
# Stap 1a: lezen van wav-file, input is array met samples die db aanduiden. Ook sample rate (per sec) wordt gelezen
# Stap 1b: Bij 2d array (stereo, ipv mono) bereken gemiddelde, daarna normaliseren (delen door 32.768)
# Stap 2: Bepaal examples in vorm [batch size, num frames, num bands].
    # Hierbij worden voor verschillende batches (omdat alles tegelijk niet in 1x in NN kan),
    # een log mel spectrogram gemaakt (in vorm [num_frames, num_bands])
# Stap 3: Bepaal features: nu wordt de embedding laag gemaakt (PCA-components, discreet maken etc)
    # Hiervoor worden model-parameters opgehaald die eerder zijn opgeslagen
    
## Daarna wordt een sequence example gemaakt (in getSequenceExample) en het als tf-records weggeschreven

In [9]:
## Function that takes examples from wav-file as input and returns a sequence example

def getSequenceExample(examples_batch, labels, video_id=[b'-1LrH01Ei1w']):
    with tf.Graph().as_default(), tf.Session() as sess:

        # Prepare a postprocessor to munge the model embeddings.
        pproc = vggish_postprocess.Postprocessor(FLAGS.pca_params)
    
        # Define the model: load the checkpoint and locate input and output tensors
        # Input: [batch_size, num_frames, num_bands] 
        # where [num_frames, num_bands] represents log-mel-scale spectrogram
        # Output: embeddings
        vggish_slim.define_vggish_slim(training=False)
        vggish_slim.load_vggish_slim_checkpoint(sess, FLAGS.checkpoint)

        features_tensor = sess.graph.get_tensor_by_name(
            vggish_params.VGGISH_INPUT_TENSOR_NAME)
        embedding_tensor = sess.graph.get_tensor_by_name(
            vggish_params.VGGISH_OUTPUT_TENSOR_NAME)

        # Run inference and postprocessing.
        [embedding_batch] = sess.run([embedding_tensor],
                                     feed_dict={features_tensor: examples_batch})
        print(embedding_batch.shape)
        if(embedding_batch.shape!=10):
            print("*****************************************************************")

        postprocessed_batch = pproc.postprocess(embedding_batch)
        #print(postprocessed_batch)


        ## Maak labels en video-id voor in de example
        label_feat = tf.train.Feature(int64_list=tf.train.Int64List(value=labels))
        videoid_feat = tf.train.Feature(bytes_list=tf.train.BytesList(value=video_id))

        ## Maak sequence example
        seq_example = tf.train.SequenceExample(
            context = tf.train.Features(feature={"labels": label_feat, "video_id": videoid_feat}),
            feature_lists = tf.train.FeatureLists(
                feature_list={
                    FLAGS.audio_embedding_feature_name:
                        tf.train.FeatureList(
                            feature=[
                                tf.train.Feature(
                                    bytes_list=tf.train.BytesList(
                                        value=[embedding.tobytes()]))
                                for embedding in postprocessed_batch
                            ]
                        )
                }
            )
        )
        
    return(seq_example)

In [10]:
#11,/s/0000,Gunshot SC9mm
#12,/s/0001,Gunshot SC556
#13,/s/0002,Elephant
#14,/s/0003,Bush

def getLabels(file):
    if '9mm' in file:
        labels = [11]
    elif '556' in file or '2000m' in file:
        labels = [12]
    return(labels)

In [16]:
## Lees stuk voor stuk alle wav-files in
## Zoek het bijbehorende label op in een csv-bestand
## Bepaal de embeddings

# Prepare a record writer to store the postprocessed embeddings.
## Trainset
if(label == "gunshots"):
    train_tfrecord = str(tfrecord_path + 'gun_train.tfrecord')
    test_tfrecord = str(tfrecord_path + 'gun_test.tfrecord')
elif(label == "elephants_angela"):
    train_tfrecord = str(tfrecord_path + 'elephant_angela_train.tfrecord')
    test_tfrecord = str(tfrecord_path + 'elephant_angela_test.tfrecord')
elif(label == "background_angela"):
    train_tfrecord = str(tfrecord_path + 'background_angela_train.tfrecord')
    test_tfrecord = str(tfrecord_path + 'background_angela_test.tfrecord')
elif(label == "elephants_wenger"):
    train_tfrecord = str(tfrecord_path + 'elephant_wenger_train.tfrecord')
    test_tfrecord = str(tfrecord_path + 'elephant_wenger_test.tfrecord')
elif(label == "bush_wenger"):
    train_tfrecord = str(tfrecord_path + 'bush_wenger_train.tfrecord')
    test_tfrecord = str(tfrecord_path + 'bush_wenger_test.tfrecord')

    
    
train_writer = tf.python_io.TFRecordWriter(train_tfrecord)
test_writer = tf.python_io.TFRecordWriter(test_tfrecord)

## Read and rewrite all test files
files = gfile.Glob(str(wavfile_path + "*.wav"))

count = 1
for file in files:
    
    print("file nr: " + str(count))
    print("Filename: " + file)
    
    count += 1
    
    #print("Filename: " + str(file))
   
    ## Find labels belonging to wav-file
    if(label=="gunshots"):
        labels = getLabels(file)
        if(labels[0]==11):
            nr_9mm += 1
        elif(labels[0]==12):
            nr_556 += 1
    elif(label == "elephants_wenger"):
        labels = [13]
        elephants += 1
    elif(label == "bush_wenger"):
        labels = [14]
        bush += 1
    elif(label == "elephants_angela"):
        labels = [15]
        elephants_angela += 1
    elif(label == "background_angela"):
        labels = [16]
        elephants_angela += 1
    
    ## This function reads the wav file and converts the samples into np arrays of [batch size, num frames, num bands]
    #examples_batch = vggish_input.wavfile_to_examples(str(FLAGS.wavfile_path + wav_file))
    examples_batch = vggish_input.wavfile_to_examples(file)
    
    if(examples_batch.shape[0]>10):
        nr_sec = examples_batch.shape[0]
        print(nr_sec)
        start = int(math.floor((nr_sec-10)/2))
        print("start: " + str(start))
        end = int(nr_sec-math.ceil((nr_sec-10)/2))
        print("end: " + str(end))
        examples_batch = examples_batch[start:end, :, :]
        print(examples_batch.shape)
    
    #print("Examples shape: " + str(examples_batch.shape))
   
    seq_example = getSequenceExample(examples_batch, labels)
    
    rand = random.random()
    if rand <= pct_train:
        train_writer.write(seq_example.SerializeToString())
    else:
        test_writer.write(seq_example.SerializeToString())
    
    # Schrijf 556 dubbel weg
    if(labels[0]==12):
        if rand <= pct_train:
            train_writer.write(seq_example.SerializeToString())
        else:
            test_writer.write(seq_example.SerializeToString())
train_writer.close()
test_writer.close()

file nr: 1
Filename: elephants\angela_background\BackgroundEFAF2011A008-102.WAV
INFO:tensorflow:Restoring parameters from models/vggish_model.ckpt
(10, 128)
*****************************************************************
file nr: 2
Filename: elephants\angela_background\BackgroundEFAF2011A008-165.WAV
INFO:tensorflow:Restoring parameters from models/vggish_model.ckpt
(10, 128)
*****************************************************************
file nr: 3
Filename: elephants\angela_background\BackgroundEFAF2011A008-232.WAV
INFO:tensorflow:Restoring parameters from models/vggish_model.ckpt
(10, 128)
*****************************************************************
file nr: 4
Filename: elephants\angela_background\BackgroundEFAF2011A008-326.WAV
INFO:tensorflow:Restoring parameters from models/vggish_model.ckpt
(10, 128)
*****************************************************************
file nr: 5
Filename: elephants\angela_background\BackgroundEFAF2011A009-146.WAV
INFO:tensorflow:Restoring pa

(10, 128)
*****************************************************************
file nr: 38
Filename: elephants\angela_background\BackgroundEFAF2011A019-238.WAV
INFO:tensorflow:Restoring parameters from models/vggish_model.ckpt
(10, 128)
*****************************************************************
file nr: 39
Filename: elephants\angela_background\BackgroundEFAF2011A019-254.WAV
INFO:tensorflow:Restoring parameters from models/vggish_model.ckpt
(10, 128)
*****************************************************************
file nr: 40
Filename: elephants\angela_background\BackgroundEFAF2011A019-275.WAV
INFO:tensorflow:Restoring parameters from models/vggish_model.ckpt
(10, 128)
*****************************************************************
file nr: 41
Filename: elephants\angela_background\BackgroundEFAF2011A019-311.WAV
INFO:tensorflow:Restoring parameters from models/vggish_model.ckpt
(10, 128)
*****************************************************************
file nr: 42
Filename: elepha

INFO:tensorflow:Restoring parameters from models/vggish_model.ckpt
(10, 128)
*****************************************************************
file nr: 75
Filename: elephants\angela_background\BackgroundEFAF2011A021-1492.WAV
INFO:tensorflow:Restoring parameters from models/vggish_model.ckpt
(10, 128)
*****************************************************************
file nr: 76
Filename: elephants\angela_background\BackgroundEFAF2011A021-1537.WAV
INFO:tensorflow:Restoring parameters from models/vggish_model.ckpt
(10, 128)
*****************************************************************
file nr: 77
Filename: elephants\angela_background\BackgroundEFAF2011A021-1565.WAV
INFO:tensorflow:Restoring parameters from models/vggish_model.ckpt
(10, 128)
*****************************************************************
file nr: 78
Filename: elephants\angela_background\BackgroundEFAF2011A021-1615.WAV
INFO:tensorflow:Restoring parameters from models/vggish_model.ckpt
(10, 128)
***********************

INFO:tensorflow:Restoring parameters from models/vggish_model.ckpt
(10, 128)
*****************************************************************
file nr: 112
Filename: elephants\angela_background\BackgroundEFAF2011A023-388.WAV
INFO:tensorflow:Restoring parameters from models/vggish_model.ckpt
(10, 128)
*****************************************************************
file nr: 113
Filename: elephants\angela_background\BackgroundEFAF2011A023-430.WAV
INFO:tensorflow:Restoring parameters from models/vggish_model.ckpt
(10, 128)
*****************************************************************
file nr: 114
Filename: elephants\angela_background\BackgroundEFAF2011A023-463.WAV
INFO:tensorflow:Restoring parameters from models/vggish_model.ckpt
(10, 128)
*****************************************************************
file nr: 115
Filename: elephants\angela_background\BackgroundEFAF2011A023-522.WAV
INFO:tensorflow:Restoring parameters from models/vggish_model.ckpt
(10, 128)
***********************

INFO:tensorflow:Restoring parameters from models/vggish_model.ckpt
(10, 128)
*****************************************************************
file nr: 149
Filename: elephants\angela_background\BackgroundEFAF2011A025-914.WAV
INFO:tensorflow:Restoring parameters from models/vggish_model.ckpt
(10, 128)
*****************************************************************
file nr: 150
Filename: elephants\angela_background\BackgroundEFAF2011A025-970.WAV
INFO:tensorflow:Restoring parameters from models/vggish_model.ckpt
(10, 128)
*****************************************************************
file nr: 151
Filename: elephants\angela_background\BackgroundEFAF2011A026-122.WAV
INFO:tensorflow:Restoring parameters from models/vggish_model.ckpt
(10, 128)
*****************************************************************
file nr: 152
Filename: elephants\angela_background\BackgroundEFAF2011A026-164.WAV
INFO:tensorflow:Restoring parameters from models/vggish_model.ckpt
(10, 128)
***********************

INFO:tensorflow:Restoring parameters from models/vggish_model.ckpt
(10, 128)
*****************************************************************
file nr: 186
Filename: elephants\angela_background\BackgroundEFAF2011A027-807.WAV
INFO:tensorflow:Restoring parameters from models/vggish_model.ckpt
(10, 128)
*****************************************************************
file nr: 187
Filename: elephants\angela_background\BackgroundEFAF2011A027-935.WAV
INFO:tensorflow:Restoring parameters from models/vggish_model.ckpt
(10, 128)
*****************************************************************
file nr: 188
Filename: elephants\angela_background\BackgroundEFAF2011A027-964.WAV
INFO:tensorflow:Restoring parameters from models/vggish_model.ckpt
(10, 128)
*****************************************************************
file nr: 189
Filename: elephants\angela_background\BackgroundEFAF2011A028-101.WAV
INFO:tensorflow:Restoring parameters from models/vggish_model.ckpt
(10, 128)
***********************

INFO:tensorflow:Restoring parameters from models/vggish_model.ckpt
(10, 128)
*****************************************************************
file nr: 223
Filename: elephants\angela_background\BackgroundEFAF2011A028-893.WAV
INFO:tensorflow:Restoring parameters from models/vggish_model.ckpt
(10, 128)
*****************************************************************
file nr: 224
Filename: elephants\angela_background\BackgroundEFAF2011A028-911.WAV
INFO:tensorflow:Restoring parameters from models/vggish_model.ckpt
(10, 128)
*****************************************************************
file nr: 225
Filename: elephants\angela_background\BackgroundEFAF2011A028-927.WAV
INFO:tensorflow:Restoring parameters from models/vggish_model.ckpt
(10, 128)
*****************************************************************
file nr: 226
Filename: elephants\angela_background\BackgroundEFAF2011A028-957.WAV
INFO:tensorflow:Restoring parameters from models/vggish_model.ckpt
(10, 128)
***********************

INFO:tensorflow:Restoring parameters from models/vggish_model.ckpt
(10, 128)
*****************************************************************
file nr: 260
Filename: elephants\angela_background\BackgroundEFAF2011A030-611.WAV
INFO:tensorflow:Restoring parameters from models/vggish_model.ckpt
(10, 128)
*****************************************************************
file nr: 261
Filename: elephants\angela_background\BackgroundEFAF2011A030-639.WAV
INFO:tensorflow:Restoring parameters from models/vggish_model.ckpt
(10, 128)
*****************************************************************
file nr: 262
Filename: elephants\angela_background\BackgroundEFAF2011A030-671.WAV
INFO:tensorflow:Restoring parameters from models/vggish_model.ckpt
(10, 128)
*****************************************************************
file nr: 263
Filename: elephants\angela_background\BackgroundEFAF2011A030-700.WAV
INFO:tensorflow:Restoring parameters from models/vggish_model.ckpt
(10, 128)
***********************

INFO:tensorflow:Restoring parameters from models/vggish_model.ckpt
(10, 128)
*****************************************************************
file nr: 297
Filename: elephants\angela_background\BackgroundEFAF2011A033-304.WAV
INFO:tensorflow:Restoring parameters from models/vggish_model.ckpt
(10, 128)
*****************************************************************
file nr: 298
Filename: elephants\angela_background\BackgroundEFAF2011A033-327.WAV
INFO:tensorflow:Restoring parameters from models/vggish_model.ckpt
(10, 128)
*****************************************************************
file nr: 299
Filename: elephants\angela_background\BackgroundEFAF2011A033-376.WAV
INFO:tensorflow:Restoring parameters from models/vggish_model.ckpt
(10, 128)
*****************************************************************
file nr: 300
Filename: elephants\angela_background\BackgroundEFAF2011A033-417.WAV
INFO:tensorflow:Restoring parameters from models/vggish_model.ckpt
(10, 128)
***********************

In [10]:
print("nr 9mm: " + str(nr_9mm))
print("nr 556: " + str(nr_556))

nr 9mm: 0
nr 556: 0


In [16]:
print("nr elephants: " + str(elephants))

nr elephants: 3717


In [20]:
print("nr bush: " + str(bush))

nr bush: 1540


In [10]:
print("nr gun google: " + str(gun_google))

nr gun google: 414
