In [1]:
import os
# TF_CPP_MIN_LOG_LEVEL: 0 = show all, 1 = hide INFO, 2 = hide INFO and WARNING, 3 = hide INFO, WARNING and ERROR
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
import tensorflow as tf
tf.get_logger().setLevel('ERROR')
import tensorflow_io as tfio
from matplotlib import pyplot as plt
import numpy as np
import keras
from keras import layers
import random
import csv
import time
from datetime import datetime

In [2]:
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    roc_auc_score,
    classification_report,
    confusion_matrix,
    precision_recall_curve,
    log_loss,
    auc,
    average_precision_score
)
from scipy.interpolate import interp1d

In [3]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input

In [4]:
# Report the installed TensorFlow version and the GPUs it can see, one per line.
print(tf.__version__, tf.config.list_physical_devices('GPU'), sep='\n')


2.10.1
[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [5]:
import psutil

# Snapshot of system memory; byte counts are divided by 1e9 and shown as GB.
mem = psutil.virtual_memory()
print(
    f"Total RAM:     {mem.total / 1e9:.2f} GB",
    f"Available RAM: {mem.available / 1e9:.2f} GB",
    f"Used RAM:      {mem.used / 1e9:.2f} GB",
    f"RAM Usage:     {mem.percent}%",
    sep="\n",
)

Total RAM:     34.29 GB
Available RAM: 19.29 GB
Used RAM:      15.00 GB
RAM Usage:     43.8%


## Set Paths

In [6]:
# Show the working directory: the dataset folders used below are relative paths,
# so they are resolved against this location.
print(os.getcwd())

c:\Users\foxir\OneDrive\Desktop\Smart Rock\yellowhammer-tiny\notebooks


In [None]:
# Dataset folders, given relative to the current working directory.
train_NEG = 'Development_Set//Training_Set//Negatives'
train_POS = 'Development_Set//Training_Set//Yellowhammer'
val_NEG = 'Development_Set//Validation_Set//Negatives'
val_POS = 'Development_Set//Validation_Set//Yellowhammer'


def _labelled_files(folder, label):
    """Return ``[[path, label], ...]`` for every entry of ``folder``, sorted by name.

    ``os.listdir`` returns entries in arbitrary, platform-dependent order, so the
    listing is sorted here. Without that, the seeded shuffle below would start
    from a different order on different machines and would not be reproducible.

    Args:
        folder: directory to list.
        label: value stored next to each path (0 for the Negatives folders,
            1 for the Yellowhammer folders).
    """
    return [[folder + '//' + item, label] for item in sorted(os.listdir(folder))]


train_neg = _labelled_files(train_NEG, 0)
train_pos = _labelled_files(train_POS, 1)
val_neg = _labelled_files(val_NEG, 0)
val_pos = _labelled_files(val_POS, 1)

full_train = train_neg + train_pos
full_val = val_neg + val_pos

# One seeded RNG stream shuffles the training list first and the validation list
# second; keep this call order if the resulting split order must stay the same.
random.seed(42)
random.shuffle(full_train)
random.shuffle(full_val)

# Split the shuffled [path, label] pairs into parallel lists.
t_filepaths = [x[0] for x in full_train]
t_labels = [x[1] for x in full_train]
v_filepaths = [x[0] for x in full_val]
v_labels = [x[1] for x in full_val]

In [None]:
# Folder holding the evaluation clips (relative to the working directory).
EVAL = 'Evaluation_Set'
# NOTE(review): `eval` shadows Python's built-in eval() for the rest of the
# notebook; the name is kept because later cells may refer to it.
# Entries keep the order in which os.listdir returns them.
eval = ['//'.join((EVAL, clip_name)) for clip_name in os.listdir(EVAL)]

## Initialize Audio Functions

In [None]:
# Target sample rate (Hz) that every loaded clip is brought to.
desired_sr = 16000

@tf.function
def load_wav_16k_mono_tf(filename, label):
    """Load a WAV file as a single-channel waveform at ``desired_sr`` with a float32 label.

    Args:
        filename: path of the WAV file to read.
        label: label for the clip; returned cast to ``tf.float32``.

    Returns:
        ``(wav, label)``: the waveform with its trailing channel axis squeezed
        away, and the float32 label.
    """
    audio, native_sr = tf.audio.decode_wav(tf.io.read_file(filename), desired_channels=1)
    native_sr = tf.cast(native_sr, dtype=tf.int64)

    def _resampled():
        return tfio.audio.resample(audio, rate_in=native_sr, rate_out=desired_sr)

    def _unchanged():
        return audio

    # Resample only when the file's own rate differs from the target rate.
    mono = tf.cond(tf.not_equal(native_sr, desired_sr), _resampled, _unchanged)

    # desired_channels=1 leaves a last axis of size one; drop it.
    return tf.squeeze(mono, axis=-1), tf.cast(label, dtype=tf.float32)

# NOTE: unlike load_wav_16k_mono_tf, this loader is not wrapped in @tf.function.
def load_wav_16k_mono_tf_nolabel(filename):
    """Load a WAV file as a single-channel waveform at ``desired_sr`` (no label).

    Args:
        filename: path of the WAV file to read.

    Returns:
        The waveform with its trailing channel axis squeezed away.
    """
    audio, native_sr = tf.audio.decode_wav(tf.io.read_file(filename), desired_channels=1)
    native_sr = tf.cast(native_sr, dtype=tf.int64)

    def _resampled():
        return tfio.audio.resample(audio, rate_in=native_sr, rate_out=desired_sr)

    def _unchanged():
        return audio

    # Resample only when the file's own rate differs from the target rate.
    mono = tf.cond(tf.not_equal(native_sr, desired_sr), _resampled, _unchanged)

    # desired_channels=1 leaves a last axis of size one; drop it.
    return tf.squeeze(mono, axis=-1)

In [None]:
# Spectrogram settings shared by the preprocessing functions.
sample_rate = 16000   # Hz
frame_length = 256    # samples; passed as both the FFT size and the window length
frame_step = 192      # samples; hop of 75% of frame_length (25% overlap between frames)
mel_bins = 64         # number of mel bands
fmax = 8000           # Hz; upper mel edge (sample_rate / 2)
fmin = 1028           # Hz; lower mel edge

@tf.function
def preprocess_mel_db_tf(wav, label):
    """Turn a waveform into a dB-scaled mel spectrogram, passing the label through.

    Clips shorter than 32000 samples (2 s at 16 kHz) are zero-padded at the end
    up to 32000 samples. Longer clips are NOT truncated; they keep their length.

    No channel axis is added here: the ``tf.expand_dims(..., axis=2)`` step for
    Conv2D input was left disabled in the original, with the note that it is not
    needed if the spectrogram is augmented afterwards.

    Args:
        wav: 1-D float32 waveform.
        label: label returned unchanged.

    Returns:
        ``(dbscale_mel_spectrogram, label)``.
    """
    # Number of zeros still needed to reach 32000 samples; 0 for long-enough clips.
    shortfall = tf.maximum(32000 - tf.shape(wav)[0], 0)
    padded = tf.pad(wav, [[0, shortfall]])

    # Magnitude spectrogram -> mel scale -> decibel scale.
    magnitudes = tfio.audio.spectrogram(
        padded, nfft=frame_length, window=frame_length, stride=frame_step)
    mel = tfio.audio.melscale(
        magnitudes, rate=sample_rate, mels=mel_bins, fmin=fmin, fmax=fmax)
    return tfio.audio.dbscale(mel, top_db=80), label

@tf.function
def preprocess_mel_db_tf_nolabel(wav):
    """Turn a waveform into a dB-scaled mel spectrogram (no label).

    Clips shorter than 32000 samples (2 s at 16 kHz) are zero-padded at the end
    up to 32000 samples. Longer clips are NOT cut; they keep their full length,
    exactly as in preprocess_mel_db_tf.

    No channel axis is added here: the ``tf.expand_dims(..., axis=2)`` step for
    Conv2D input was left disabled in the original, with the note that it is not
    needed if the spectrogram is augmented afterwards.

    Args:
        wav: 1-D float32 waveform.

    Returns:
        The dB-scaled mel spectrogram.
    """
    # Number of zeros still needed to reach 32000 samples; 0 for long-enough clips.
    shortfall = tf.maximum(32000 - tf.shape(wav)[0], 0)
    padded = tf.pad(wav, [[0, shortfall]])

    # Magnitude spectrogram -> mel scale -> decibel scale.
    magnitudes = tfio.audio.spectrogram(
        padded, nfft=frame_length, window=frame_length, stride=frame_step)
    mel = tfio.audio.melscale(
        magnitudes, rate=sample_rate, mels=mel_bins, fmin=fmin, fmax=fmax)
    return tfio.audio.dbscale(mel, top_db=80)