In [227]:
import os
import tensorflow as tf 
import tensorflow_io as tfio
import librosa
import numpy as np
from matplotlib import pyplot as plt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, Dense, Flatten
from tensorflow.keras.optimizers import Adam

In [228]:
Yes_Boat = os.path.join('Y_N_Boat', 'y', 'y_CCC_139.20220626_084400.wav')
No_Boat = os.path.join('Y_N_Boat', 'n', 'n_CCC_106.20220628_144100.wav')

In [None]:
No_Boat

# Loading Data

In [230]:
def add_white_noise(signal, noise_factor):
    """Return ``signal`` with zero-mean Gaussian noise mixed in.

    The noise is drawn with the same standard deviation as ``signal`` and is
    multiplied by ``noise_factor`` before being added, so ``noise_factor=0``
    leaves the values unchanged.

    Args:
        signal: array-like of samples (any shape).
        noise_factor: multiplier applied to the noise before it is added.

    Returns:
        A NumPy array with the same shape as ``signal``. Floating-point
        inputs keep their dtype; other dtypes come back as float64.
    """
    signal = np.asarray(signal)
    # Draw the noise with the signal's own shape (not its flat size) so that
    # multi-dimensional inputs broadcast element for element.
    noise = np.random.normal(0, signal.std(), signal.shape)
    augmented_signal = signal + noise * noise_factor
    # np.random.normal yields float64; cast back so float32 audio stays
    # float32 and can be concatenated with float32 tensors later on.
    if np.issubdtype(signal.dtype, np.floating):
        augmented_signal = augmented_signal.astype(signal.dtype, copy=False)
    return augmented_signal

In [231]:
def time_stretch(signal, stretch_rate):
    """Time-stretch ``signal`` and force the result back to the input length.

    ``stretch_rate`` is handed to ``librosa.effects.time_stretch`` as its
    ``rate`` argument. A result longer than the input is cut at the end; a
    shorter one is zero-padded at the end.
    """
    target_len = len(signal)
    warped = librosa.effects.time_stretch(signal, rate=stretch_rate)

    # Positive: samples missing at the end; negative: surplus samples.
    shortfall = target_len - len(warped)

    if shortfall < 0:
        return warped[:target_len]
    if shortfall > 0:
        return np.pad(warped, (0, shortfall), 'constant', constant_values=(0, 0))
    return warped

In [232]:
def pitch_scale(signal, sr, num_semimtones):
    """Pitch-shift ``signal`` via ``librosa.effects.pitch_shift``.

    ``sr`` is forwarded as the sample rate and ``num_semimtones`` as the
    ``n_steps`` argument of the librosa call.
    """
    shift_options = {'sr': sr, 'n_steps': num_semimtones}
    return librosa.effects.pitch_shift(signal, **shift_options)

In [233]:
def brightness(signal, max_delta):
    """Run ``tf.image.random_brightness`` on ``signal`` with ``max_delta``."""
    adjusted = tf.image.random_brightness(signal, max_delta)
    return adjusted

In [234]:
def random_gain(signal, min_gain_factor, max_gain_factor):
    """Multiply ``signal`` by one gain drawn with ``np.random.uniform``.

    The gain is sampled once per call between ``min_gain_factor`` and
    ``max_gain_factor`` and applied to every sample.
    """
    return signal * np.random.uniform(min_gain_factor, max_gain_factor)

In [235]:
def load_wav_16k_mono(filename):
    """Read a WAV file and return it as a 1-D waveform resampled to 16 kHz.

    The file is decoded with ``desired_channels=1``, the channel axis is
    squeezed away, and the result is resampled from the file's own sample
    rate to 16000 Hz.
    """
    # Decode the raw bytes; one channel is requested from the decoder.
    decoded_audio, source_rate = tf.audio.decode_wav(
        tf.io.read_file(filename), desired_channels=1
    )
    # Drop the trailing channel axis so only the sample axis remains.
    mono_wave = tf.squeeze(decoded_audio, axis=-1)
    # The input rate is cast to int64 before being handed to the resampler.
    return tfio.audio.resample(
        mono_wave,
        rate_in=tf.cast(source_rate, dtype=tf.int64),
        rate_out=16000,
    )



In [259]:
def augment(filename, choice=None):
    """Load an audio file, apply one augmentation and resample to 16 kHz.

    Args:
        filename: path of the audio file. ``str``, ``bytes`` and eager string
            tensors are accepted.
        choice: which augmentation to apply -- 0 white noise, 1 time stretch,
            2 pitch shift, 3 random gain. ``None`` (the default) draws one of
            the four uniformly at random.

    Returns:
        A 1-D float32 tensor holding the augmented waveform at 16 kHz.

    Raises:
        ValueError: if ``choice`` is not one of 0, 1, 2, 3.
    """
    # Paths coming out of a tf.data pipeline arrive as tensors or bytes.
    if hasattr(filename, 'numpy'):
        filename = filename.numpy()
    if isinstance(filename, bytes):
        filename = filename.decode('utf-8')

    wav, sr = librosa.load(filename)

    # The previous version drew from {1, 2} and then overwrote the draw with
    # a hard-coded 3, so only random_gain ever ran.
    if choice is None:
        choice = np.random.randint(0, 4)

    if choice == 0:
        aug_wav = add_white_noise(wav, 0.4)
    elif choice == 1:
        aug_wav = time_stretch(wav, 0.9)
    elif choice == 2:
        aug_wav = pitch_scale(wav, sr, 5)
    elif choice == 3:
        aug_wav = random_gain(wav, 1, 2)
    else:
        raise ValueError(f"choice must be 0, 1, 2 or 3, got {choice!r}")

    aug_wav = librosa.resample(aug_wav, orig_sr=sr, target_sr=16000)
    # Pin float32: some augmentations promote to float64, which cannot be
    # concatenated with the float32 zero padding used downstream.
    aug_wav = np.asarray(aug_wav, dtype=np.float32)
    return tf.convert_to_tensor(aug_wav, dtype=tf.float32)

In [270]:
wave = load_wav_16k_mono(Yes_Boat)
nwave = augment(Yes_Boat)

In [None]:
wave

In [None]:
nwave

In [None]:
plt.plot(wave)
plt.plot(nwave)
plt.show()

## Define Paths to Positive and Negative Data

In [239]:
POS = os.path.join('Y_N_Boat', 'y')
NEG = os.path.join('Y_N_Boat', 'n')

## Create Datasets

In [240]:
# Upper bound for the negative set: the number of entries in the NEG folder.
num_files = len(os.listdir(NEG))

# File-path datasets for both classes; negatives are limited to num_files.
pos = tf.data.Dataset.list_files(
    POS+'/*.wav'
)
neg = tf.data.Dataset.list_files(
    NEG+'/*.wav'
).take(num_files)

In [241]:
# Pair every positive path with label 1.0 and every negative path with 0.0.
positives = tf.data.Dataset.zip((
    pos,
    tf.data.Dataset.from_tensor_slices(tf.ones(len(pos))),
))
negatives = tf.data.Dataset.zip((
    neg,
    tf.data.Dataset.from_tensor_slices(tf.zeros(len(neg))),
))
# Positives come first, negatives follow.
data = positives.concatenate(negatives)

In [None]:
#Number of Positive samples
len(pos)

In [None]:
#Number of Negative samples
len(neg)

## Add labels and Combine Positive and Negative Samples

In [244]:
# (path, 1.0) pairs for the positive files.
positives = tf.data.Dataset.zip(
    (pos, tf.data.Dataset.from_tensor_slices(tf.ones(len(pos))))
)
# (path, 0.0) pairs for the negative files.
negatives = tf.data.Dataset.zip(
    (neg, tf.data.Dataset.from_tensor_slices(tf.zeros(len(neg))))
)
# One dataset holding both classes, positives first.
data = positives.concatenate(negatives)

## Calculate Avg Length

In [None]:
# Length (in samples at 16 kHz) of every positive clip.
pos_dir = os.path.join('Y_N_Boat', 'y')
lengths = [
    len(load_wav_16k_mono(os.path.join(pos_dir, file)))
    # Only *.wav entries are decodable; os.listdir also returns any other
    # file or folder that happens to live in the directory.
    for file in sorted(os.listdir(pos_dir))
    if file.endswith('.wav')
]

## Mean, Min and Max

In [None]:
tf.math.reduce_mean(lengths)

In [None]:
tf.math.reduce_min(lengths)

In [None]:
tf.math.reduce_max(lengths)

In [272]:
def preprocess(file_path, label):
    """Turn one audio file into a normalised magnitude spectrogram.

    The waveform is loaded (about one time in three through ``augment``,
    otherwise through ``load_wav_16k_mono``), clipped to 48000 samples,
    left-padded with zeros to exactly 48000 samples, converted to an STFT
    magnitude, standardised to zero mean / unit variance and, one time in
    four, passed through ``brightness``.

    Works both eagerly and inside ``tf.data.Dataset.map``.

    Args:
        file_path: path of the WAV file (str, bytes or scalar string tensor).
        label: passed through unchanged.

    Returns:
        ``(spectrogram, label)`` where ``spectrogram`` has shape
        ``(1491, 257, 1)``.
    """
    target_len = 48000  # samples kept per clip; gives 1491 STFT frames below

    def _load_augmented(path):
        # Runs eagerly inside tf.py_function, so the tensor can be turned
        # into the plain string path that librosa needs.
        return tf.cast(augment(path.numpy().decode('utf-8')), tf.float32)

    # The draw used to come from {1, 2}, so the "== 0" branch never ran.
    # augment() is NumPy/librosa code and must go through tf.py_function to
    # be usable from a traced Dataset.map function.
    draw = tf.random.uniform(shape=[], minval=0, maxval=3, dtype=tf.int32)
    wav = tf.cond(
        tf.equal(draw, 0),
        lambda: tf.py_function(_load_augmented, [file_path], tf.float32),
        lambda: load_wav_16k_mono(file_path),
    )
    # py_function loses static shape information; restore the rank.
    wav = tf.ensure_shape(wav, [None])

    wav = wav[:target_len]
    zero_padding = tf.zeros([target_len] - tf.shape(wav), dtype=tf.float32)
    wav = tf.concat([zero_padding, wav], 0)
    wav = tf.ensure_shape(wav, [target_len])

    spectrogram = tf.signal.stft(wav, frame_length=320, frame_step=32)
    spectrogram = tf.abs(spectrogram)
    spectrogram = tf.expand_dims(spectrogram, axis=-1)

    # Normalization; divide_no_nan returns 0 instead of NaN when the clip is
    # constant (std == 0), e.g. an all-silent recording.
    mean = tf.math.reduce_mean(spectrogram)
    std = tf.math.reduce_std(spectrogram)
    spectrogram = tf.math.divide_no_nan(spectrogram - mean, std)

    draw = tf.random.uniform(shape=[], minval=0, maxval=4, dtype=tf.int32)
    spectrogram = tf.cond(
        tf.equal(draw, 0),
        lambda: brightness(spectrogram, 0.05),
        lambda: spectrogram,
    )

    return spectrogram, label

In [None]:
filepath, label = positives.shuffle(buffer_size=10000).as_numpy_iterator().next()
spectrogram, label = preprocess(filepath, label)
filepath

In [None]:
plt.figure(figsize=(30,20))
plt.imshow(tf.transpose(spectrogram)[0])
plt.show()


#  Create Training and Testing Partitions

In [None]:
# Build the pipeline from the labelled datasets each time so that re-running
# this cell does not map `preprocess` over already-preprocessed batches.
data = (
    positives.concatenate(negatives)
    .map(preprocess)
    # cache() stores the outputs of the first full pass; random choices made
    # inside preprocess are therefore frozen after that pass.
    .cache()
    # Keep one fixed shuffled order. With the default per-epoch reshuffle,
    # a later take()/skip() split selects different samples every epoch and
    # validation examples leak into training.
    .shuffle(buffer_size=1000, reshuffle_each_iteration=False)
    .batch(16)
    .prefetch(8)
)

In [None]:
len(data)

In [None]:
len(data)*0.7

## Split into Training and Testing 

In [None]:
# 70 % of the batches (rounded to the nearest batch) train the model and the
# remainder is held out, whatever the dataset size turns out to be.
train_batches = int(len(data) * 0.7 + 0.5)
train = data.take(train_batches)
test = data.skip(train_batches)

In [None]:
samples, labels = train.as_numpy_iterator().next()

In [None]:
labels

In [None]:
samples.shape

#  Build Model

In [None]:
# Two 3x3 convolution layers followed by a dense head with a single sigmoid
# output; the input is a (1491, 257, 1) spectrogram.
model = Sequential([
    Conv2D(16, (3,3), activation='relu', input_shape=(1491, 257, 1)),
    Conv2D(16, (3,3), activation='relu'),
    Flatten(),
    Dense(128, activation='relu'),
    Dense(1, activation='sigmoid'),
])

In [None]:
opt = Adam(learning_rate=0.0001)
model.compile(
    optimizer=opt,
    loss='BinaryCrossentropy',
    # Name the metrics explicitly: auto-generated names pick up a numeric
    # suffix (recall_1, precision_1, ...) when this cell is re-run in the
    # same kernel, which breaks the hist.history['recall'] /
    # hist.history['precision'] lookups used for plotting.
    metrics=[
        tf.keras.metrics.Recall(name='recall'),
        tf.keras.metrics.Precision(name='precision'),
    ],
)

In [None]:
model.summary()

In [None]:
# Where the weights are written, and the folder that contains them.
checkpoint_path = "checkpoint/YN.ckpt"
checkpoint_dir = os.path.dirname(checkpoint_path)

# Checkpoint callback: weights only, with a log line on every save.
callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_path,
    verbose=1,
    save_weights_only=True,
)

In [None]:
hist = model.fit(train, epochs=5, validation_data=test, callbacks=[callback])

In [None]:
os.listdir(checkpoint_dir)

In [None]:
# Training (red) vs. validation (blue) loss per epoch.
plt.title('Loss')
plt.plot(hist.history['loss'], 'r', label='train')
plt.plot(hist.history['val_loss'], 'b', label='validation')
plt.xlabel('epoch')
plt.legend()
plt.show()

In [None]:
# Training (red) vs. validation (blue) precision per epoch.
plt.title('Precision')
plt.plot(hist.history['precision'], 'r', label='train')
plt.plot(hist.history['val_precision'], 'b', label='validation')
plt.xlabel('epoch')
plt.legend()
plt.show()

In [None]:
# Training (red) vs. validation (blue) recall per epoch.
plt.title('Recall')
plt.plot(hist.history['recall'], 'r', label='train')
plt.plot(hist.history['val_recall'], 'b', label='validation')
plt.xlabel('epoch')
plt.legend()
plt.show()

In [None]:
PATH = os.path.join('Y_N_Boat', 'test')

In [None]:
test = tf.data.Dataset.list_files(PATH+'/*.wav')
# Every file gets the label 1.0. The label vector must be sized from `test`
# itself: zip() stops at the shorter input, so sizing it with len(pos)
# silently dropped files whenever the test folder held more clips than the
# positive folder.
data = tf.data.Dataset.zip((test, tf.data.Dataset.from_tensor_slices(tf.ones(len(test)))))


In [None]:
def load_wav_16k_mono(filename):
    """Decode a WAV file to a single-channel waveform at 16 kHz.

    Re-defines the loader of the same name that appears earlier in the
    notebook; the steps are the same: read, decode one channel, squeeze the
    channel axis, resample to 16000 Hz.
    """
    raw_bytes = tf.io.read_file(filename)
    # One channel is requested, so the decoded tensor ends in a size-1 axis.
    samples, native_rate = tf.audio.decode_wav(raw_bytes, desired_channels=1)
    samples = tf.squeeze(samples, axis=-1)
    # Resample from the file's own rate to 16000 Hz.
    native_rate = tf.cast(native_rate, dtype=tf.int64)
    resampled = tfio.audio.resample(samples, rate_in=native_rate, rate_out=16000)
    return resampled


In [None]:
def preprocess(file_path, label):
    """Deterministic spectrogram preprocessing (no augmentation).

    Once this cell runs, this definition replaces the augmenting
    ``preprocess`` defined earlier in the notebook. The clip is cut to 48000
    samples and left-padded with zeros to 48000 samples, which yields the
    (1491, 257, 1) spectrogram the model's ``input_shape`` declares, and the
    spectrogram is standardised the same way as in the training version.

    Args:
        file_path: path of the WAV file.
        label: passed through unchanged.

    Returns:
        ``(spectrogram, label)``.
    """
    target_len = 48000
    wav = load_wav_16k_mono(file_path)
    wav = wav[:target_len]
    # Padding used to target 80000 samples although the clip is cut to
    # 48000; that produced 2491 frames instead of the model's 1491.
    zero_padding = tf.zeros([target_len] - tf.shape(wav), dtype=tf.float32)
    wav = tf.concat([zero_padding, wav],0)
    spectrogram = tf.signal.stft(wav, frame_length=320, frame_step=32)
    spectrogram = tf.abs(spectrogram)
    spectrogram = tf.expand_dims(spectrogram, axis=2)
    # Same zero-mean / unit-variance scaling as the training preprocess;
    # divide_no_nan avoids NaN for a constant (e.g. silent) clip.
    mean = tf.math.reduce_mean(spectrogram)
    std = tf.math.reduce_std(spectrogram)
    spectrogram = tf.math.divide_no_nan(spectrogram - mean, std)
    return spectrogram, label

In [None]:
filepath, label = data.shuffle(buffer_size=10000).as_numpy_iterator().next()
spectrogram, label = preprocess(filepath, label)

In [None]:
# Preprocess, cache, shuffle, batch and prefetch the files listed in `data`,
# then expose the same pipeline under the name `test`.
data = (
    data.map(preprocess)
    .cache()
    .shuffle(buffer_size=1000)
    .batch(16)
    .prefetch(8)
)
test = data

In [None]:
def load_mp3_16k_mono(filename):
    """Load an audio file as a mono float32 waveform resampled to 16 kHz.

    The file is opened with ``tfio.audio.AudioIOTensor``, its channels are
    averaged into one, and the result is resampled from the file's own
    sample rate to 16000 Hz.

    Args:
        filename: path of the audio file.

    Returns:
        A 1-D float32 tensor.
    """
    res = tfio.audio.AudioIOTensor(filename)
    tensor = res.to_tensor()
    if tensor.dtype.is_integer:
        # Integer PCM: convert before combining channels (summing int16
        # samples can overflow) and scale by the dtype's range.
        # NOTE(review): intended to match the [-1, 1] floats that
        # tf.audio.decode_wav yields for the training clips -- confirm.
        scale = float(tensor.dtype.max) + 1.0
        tensor = tf.cast(tensor, tf.float32) / scale
    else:
        tensor = tf.cast(tensor, tf.float32)
    # Average over the channel axis. Unlike "sum / 2" this is also right for
    # mono files (no halving) and for more than two channels.
    tensor = tf.math.reduce_mean(tensor, axis=1)
    # Extract sample rate and cast
    sample_rate = res.rate
    sample_rate = tf.cast(sample_rate, dtype=tf.int64)
    # Resample to 16 kHz
    wav = tfio.audio.resample(tensor, rate_in=sample_rate, rate_out=16000)
    return wav

In [None]:
def preprocess_mp3(sample, index):
    """Convert one audio window into the spectrogram the model expects.

    Args:
        sample: batch of windows; only the first window (``sample[0]``) is
            used.
        index: second element yielded by the windowing dataset; unused.

    Returns:
        A normalised magnitude spectrogram of shape ``(1491, 257, 1)``.
    """
    # 48000 samples is the clip length the training preprocess keeps and the
    # one that gives the 1491 STFT frames declared in the model's
    # input_shape. Windows longer than that are cut to their first 48000
    # samples; shorter ones are left-padded with zeros.
    target_len = 48000
    sample = sample[0][:target_len]
    zero_padding = tf.zeros([target_len] - tf.shape(sample), dtype=tf.float32)
    wav = tf.concat([zero_padding, sample],0)
    wav = tf.ensure_shape(wav, [target_len])
    spectrogram = tf.signal.stft(wav, frame_length=320, frame_step=32)
    spectrogram = tf.abs(spectrogram)
    spectrogram = tf.expand_dims(spectrogram, axis=2)
    # Same zero-mean / unit-variance scaling as the training preprocess;
    # divide_no_nan avoids NaN for a constant (e.g. silent) window.
    mean = tf.math.reduce_mean(spectrogram)
    std = tf.math.reduce_std(spectrogram)
    spectrogram = tf.math.divide_no_nan(spectrogram - mean, std)
    return spectrogram

In [None]:
# Window length in samples (3 s at 16 kHz): the clip length the training
# preprocess keeps (wav[:48000]) and the one that yields the 1491 STFT frames
# declared in the model's input_shape.
window_len = 48000
test_dir = os.path.join('Y_N_Boat', 'test')

results = {}
# sorted() makes the order of `results` (and of anything written from it)
# reproducible across runs and file systems.
for file in sorted(os.listdir(test_dir)):
    try:
        FILEPATH = os.path.join(test_dir, file)

        wav = load_mp3_16k_mono(FILEPATH)
        # Non-overlapping windows, one per dataset element.
        audio_slices = tf.keras.utils.timeseries_dataset_from_array(
            wav, wav,
            sequence_length=window_len,
            sequence_stride=window_len,
            batch_size=1,
        )
        audio_slices = audio_slices.map(preprocess_mp3)
        audio_slices = audio_slices.batch(64)

        # One prediction per window of the recording.
        results[file] = model.predict(audio_slices)

    except Exception as e:
        # Best effort: report the failing file and carry on with the rest.
        print(f"Error processing file {file}: {e}")

In [None]:
results

In [None]:
def process_results(results, threshold=0.99, min_consecutive=5):
    """Flag a recording when enough consecutive windows are positive.

    Args:
        results: mapping of file name to an array-like of per-window scores.
        threshold: a window counts as positive when its score is strictly
            greater than this value. Comparing with ``== 1`` (the previous
            behaviour) practically never matches a sigmoid output.
        min_consecutive: number of positive windows in a row required to
            flag the recording.

    Returns:
        dict mapping each file name to 1 (flagged) or 0.
    """
    processed_results = {}
    for file, predictions in results.items():
        # Flatten to a plain list of Python floats.
        predictions_list = np.asarray(predictions).ravel().tolist()

        detected = 0
        consecutive = 0
        for prediction in predictions_list:
            if prediction > threshold:
                consecutive += 1
                if consecutive >= min_consecutive:
                    detected = 1
                    break
            else:
                # A non-positive window interrupts the run.
                consecutive = 0

        processed_results[file] = detected

    return processed_results

# Assume 'results' is your original dictionary with the predictions
final_results = process_results(results)
final_results


In [None]:
def process_results_with_groups(results, threshold=0.99, min_groups=4):
    """Count separate runs of positive windows in every recording.

    Args:
        results: mapping of file name to an array-like of per-window scores.
        threshold: a window counts as positive when its score is strictly
            greater than this value. Comparing with ``== 1`` (the previous
            behaviour) practically never matches a sigmoid output.
        min_groups: minimum number of runs needed for the count to be
            reported.

    Returns:
        dict mapping each file name to its number of positive runs, or to 0
        when that number is below ``min_groups``.
    """
    from itertools import groupby

    processed_results = {}
    for file, predictions in results.items():
        scores = np.asarray(predictions).ravel().tolist()
        flags = [score > threshold for score in scores]

        # groupby collapses each stretch of equal flags into one item, so
        # counting the True items counts the runs of positive windows.
        groups = sum(1 for is_positive, _ in groupby(flags) if is_positive)

        processed_results[file] = groups if groups >= min_groups else 0

    return processed_results

# Assume 'results' is your original dictionary with the predictions
final_results_with_groups = process_results_with_groups(results)

final_results_with_groups

In [None]:
# Binarise the per-window scores: 1 when a score exceeds 0.99, otherwise 0.
class_preds = {
    file: [1 if prediction > 0.99 else 0 for prediction in logits]
    for file, logits in results.items()
}

In [None]:

from itertools import groupby

# Collapse each stretch of identical flags into a single key and add the
# keys up; every stretch of 1s therefore contributes exactly 1 to the total.
postprocessed = {}
for file, scores in class_preds.items():
    run_keys = [key for key, _ in groupby(scores)]
    postprocessed[file] = tf.math.reduce_sum(run_keys).numpy()


In [None]:
import csv

# Write one row per recording below a header row.
with open('results.csv', 'w', newline='') as f:
    writer = csv.writer(f, delimiter=',')
    writer.writerow(['recording', 'boat'])
    writer.writerows([key, value] for key, value in postprocessed.items())