<a href="https://colab.research.google.com/github/Becode-turing-5-30/flood-sound-detection/blob/main/start.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Setup for connecting Google Colab -> Google Drive -> GitHub

In [10]:
# Mount Google Drive and derive the project folder
from os.path import join
from google.colab import drive  # Colab helper that exposes Google Drive as a filesystem

# Mount point Colab uses for Google Drive
ROOT = '/content/drive'
# Location of the project inside the Drive, relative to the mount point
MY_GOOGLE_DRIVE_PATH = 'My Drive/ML-Projects/flood-sound-detection'

# Absolute path of the project on the mounted Drive
PROJECT_PATH = join(ROOT, MY_GOOGLE_DRIVE_PATH)
print("PROJECT_PATH: ", PROJECT_PATH)

# Attach the Drive at ROOT
drive.mount(ROOT)

PROJECT_PATH:  /content/drive/My Drive/ML-Projects/flood-sound-detection
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
# GitHub Repository setup

# Read the personal access token from a JSON file kept outside the repository
import json
ENV_JSON_PATH = f'{ROOT}/My Drive/ML-Projects/env.json' # replace with the path to your JSON file holding the API token
with open(ENV_JSON_PATH, encoding='utf-8') as env_file:
  env_settings = json.load(env_file)

# Fail early with a clear message instead of building an unusable clone URL
my_token = str(env_settings.get("TOKEN", "")).strip()
if not my_token:
  raise ValueError(f'No "TOKEN" entry found in {ENV_JSON_PATH}')

# GIT
GIT_USERNAME = "Becode-turing-5-30"                     # replace with your GitHub username
GIT_TOKEN = my_token
GIT_REPOSITORY = "flood-sound-detection"                # replace with the name of your GitHub repository

# SECURITY NOTE: the token is embedded in this URL, so git keeps it in the
# remote configuration and echoes it in error messages. Never print GIT_PATH,
# and clear any cell output that shows the remote URL before sharing or
# committing the notebook.
GIT_PATH = f"https://{GIT_TOKEN}@github.com/{GIT_USERNAME}/{GIT_REPOSITORY}.git"

In [3]:
# Make the project folder on the mounted Drive the working directory,
# then display the directory that is now current.
%cd {PROJECT_PATH}
%pwd

/content/drive/My Drive/ML-Projects/flood-sound-detection


'/content/drive/My Drive/ML-Projects/flood-sound-detection'

# Git Clone

In [None]:
# GIT CLONE on mounted Google drive if not done yet.
%cd {PROJECT_PATH}
!git clone "{GIT_PATH}" . # clone the github repository

/content/drive/My Drive/ML-Projects/flood-sound-detection
fatal: destination path '.' already exists and is not an empty directory.


In [None]:
# Change to current project directory
# (the `!git` commands in the following cells run relative to this folder)
%cd {PROJECT_PATH} 

/content/drive/My Drive/ML-Projects/flood-sound-detection


# Git Pull

In [None]:
# Download new objects and refs from the remote
!git fetch
# Integrate the remote `main` branch into the branch that is currently checked out
!git pull origin main

From https://github.com/Becode-turing-5-30/flood-sound-detection
 * branch            main       -> FETCH_HEAD
Updating fd5b31a..5f0b9a2
Checking out files: 100% (8/8), done.
Fast-forward
 .gitignore                        |       4 +
 URL/Untitled.ipynb                |   93054 ++
 URL/database_final.csv            |    7082 +
 URL/unbalanced_train_segments.csv | 2041792 ++++++++++++++++++++++++++++++++++
 micro-stream-analyser.ipynb       |     155 +
 readme-youtube.md                 |      14 +
 start_here.ipynb                  |       2 +-
 youtube-extraction.py             |      90 +
 8 files changed, 2142192 insertions(+), 1 deletion(-)
 create mode 100644 URL/Untitled.ipynb
 create mode 100644 URL/database_final.csv
 create mode 100644 URL/unbalanced_train_segments.csv
 create mode 100644 micro-stream-analyser.ipynb
 create mode 100644 readme-youtube.md
 create mode 100644 youtube-extraction.py


# GitHub useful commands

In [22]:
# Check statuses of the files that changed
# (reports the current branch and which modifications are staged or unstaged)
!git status 

On branch develop
Changes not staged for commit:
  (use "git add <file>..." to update what will be committed)
  (use "git checkout -- <file>..." to discard changes in working directory)

	[31mmodified:   yammnet_notebook.ipynb[m

no changes added to commit (use "git add" and/or "git commit -a")


In [None]:
# Create branch and checkout
# (-b creates the `develop` branch and switches to it in one step)
!git checkout -b develop 

M	start_here.ipynb
Switched to a new branch 'develop'


In [None]:
# checkout
# Switch back to `main`. --force proceeds even when the working tree differs
# from HEAD and throws those local changes away, so commit or stash first.
!git checkout main --force

Switched to branch 'main'
Your branch is behind 'origin/main' by 17 commits, and can be fast-forwarded.
  (use "git pull" to update your local branch)


# GitHub commit changes and push

In [23]:
# Add / track changed files
# ("." stages everything under the current directory, new files included)
!git add .

In [24]:
# Commit the changes
!git config --global user.email "kethien.ng@gmail.com"
!git config --global user.name "KeThien"
!git commit -m '"Commit from colab: yammnet model training"'

[develop 5cdea6c] "Commit from colab: yammnet model training"
 1 file changed, 1 insertion(+), 1 deletion(-)
 rewrite yammnet_notebook.ipynb (76%)


In [26]:
# Push the changes to GitHub Repository
# If the push is rejected with "fetch first", the remote branch holds commits
# that are missing locally: integrate them (e.g. `git pull origin develop`)
# and push again.
!git push origin develop

To https://github.com/Becode-turing-5-30/flood-sound-detection.git
 ! [rejected]        develop -> develop (fetch first)
error: failed to push some refs to 'https://<REDACTED_TOKEN>@github.com/Becode-turing-5-30/flood-sound-detection.git'
hint: Updates were rejected because the remote contains work that you do
hint: not have locally. This is usually caused by another repository pushing
hint: to the same ref. You may want to first integrate the remote changes
hint: (e.g., 'git pull ...') before pushing again.
hint: See the 'Note about fast-forwards' in 'git push --help' for details.


# IMPORT CODE FROM yammnet_training.py

In [4]:
# Install tensorflow_io, imported below as `tfio` and used to resample audio.
# The recorded run resolved version 0.19.1; consider pinning a version that
# matches the installed TensorFlow so the notebook stays reproducible.
!pip install tensorflow_io

Collecting tensorflow_io
  Downloading tensorflow_io-0.19.1-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (22.7 MB)
[K     |████████████████████████████████| 22.7 MB 49.8 MB/s 
Collecting tensorflow-io-gcs-filesystem==0.19.1
  Downloading tensorflow_io_gcs_filesystem-0.19.1-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (2.3 MB)
[K     |████████████████████████████████| 2.3 MB 41.0 MB/s 
Installing collected packages: tensorflow-io-gcs-filesystem, tensorflow-io
Successfully installed tensorflow-io-0.19.1 tensorflow-io-gcs-filesystem-0.19.1


In [11]:
import os

from IPython import display
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.utils import shuffle

import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_io as tfio

# TF Hub handle of the YAMNet model
yamnet_model_handle = 'https://tfhub.dev/google/yamnet/1'
# Loaded once at module level; extract_embedding() calls this global
yamnet_model = hub.load(yamnet_model_handle)

class ReduceMeanLayer(tf.keras.layers.Layer):
  """Keras layer that averages its input tensor along a single axis.

  Args:
    axis: Axis handed to `tf.math.reduce_mean`; defaults to 0.
    **kwargs: Forwarded unchanged to `tf.keras.layers.Layer` (e.g. `name`).
  """

  def __init__(self, axis=0, **kwargs):
    super(ReduceMeanLayer, self).__init__(**kwargs)
    self.axis = axis

  def call(self, input):
    """Return the mean of `input` reduced over `self.axis`."""
    return tf.math.reduce_mean(input, axis=self.axis)

  def get_config(self):
    """Return the layer config including `axis`.

    Without this override a layer rebuilt from its config would silently fall
    back to the default axis.
    """
    config = super(ReduceMeanLayer, self).get_config()
    config.update({'axis': self.axis})
    return config


@tf.function
def load_wav_16k_mono(filename):
    """Read a WAV file and return its samples as a mono tensor resampled to 16 kHz."""
    # Decode the raw bytes, forcing a single channel
    decoded_audio, source_rate = tf.audio.decode_wav(
        tf.io.read_file(filename), desired_channels=1)
    # Drop the trailing channel axis
    mono_samples = tf.squeeze(decoded_audio, axis=-1)
    # The source rate is cast to int64 before being handed to the resampler
    return tfio.audio.resample(
        mono_samples,
        rate_in=tf.cast(source_rate, dtype=tf.int64),
        rate_out=16000)

def load_wav_for_map(filename, label, fold):
  """Replace the file path with its decoded 16 kHz mono waveform; label and fold pass through."""
  waveform = load_wav_16k_mono(filename)
  return waveform, label, fold

def extract_embedding(wav_data, label, fold):
  """Run YAMNet on a waveform and pair every embedding row with the clip's label and fold."""
  # The model returns (scores, embeddings, spectrogram); only the embeddings are used
  _, frame_embeddings, _ = yamnet_model(wav_data)
  # Label and fold are repeated once per embedding row
  frame_count = tf.shape(frame_embeddings)[0]
  repeated_label = tf.repeat(label, frame_count)
  repeated_fold = tf.repeat(fold, frame_count)
  return frame_embeddings, repeated_label, repeated_fold

def dataframe_creator(main_directory, categories=(), random_state=None):
    """Index the audio files of each category and assign train/validation/test folds.

    Every entry of `categories` names a sub-directory of `main_directory`; each
    file found there becomes one row. Rows are shuffled and then split by
    position:

    * fold 0 (train):      the first 0.9 * 0.8 = 72 % of the rows
    * fold 1 (validation): the rows up to the 80 % mark
    * fold 2 (test):       the remaining rows

    The previous implementation tested `index <= 0.8 * n` before
    `index < 0.9 * 0.8 * n`, so the second test could never succeed and fold 1
    stayed empty.

    Args:
        main_directory: Directory that contains one sub-directory per category.
        categories: Sub-directory names; the position in this sequence becomes
            the integer `target` of the files found in it.
        random_state: Optional seed forwarded to `DataFrame.sample` to make the
            shuffle (and therefore the split) reproducible.

    Returns:
        DataFrame with columns `filename`, `fold`, `target`, `category` and a
        fresh 0..n-1 index.

    Raises:
        OSError: If a category directory cannot be listed.
    """
    columns = ['filename', 'fold', 'target', 'category']

    # Collect plain records first; growing a DataFrame row by row is quadratic
    records = []
    for target, category in enumerate(categories):
        directory = f'{main_directory}/{category}'
        # sorted() keeps the pre-shuffle order independent of the filesystem
        for filename in sorted(os.listdir(directory)):
            records.append({'filename': f'{directory}/{filename}', 'fold': 0,
                            'target': target, 'category': category})

    df = pd.DataFrame(records, columns=columns)
    df = df.astype({'target': 'int', 'fold': 'int'})
    df = df.sample(frac=1, random_state=random_state).reset_index(drop=True)

    n_rows = len(df)
    test_start = int(0.8 * n_rows)        # rows from here on are held out for testing
    val_start = int(0.9 * 0.8 * n_rows)   # last tenth of the non-test rows is validation
    folds = [0 if position < val_start else (1 if position < test_start else 2)
             for position in range(n_rows)]
    df['fold'] = pd.Series(folds, index=df.index, dtype='int')

    return df

def dataset_builder(df):
    """Turn the file index into batched train/validation/test datasets of YAMNet embeddings.

    Args:
        df: Frame with `filename`, `target` and `fold` columns, as produced by
            `dataframe_creator`.

    Returns:
        Tuple `(train_ds, val_ds, test_ds)` of datasets yielding
        `(embedding, label)` batches of 32; only the training set is shuffled.
    """
    # One element per file: (path, class id, fold id)
    file_ds = tf.data.Dataset.from_tensor_slices(
        (df['filename'], df['target'], df['fold']))
    # Decode the audio, then split each clip into one element per embedding row
    embedding_ds = file_ds.map(load_wav_for_map).map(extract_embedding).unbatch()
    cached_ds = embedding_ds.cache()

    def _fold_subset(fold_id):
        """Keep the elements of one fold and drop the fold column."""
        subset = cached_ds.filter(lambda embedding, label, fold: fold == fold_id)
        return subset.map(lambda embedding, label, fold: (embedding, label))

    train_ds = _fold_subset(0).cache().shuffle(1000).batch(32).prefetch(tf.data.AUTOTUNE)
    val_ds = _fold_subset(1).cache().batch(32).prefetch(tf.data.AUTOTUNE)
    test_ds = _fold_subset(2).cache().batch(32).prefetch(tf.data.AUTOTUNE)
    return train_ds, val_ds, test_ds

def model_builder(train_ds, val_ds, classes):
    """Train a classifier on YAMNet embeddings and wrap it into a raw-audio serving model.

    Args:
        train_ds: Dataset of `(embedding, label)` batches used for fitting.
        val_ds: Dataset of `(embedding, label)` batches used as validation data.
        classes: Sequence of class names; only its length is used, to size the
            output layer.

    Returns:
        A `tf.keras.Model` that takes a waveform tensor (input named 'audio'),
        runs the YAMNet hub layer, applies the trained classifier to the
        embeddings and averages the resulting logits over axis 0.
    """
    yamnet_model_handle = 'https://tfhub.dev/google/yamnet/1'

    model = tf.keras.Sequential([
        # (1024,) is a one-element tuple; the former (1024) was a bare integer
        tf.keras.layers.Input(shape=(1024,), dtype=tf.float32,
                              name='input_embedding'),
        tf.keras.layers.Dense(512, activation='relu'),
        tf.keras.layers.Dropout(0.25),
        tf.keras.layers.Dense(512, activation='relu'),
        # No activation: the loss below is configured with from_logits=True
        tf.keras.layers.Dense(len(classes))
    ], name='my_model')

    model.compile(loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
                  optimizer="adam",
                  metrics=['accuracy'])

    # NOTE(review): early stopping watches the training loss although validation
    # data is supplied; monitoring 'val_loss' may be what is wanted - confirm intent.
    callback = tf.keras.callbacks.EarlyStopping(monitor='loss',
                                                patience=3,
                                                restore_best_weights=True)
    # `callbacks` is documented as a list of callback instances
    model.fit(train_ds, epochs=50, validation_data=val_ds, callbacks=[callback])

    # Serving graph: waveform -> YAMNet embeddings -> classifier -> mean over axis 0
    input_segment = tf.keras.layers.Input(shape=(), dtype=tf.float32, name='audio')
    embedding_extraction_layer = hub.KerasLayer(yamnet_model_handle,
                                                trainable=False, name='yamnet')
    _, embeddings_output, _ = embedding_extraction_layer(input_segment)
    serving_outputs = model(embeddings_output)
    serving_outputs = ReduceMeanLayer(axis=0, name='classifier')(serving_outputs)
    serving_model = tf.keras.Model(input_segment, serving_outputs)
    return serving_model

In [12]:
# Build the file index for the two classes
with tf.device('/device:GPU:0'):
  # Sub-folder names under 'sounds'; the list position becomes the integer target
  classes = ['yes', 'no']
  df = dataframe_creator('sounds',classes)

In [14]:
# Build the batched train/validation/test datasets from the file index
with tf.device('/device:GPU:0'):
  train_ds, val_ds, test_ds = dataset_builder(df)

In [15]:
# Train the classifier; `model` is the serving model returned by model_builder
with tf.device('/device:GPU:0'):
  model = model_builder(train_ds, val_ds, classes)





Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50


In [16]:
# Export the serving model under ./stream_yamnet, leaving the optimizer state out
saved_model_path = './stream_yamnet'
model.save(saved_model_path, include_optimizer=False)





INFO:tensorflow:Assets written to: ./stream_yamnet/assets


INFO:tensorflow:Assets written to: ./stream_yamnet/assets
