<a href="https://colab.research.google.com/github/Bepo2002/Humbug-4YP/blob/main/Test_bench.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [60]:
# Generate a clip with noise and mosquito
# Chop into X second chunks
# Calculate MFCCs
# Save as data point?

import zipfile
from scipy.io import wavfile
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import Audio
import librosa

In [61]:
def extract_sound(filename):
  """Extract `<filename>.wav` from the dataset archives and load it.

  Each archive in `zip_files` is searched for the recording under each of
  the known sub-folders. The first match is extracted below `dest_dir`
  (keeping its folder prefix) and read with `scipy.io.wavfile.read`.

  Args:
    filename: Recording id; it is converted with `str()` and suffixed
      with '.wav' to form the member name.

  Returns:
    The `(samplerate, data)` tuple produced by `wavfile.read`, or `None`
    (after printing a message) when no archive contains the recording.
  """
  zip_files = ['./drive/MyDrive/4YP/train.zip','./drive/MyDrive/4YP/dev.zip']
  folders = ['train/', 'dev/a/', 'dev/b/']
  dest_dir = './drive/MyDrive/4YP/Data'
  wav_name = str(filename) + '.wav'

  for zip_path in zip_files:
    # Open each archive once and list its members once, rather than once per folder
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
      members = set(zip_ref.namelist())
      for folder in folders:
        # Build the candidate from the bare file name every time; prefixing the
        # previous candidate would stack folders ('dev/a/train/...') and never match
        candidate = folder + wav_name
        if candidate in members:
          zip_ref.extract(candidate, dest_dir)
          print(f"{candidate} has been extracted to {dest_dir}")
          return wavfile.read(dest_dir + '/' + candidate)
  print(f"{wav_name} not found in any of the provided ZIP files.")
  return None

# Read in .csv
def csv_read():
  """Load the HumBugDB metadata CSV and return it indexed by the 'id' column."""
  return pd.read_csv('./drive/MyDrive/4YP/Data/humbugdb_meta.csv').set_index('id')

def gaussian(x, mu, sigma):
  """Evaluate an unnormalised Gaussian (peak value 1 at x == mu) of width sigma at x."""
  offset = x - mu
  exponent = -(offset**2) / (2 * sigma**2)
  return np.exp(exponent)

In [62]:
def long_sound_gen(mos_num,noise_factor=1):
  """Mix a randomly chosen mosquito clip into a randomly chosen background clip.

  A background recording (length > 30, sample_rate == 44100) and a mosquito
  recording (3 < length < 10, sample_rate == 44100) are drawn at random from
  the metadata table. The mosquito clip is faded with a Gaussian envelope
  (sigma = clip length / 6, centred on the clip) and added to the background
  at `mos_num` random positions.

  Args:
    mos_num: Number of times the mosquito clip is inserted.
    noise_factor: Gain applied to the background before mixing.

  Returns:
    full_audio: The background scaled by `noise_factor` plus the faded
      mosquito insertions, as a float array with one entry per noise sample.
    samplerate: Sample rate reported for the mosquito recording.
    sound_cat: Array of the same length as `full_audio`; 1 where a mosquito
      clip was inserted, 0 elsewhere.
  """
  metadata = csv_read()

  # NOISE: pick one long background recording
  is_noise = (
      (metadata['sound_type'] == 'background')
      & (metadata['length'] > 30)
      & (metadata['sample_rate'] == 44100)
  )
  noise_id = metadata[is_noise].sample().index.values[0]

  # Only the waveform is needed; the filter above already fixes the rate
  _, noise = extract_sound(noise_id)

  # MOSQUITO: pick one short mosquito recording
  is_mosquito = (
      (metadata['sound_type'] == 'mosquito')
      & (metadata['length'] < 10)
      & (metadata['length'] > 3)
      & (metadata['sample_rate'] == 44100)
  )
  mos_id = metadata[is_mosquito].sample().index.values[0]

  # Import the sound wave and find samplerate
  samplerate, data = extract_sound(mos_id)

  # Initialise the mosquito-only track and the per-sample category labels
  mosquito_track = np.zeros(noise.size)
  sound_cat = np.zeros(noise.size)

  # The fade depends only on the offset from the clip centre, so the faded
  # clip is the same for every insertion and can be built once up front.
  half = data.size//2
  offsets = np.arange(data.size) - half
  faded_clip = data * gaussian(offsets, 0, data.size/6)

  for _ in range(mos_num):

    # Choose a 'time' to put the middle of mosquito sound wave
    mosquito_time = np.random.randint(half, high=noise.size-half, dtype=int)
    print(mosquito_time)

    start = mosquito_time - half
    stop = start + data.size

    # Add (not overwrite) so each insertion carries only its own envelope and
    # overlapping insertions mix instead of clobbering one another
    mosquito_track[start:stop] += faded_clip

    # Label exactly the samples covered by the inserted clip
    sound_cat[start:stop] = 1

  # Add faded mosquito sound to background noise
  full_audio = noise*noise_factor + mosquito_track

  return full_audio, samplerate, sound_cat

In [63]:
def chop_chop(audio,samplerate,clip_size=1):
  """Split a 1-D signal into consecutive fixed-length clips.

  Each clip holds int(samplerate*clip_size) samples (1 second by default).
  Samples left over after the last whole clip are dropped. One extra column
  of zeros is appended, so the result has shape
  (number of clips, samples per clip + 1).
  """
  samples_per_clip = int(samplerate*clip_size)
  n_clips = int(audio.size/samples_per_clip)

  # Keep only whole clips, one clip per row
  clips = audio[:n_clips*samples_per_clip].reshape(n_clips, samples_per_clip)

  # Append the trailing zero column to every row
  zero_column = np.zeros((n_clips, 1))
  return np.hstack((clips, zero_column))

In [64]:
def generate_training_data(mosquitoness=2, noise_factor=0.5, chop_size=1, n_mfcc=10):
  """Generate one synthetic clip and turn it into per-chunk features and labels.

  The clip from long_sound_gen is normalised with librosa, cut into
  chop_size-second chunks by chop_chop, and each chunk is summarised by the
  time-average of its n_mfcc MFCC coefficients.

  Returns:
    mfccs: Array of shape (number of chunks, n_mfcc).
    av_chopped_cat: Mean of the category track over each chunk, i.e. how
      much of the chunk is labelled as mosquito.
  """
  mixed_audio, samplerate, categories = long_sound_gen(mosquitoness,noise_factor)
  clips = chop_chop(librosa.util.normalize(mixed_audio),samplerate,chop_size)

  # One row of averaged MFCCs per chunk
  mfccs = np.zeros((len(clips),n_mfcc))
  for row, clip in enumerate(clips):
    coefficients = librosa.feature.mfcc(y=clip.astype(float), sr=samplerate,  n_mfcc=n_mfcc)
    mfccs[row] = coefficients.mean(axis=1)

  # Chunk the category track the same way and average it per chunk
  av_chopped_cat = chop_chop(categories,samplerate,chop_size).mean(axis=1)

  return mfccs, av_chopped_cat

In [72]:
def multiple_training_data(amount_of_audio, mosquitoness=2, noise_factor=0.5, chop_size=1, n_mfcc=10):
  """Generate several synthetic clips and stack their features and labels.

  Args:
    amount_of_audio: Number of clips to generate; at least one clip is
      always generated.
    mosquitoness, noise_factor, chop_size, n_mfcc: Passed unchanged to
      generate_training_data for every clip.

  Returns:
    X: The per-chunk MFCC matrices of all clips, concatenated along axis 0.
    Y: The matching per-chunk category values, concatenated along axis 0.
  """
  feature_blocks = []
  label_blocks = []
  for _ in range(max(amount_of_audio, 1)):
    # Pass the caller's settings to every clip, not just the first one
    mfccs, av_chopped_cat = generate_training_data(mosquitoness, noise_factor, chop_size, n_mfcc)
    feature_blocks.append(mfccs)
    label_blocks.append(av_chopped_cat)

  # Concatenate once at the end instead of re-copying the growing arrays each pass
  X = np.concatenate(feature_blocks)
  Y = np.concatenate(label_blocks)
  return X,Y

# Build a dataset from 10 synthetic clips, using the default generation settings
coef, cats = multiple_training_data(10)

# Show the dimensions of the stacked feature matrix and of the label vector
print(coef.shape)
print(cats.shape)

train/220009.wav has been extracted to ./drive/MyDrive/4YP/Data
train/3455.wav has been extracted to ./drive/MyDrive/4YP/Data
1659855
274960
train/202174.wav has been extracted to ./drive/MyDrive/4YP/Data
train/1413.wav has been extracted to ./drive/MyDrive/4YP/Data
222964
1670364
train/220016.wav has been extracted to ./drive/MyDrive/4YP/Data
train/1207.wav has been extracted to ./drive/MyDrive/4YP/Data
186309
1639969
train/201143.wav has been extracted to ./drive/MyDrive/4YP/Data
train/1025.wav has been extracted to ./drive/MyDrive/4YP/Data
966073
740547
train/220019.wav has been extracted to ./drive/MyDrive/4YP/Data
train/1521.wav has been extracted to ./drive/MyDrive/4YP/Data
1751272
1192251
train/220020.wav has been extracted to ./drive/MyDrive/4YP/Data
train/3139.wav has been extracted to ./drive/MyDrive/4YP/Data
1078326
316833
train/219997.wav has been extracted to ./drive/MyDrive/4YP/Data
train/220893.wav has been extracted to ./drive/MyDrive/4YP/Data
2171205
3690065
train/2199