In [None]:
import numpy as np

import os
import copy

import pandas as pd

# import IPython.display as ipd # to listen to the files
import librosa # to manipulate audiofiles, audio preprocessing ==> need to read more https://librosa.org/doc/latest/index.html
# import python_speech_features as psf # some filtering and stuff ==> need to read more https://python-speech-features.readthedocs.io/en/latest/
# pyAudioAnalysis looks like a good library for feature extraction: https://github.com/tyiannak/pyAudioAnalysis ==> try?
import librosa.display

from matplotlib import pyplot as plt

import scipy

In [None]:
def get_mfcc(full_audio_path):
    '''
    Load an audio file and return its mean-normalised MFCC matrix.

    The file is decoded at ``sample_rate`` (44.1 kHz), zero-padded or truncated
    to exactly three seconds (132300 samples) and converted to 39 MFCCs per
    frame.

    Background reading:
    https://stackoverflow.com/questions/37963042/python-librosa-what-is-the-default-frame-size-used-to-compute-the-mfcc-feature

    https://medium.com/@jonathan_hui/speech-recognition-feature-extraction-mfcc-plp-5455f5a69dd9#:~:text=PLP%20is%20very%20similar%20to,instead%20of%20the%20log%20compression.&text=It%20also%20uses%20linear%20regressive,and%20slightly%20better%20noise%20robustness.

    Parameters
    ----------
    full_audio_path : str or path-like
        Location of the audio file, handed to ``librosa.load``.

    Returns
    -------
    numpy.ndarray
        The array produced by ``librosa.feature.mfcc`` (39 coefficient rows)
        after the per-frame mean has been subtracted.
    '''
    sample_rate = 44100
    clip_seconds = 3  # 3 s * 44100 Hz == 132300 samples
    hop_size = 512
    fmin = 20
    fmax = sample_rate // 2
    n_fft = 2560  # 20 * 128, the window length used by the original code

    # librosa.load resamples to 22050 Hz unless told otherwise; request the
    # rate that the MFCC computation below assumes so that the clip length and
    # the fmin/fmax band edges refer to real seconds and real Hertz.
    wave, _loaded_rate = librosa.load(full_audio_path, sr=sample_rate)

    # Clip or zero-pad to a fixed length. `size` is passed by keyword because
    # recent librosa releases no longer accept it positionally.
    wave = librosa.util.fix_length(
        wave, size=sample_rate * clip_seconds, mode='constant'
    )

    mfcc_features = librosa.feature.mfcc(
        y=wave,
        sr=sample_rate,
        n_mfcc=39,
        hop_length=hop_size,
        n_fft=n_fft,
        fmin=fmin,
        fmax=fmax,
    )

    # Normalise: for every frame subtract the mean over the coefficients
    # (axis=0). The 1e-8 offset is kept from the original implementation.
    mfcc_features -= (np.mean(mfcc_features, axis=0) + 1e-8)

    return mfcc_features

In [None]:
def load_pickle(path):
    """Return the object stored in the pickle file at ``path``.

    Thin wrapper around ``pandas.read_pickle``.

    NOTE: unpickling can execute arbitrary code, so only use this on files
    that come from a trusted source.
    """
    loaded = pd.read_pickle(path)
    return loaded

In [None]:
def add_noise(signal, noise_std=0.01, rng=None):
    """Return ``signal`` with zero-mean Gaussian noise added to every sample.

    Parameters
    ----------
    signal : array-like
        Samples to perturb. The noise has the same shape as ``signal``.
    noise_std : float, optional
        Standard deviation of the noise. Defaults to 0.01, the value that was
        previously hard-coded.
    rng : numpy.random.Generator or numpy.random.RandomState, optional
        Source of randomness. When omitted, numpy's global random state is
        used, so ``np.random.seed`` keeps controlling the result exactly as
        before.

    Returns
    -------
    numpy.ndarray
        ``signal + noise``; the input itself is not modified.
    """
    sampler = np.random if rng is None else rng
    # Draw one value per sample; np.shape (instead of len) keeps this correct
    # for multi-dimensional input as well.
    noise = sampler.normal(0, noise_std, np.shape(signal))
    return signal + noise

In [None]:
def change_pitch(signal, sample_rate, n_steps=4):
    """Return ``signal`` shifted in pitch by ``n_steps`` half steps.

    Parameters
    ----------
    signal : array-like
        Audio samples, passed to ``librosa.effects.pitch_shift``.
    sample_rate : number
        Sampling rate of ``signal``.
    n_steps : float, optional
        How many (possibly fractional) half steps to shift by. Defaults to 4,
        the value that was previously hard-coded.

    Returns
    -------
    The pitch-shifted signal as returned by librosa.
    """
    # `sr` and `n_steps` are passed by keyword: recent librosa releases made
    # them keyword-only, and the keyword form is accepted by older ones too.
    pitch_shifted = librosa.effects.pitch_shift(
        signal, sr=sample_rate, n_steps=n_steps
    )
    return pitch_shifted

def time_stretch(signal, stretch_factor=0.7):
    """Return a time-stretched copy of ``signal``.

    Parameters
    ----------
    signal : array-like
        Audio samples, passed to ``librosa.effects.time_stretch``.
    stretch_factor : float, optional
        Value forwarded as librosa's ``rate`` argument. Defaults to 0.7, the
        value that was previously hard-coded.

    Returns
    -------
    The stretched signal as returned by librosa.
    """
    # `rate` is passed by keyword: recent librosa releases made it
    # keyword-only, and the keyword form is accepted by older ones too.
    time_stretched = librosa.effects.time_stretch(signal, rate=stretch_factor)
    return time_stretched

In [None]:
def filter_signal(signal, filter_window_length=7):
    """Denoise ``signal`` with a Wiener filter.

    Parameters
    ----------
    signal : array-like
        Samples to filter.
    filter_window_length : int, optional
        Size of the filter window, forwarded as ``mysize``. Defaults to 7, the
        value that was previously hard-coded. scipy documents that this should
        be odd; an odd window can be centred on the sample being filtered.

    Returns
    -------
    numpy.ndarray
        The filtered signal, same shape as the input.
    """
    # Import the submodule explicitly: a bare `import scipy` does not guarantee
    # that `scipy.signal` is loaded on every scipy version.
    from scipy.signal import wiener

    filtered_signal = wiener(signal, mysize=filter_window_length)
    return filtered_signal

In [None]:
def filter_low_energy_signal(signal, threshold):
    """Tell whether ``signal`` carries less energy than ``threshold``.

    The energy is the sum of the frame-wise RMS values computed by librosa.

    Parameters
    ----------
    signal : array-like
        Audio samples.
    threshold : float
        Energy below which the signal counts as low-energy.

    Returns
    -------
    bool
        ``True`` when the summed RMS is strictly below ``threshold``,
        ``False`` otherwise (previously ``None`` was returned implicitly).
    """
    # librosa renamed `feature.rmse` to `feature.rms`; prefer the current name
    # and fall back to the old one on installations that only ship `rmse`.
    rms_fn = getattr(librosa.feature, 'rms', None)
    if rms_fn is None:
        rms_fn = librosa.feature.rmse

    # `y` is passed by keyword because recent librosa releases require it.
    energy = np.sum(rms_fn(y=signal))

    return bool(energy < threshold)

In [None]:
#Function to randomly mask out a stretch of time
def mask_time(spectrogram, max_masked=25, n_masks=1):
    """Return a copy of ``spectrogram`` with random stretches of columns zeroed.

    Parameters
    ----------
    spectrogram : 2-D array
        Input array; columns (second axis) are the ones masked here. The input
        is left untouched.
    max_masked : int, optional
        Largest number of consecutive columns a single mask may cover. Values
        larger than the number of columns are capped at that number.
    n_masks : int, optional
        How many masks to apply.

    Returns
    -------
    A deep copy of ``spectrogram`` with the masked columns set to 0.
    """
    _, n_frames = spectrogram.shape
    altered_spec = copy.deepcopy(spectrogram)

    # A mask can never be wider than the axis it is applied to; without this
    # cap the position draw below fails for short inputs.
    widest = min(max_masked, n_frames)

    for _ in range(n_masks):
        mask_size = np.random.randint(low=0, high=widest + 1)
        # `high` is exclusive, hence the +1: the mask is allowed to end flush
        # with the last column.
        mask_point = np.random.randint(low=0, high=n_frames - mask_size + 1)
        altered_spec[:, mask_point:mask_point + mask_size] = 0

    return altered_spec

# Function to randomly mask out a stretch of frequencies
def mask_freq(spectrogram, max_masked=25, n_masks=1):
    """Return a copy of ``spectrogram`` with random stretches of rows zeroed.

    Parameters
    ----------
    spectrogram : 2-D array
        Input array; rows (first axis) are the ones masked here. The input is
        left untouched.
    max_masked : int, optional
        Largest number of consecutive rows a single mask may cover. Values
        larger than the number of rows are capped at that number.
    n_masks : int, optional
        How many masks to apply.

    Returns
    -------
    A deep copy of ``spectrogram`` with the masked rows set to 0.
    """
    n_bins, _ = spectrogram.shape
    altered_spec = copy.deepcopy(spectrogram)

    # A mask can never be taller than the axis it is applied to; without this
    # cap the position draw below fails for inputs with few rows.
    widest = min(max_masked, n_bins)

    for _ in range(n_masks):
        mask_size = np.random.randint(low=0, high=widest + 1)
        # `high` is exclusive, hence the +1: the mask is allowed to end flush
        # with the last row.
        mask_point = np.random.randint(low=0, high=n_bins - mask_size + 1)
        altered_spec[mask_point:mask_point + mask_size, :] = 0

    return altered_spec

# Function to randomly mask out time and frequency stretches
def mask_both(spectrogram, max_masked_time, n_time, max_masked_freq, n_freq):
    """Apply time masking followed by frequency masking to ``spectrogram``.

    Parameters
    ----------
    spectrogram : 2-D array
        Input array; it is not modified.
    max_masked_time, n_time :
        Forwarded to ``mask_time`` as its ``max_masked`` and ``n_masks``.
    max_masked_freq, n_freq :
        Forwarded to ``mask_freq`` as its ``max_masked`` and ``n_masks``.

    Returns
    -------
    The masked copy produced by ``mask_freq`` applied to the output of
    ``mask_time``.
    """
    # No extra copy is taken here: mask_time and mask_freq each work on their
    # own deep copy of what they receive, so the caller's array stays intact.
    time_masked = mask_time(spectrogram, max_masked_time, n_time)
    return mask_freq(time_masked, max_masked_freq, n_freq)

In [None]:
# Build a dictionary with the label as key and the list of audio file paths for that label as value
def createFilepathDic(dfrow,datadic,wd):
    """Record the file described by ``dfrow`` under its label in ``datadic``.

    Parameters
    ----------
    dfrow : indexable
        Row whose element 0 is the file path relative to ``wd`` and whose
        element 1 is the label (assumes positional indexing works on the row
        type the caller passes; verify for pandas Series with string labels).
    datadic : dict
        Mapping ``label -> list of full file paths``; updated in place.
    wd : str
        Directory that the relative path is joined onto.

    Returns
    -------
    dict
        The same ``datadic`` object, for convenience.
    """
    path = os.path.join(wd, dfrow[0])
    label = dfrow[1]
    # setdefault creates the list on first sight of a label and reuses it
    # afterwards, so both cases share one code path.
    datadic.setdefault(label, []).append(path)
    return datadic

In [None]:
# Add the files found under a new directory (one sub-folder per label) to the existing dictionary
def add_to_DataDic(datadic,newfile,wd):
    """Add every file below ``wd/newfile/<label>/`` to ``datadic[label]``.

    Each entry of ``wd/newfile`` that is a directory is treated as a label and
    every entry inside it is appended, as a full path, to that label's list.
    Entries of ``wd/newfile`` that are not directories are skipped.

    Parameters
    ----------
    datadic : dict
        Mapping ``label -> list of file paths``; updated in place.
    newfile : str
        Directory, relative to ``wd``, holding one sub-folder per label. A
        trailing path separator is optional.
    wd : str
        Base directory.

    Returns
    -------
    dict
        The same ``datadic`` object, for convenience.
    """
    root = os.path.join(wd, newfile)
    labels = os.listdir(root)
    print(labels)

    for label in labels:
        print(label)
        # Join with a real path separator; plain string concatenation only
        # worked when `newfile` happened to end with one.
        label_dir = os.path.join(root, label)
        if not os.path.isdir(label_dir):
            # Stray files next to the label folders are not labels.
            continue

        label_files = datadic.setdefault(label, [])
        label_files.extend(
            os.path.join(label_dir, file) for file in os.listdir(label_dir)
        )

    return datadic

In [None]:
class GetFeatureDic:
    """Turn a ``label -> file paths`` mapping into ``label -> features``.

    The feature computation itself is delegated to the callable supplied at
    construction time.
    """

    def __init__(self, extractor):
        # Callable invoked with a single path; its return value is the feature.
        self.extractor = extractor

    def createFeatureDic(self, datadic):
        """Return a new dict mapping each label of ``datadic`` to the list of
        ``extractor(path)`` results for that label's paths, in order."""
        return {
            label: [self.extractor(path) for path in paths]
            for label, paths in datadic.items()
        }