# SVM Classifier Training

This notebook trains a classifier for use in <em>The Shapeshifter</em>. The dataset should be provided as one audio file per <em>phase</em> of the performance, with each file containing all examples of the vocalisations that the performer associates with that <em>phase</em>.

## 1. Import Libraries

In [None]:
import os

import IPython.display as ipd
import librosa
import matplotlib.pyplot as plt
import numpy as np
import scipy
import sklearn
from joblib import dump,load
from maad import sound
from maad.rois import find_rois_cwt
from maad.util import plot_spectrogram
from pyAudioAnalysis import audioBasicIO as aIO
from pyAudioAnalysis import audioSegmentation as aS
from scipy import signal
from sklearn import *

## 2. Set Variables

In [None]:
# Sample rate passed to the audio loader for every recording
sr = 48000

# Directory that holds all of the audio files
filepath = ""

# Base name shared by the audio files. The loading cell appends the phase
# number and ".wav" directly to this value, so include any separator in the
# name itself (e.g. filename = "take_" loads take_1.wav, take_2.wav, ...).
filename = ""

## 3. Import Audio Files

In [None]:
# Load one recording per phase into a dict keyed "phase_1" ... "phase_<n_phases>".
n_phases = 9  # number of phase recordings expected on disk

ds_dict = {}

for i in range(1, n_phases + 1):
    # os.path.join keeps the path relative when `filepath` is left empty
    # (plain '/' concatenation would point at the filesystem root) and
    # tolerates a trailing separator on `filepath`.
    wav_path = os.path.join(filepath, filename + str(i) + ".wav")
    ds_dict[f"phase_{i}"], _ = librosa.load(wav_path, sr=sr)

## 4. Normalise Audio Files

In [None]:
# Peak-normalise every recording so its largest absolute sample becomes 1.
for key, audio in ds_dict.items():

    # Nothing to scale in an empty recording
    if audio.size == 0:
        continue

    peak = np.max(np.abs(audio))

    # A silent recording has a zero peak; dividing would fill it with NaN,
    # so it is left unchanged.
    if peak > 0:
        # Re-assigning an existing key does not resize the dict, so this is
        # safe while iterating over it.
        ds_dict[key] = audio / peak

## 5. Define Functions to Extract Examples

In [None]:
# Obtains a spectrogram of an audio file

def getSpectrogram(audio, sr, plot=True, nperseg=1024, noverlap=512):
    """Compute a spectrogram of `audio` and optionally plot it.

    Args:
        audio: Audio samples handed to `sound.spectrogram`.
        sr: Sample rate of `audio`.
        plot: When truthy, draw the spectrogram with `plot_spectrogram`.
        nperseg: Window length forwarded to `sound.spectrogram`.
        noverlap: Window overlap forwarded to `sound.spectrogram`.

    Returns:
        The four values returned by `sound.spectrogram`, unchanged:
        `(Sxx, tn, fn, ext)` - presumably the spectrogram matrix, its time
        and frequency vectors and the plotting extent (see the scikit-maad
        documentation).
    """
    Sxx, tn, fn, ext = sound.spectrogram(audio, sr, nperseg=nperseg, noverlap=noverlap)

    if plot:
        plot_spectrogram(Sxx, extent=ext, db_range=60, gain=20, colorbar=False, figsize=(2.5, 10))

    return Sxx, tn, fn, ext

In [None]:
# Find the frequency band with the highest energy to provide as centre freq to regions of interest

def getCentreFreq(spectrogram, fn):
    """Return the entry of `fn` whose spectrogram row has the highest mean.

    Args:
        spectrogram: 2-D array indexed as [row, column]; one row per entry
            of `fn`.
        fn: Sequence of frequency values, one per spectrogram row.

    Returns:
        `fn[k]`, where `k` is the first row with the largest mean value.
        Falls back to `fn[0]` when there are no rows or every row mean is NaN.
    """
    spec = np.asarray(spectrogram)

    # No rows to compare: fall back to the first frequency band
    if spec.shape[0] == 0:
        return fn[0]

    row_means = spec.mean(axis=1)

    # Rows whose mean is NaN can never win the comparison
    row_means = np.where(np.isnan(row_means), -np.inf, row_means)

    # argmax compares rows against each other, so spectrograms whose values
    # are all zero or negative (e.g. dB-scaled) are handled correctly too.
    return fn[int(np.argmax(row_means))]

In [None]:
# Segments an audio file based upon regions of interest identified in a spectrogram
# If display == True plots are created

def segmentAudio(audio, spectrogram, centre, sr, display=True):
    """Cut `audio` into peak-normalised segments, one per region of interest.

    Regions are found with `find_rois_cwt` in the frequency band
    `centre - centre/2` to `centre + centre/2`.

    Args:
        audio: 1-D array of audio samples.
        spectrogram: Not used by this function; kept so existing calls
            continue to work.
        centre: Centre frequency of the band searched for regions.
        sr: Sample rate of `audio`.
        display: When truthy, `find_rois_cwt` is asked to plot, the regions
            table is printed, and the segments are offered for playback
            separated by half a second of silence.

    Returns:
        List of 1-D arrays, one per non-empty region, each scaled so that
        its largest absolute sample is 1 (silent segments are left as is).
    """
    half_band = centre / 2

    # Find regions of interest
    df = find_rois_cwt(audio, sr, flims=(centre - half_band, centre + half_band),
                       tlen=1, th=0, display=display, figsize=(10, 6))
    if display:
        print(df)

    audio_list = []

    # Segment audio
    for row in range(df.shape[0]):
        # Columns 1 and 3 are read by position, exactly as before; presumably
        # they are the region's start and end times in seconds (min_t / max_t
        # in scikit-maad) - confirm against the installed version.
        start = int(df.iloc[row, 1] * sr)
        stop = int(df.iloc[row, 3] * sr)
        aud = audio[start:stop]

        # A region that rounds to zero samples carries no example
        if aud.size == 0:
            continue

        # Peak-normalise; a silent segment has a zero peak and is not divided
        peak = np.max(np.abs(aud))
        if peak > 0:
            aud = aud / peak

        audio_list.append(aud)

    # Playback is only possible when at least one segment was found
    if display and audio_list:
        gap = np.zeros(int(sr / 2))
        pieces = []
        for segment in audio_list:
            pieces.extend((segment, gap))

        # An Audio object created inside a function is only rendered when it
        # is explicitly displayed.
        ipd.display(ipd.Audio(np.concatenate(pieces), rate=sr))

    return audio_list

## 6. Create Dataset

In [None]:
# Build one list of segmented examples per phase, in phase-number order.
dataset = []

for phase_no in range(1, len(ds_dict) + 1):
    phase_audio = ds_dict['phase_' + str(phase_no)]

    # Spectrogram -> dominant frequency band -> regions of interest
    Sxx, tn, fn, ext = getSpectrogram(phase_audio, sr)
    centre_freq = getCentreFreq(Sxx, fn)

    dataset.append(segmentAudio(phase_audio, Sxx, centre_freq, sr))

## 7. Create labels

In [None]:
# One integer label per example: the 0-based position of the example's
# group within `dataset` (every example in dataset[0] is labelled 0, etc.).
# dtype=int keeps the array integer-typed even when there are no examples.
labels = np.array(
    [group_idx for group_idx, examples in enumerate(dataset) for _ in examples],
    dtype=int,
)

## 8. Extract Features

In [None]:
# 1-Dimensional linear interpolation

def lin_interp_1d(data, out_size): # From Stefano Fasciani
    """Resample a 1-D sequence to exactly `out_size` points by linear interpolation.

    The output is sampled at positions 0, s, 2s, ... of the input index axis,
    with s = (len(data) - 1) / out_size, so the first input value is always
    included and the last one is approached but not reached.

    Args:
        data: 1-D array-like of real values.
        out_size: Number of points in the result.

    Returns:
        1-D float array of length `out_size`.

    Raises:
        ValueError: If `data` is empty.
    """
    data = np.asarray(data)
    in_size = data.shape[0]

    if in_size == 0:
        raise ValueError("cannot interpolate an empty array")

    # A single sample has no slope to follow (and would give a zero step):
    # the result is that value repeated.
    if in_size == 1:
        return np.full(out_size, data[0], dtype=float)

    # linspace yields exactly out_size positions inside [0, in_size - 1),
    # avoiding the length and range drift of a float-step arange.
    x_out = np.linspace(0, in_size - 1, num=out_size, endpoint=False)

    return np.interp(x_out, np.arange(in_size), data)

In [None]:
# Extract rms and spectral centroid
#
# Each example becomes one feature row: its RMS contour resampled to
# n_points values, followed by its spectral-centroid contour resampled to
# n_points values.

n_points = 100  # length of each resampled contour

# Flatten the per-group lists so rows follow the same order as the labels
examples = [audio for clas in dataset for audio in clas]

feats = np.zeros((len(examples), 2 * n_points))

for row, audio in enumerate(examples):
    rms = librosa.feature.rms(y=audio).flatten()
    feats[row, :n_points] = lin_interp_1d(rms, n_points)

    cent = librosa.feature.spectral_centroid(y=audio, sr=sr).flatten()
    # Interpolate the centroid contour itself (the name `mels` is not
    # defined anywhere in this notebook).
    feats[row, n_points:] = lin_interp_1d(cent, n_points)

## 9. Train Model

In [None]:
# Support-vector classifier with an RBF kernel and regularisation parameter C = 1
svm = sklearn.svm.SVC(C=1, kernel='rbf')

In [None]:
# Hold out 20% of the examples for testing; the training split has to exist
# before the model can be fitted on it.
feats_train, feats_test, lab_train, lab_test = sklearn.model_selection.train_test_split(feats, labels, test_size=0.2)

svm.fit(feats_train, lab_train)

## 10. Repeat 100 Times and Calculate Mean F1

In [None]:
# Score the classifier over 100 independent random 80/20 splits and report
# the mean weighted F1. `svm` is re-created on every pass, so after the loop
# it holds the model fitted on the final split.
f1 = []

for run in range(100):

    split = sklearn.model_selection.train_test_split(feats, labels, test_size=0.2)
    feats_train, feats_test, lab_train, lab_test = split

    # fit() returns the estimator itself, so construction and training chain
    svm = sklearn.svm.SVC(C=10, kernel='rbf').fit(feats_train, lab_train)

    lab_predict = svm.predict(feats_test)
    score = sklearn.metrics.f1_score(lab_test, lab_predict, average='weighted')
    f1.append(score)

f1 = np.array(f1)
f1.mean()

## 11. Export Model

In [None]:
# Directory the trained classifier is written to; an empty string means the
# current working directory.
filepath = ''

# os.path.join keeps the target relative when `filepath` is empty (plain '/'
# concatenation would try to write to the filesystem root).
dump(svm, os.path.join(filepath, 'classifier.joblib'))