In [1]:
import glob

import numpy as np
import pandas as pd
from scipy import signal

import librosa
import librosa.display as DSP
import librosa as lr
from tempfile import mktemp
import imagehash
from PIL import Image

import pylab

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier

## Extract Features From Wav

In [2]:
class Song(object):
    def __init__(self, wavdata, samplingFrequency):
        self.wavdata,self.samplingFrequency = wavdata, samplingFrequency
        
        y_harmonic, y_percussive = librosa.effects.hpss(wavdata)
        self.y_harmonic, self.y_percussive = y_harmonic, y_percussive
        
        self.spectrogram = self.getSpectrogram()
        
        # Harmonic
        self.harmonic = np.mean(y_harmonic)
        self.harmonic_var = np.var(y_harmonic)
        
        # Precussive
        self.percussive = np.mean(y_percussive)
        self.percussive_var = np.var(y_percussive)
        
        # Pitches
        pitches, magnitudes = librosa.core.piptrack(y = wavdata , sr = samplingFrequency)
        self.max_pitch = np.max(pitches)
        self.avg_pitch = np.mean(pitches)
        self.var_pitch = np.var(pitches)
        
        # LPC Calculations
        spectral_lpc = librosa.lpc(wavdata, order=2)
        self.max_lpc = np.max(spectral_lpc)
        self.avg_lpc = np.mean(spectral_lpc)
        self.var_lpc = np.var(spectral_lpc)
        
        # Chroma
        chroma=lr.feature.chroma_cens(y=y_harmonic, sr=samplingFrequency)
        self.chromaAvg = np.mean(chroma)
        self.chromaVar = np.var(chroma)
        
        chroma_stft =lr.feature.chroma_stft(y=y_harmonic, sr=samplingFrequency)
        self.chroma_stft_mean = np.mean(chroma_stft)
        self.chroma_stft_var = np.var(chroma_stft)

        chroma_cqt =lr.feature.chroma_cqt(y=y_harmonic, sr=samplingFrequency)
        self.chroma_cqt_mean = np.mean(chroma_cqt)
        self.chroma_cqt_var = np.var(chroma_cqt)
        
        # MFCC
        mfccs = lr.feature.mfcc(y=y_harmonic, sr=samplingFrequency)
        self.Mfccs = np.mean(mfccs)
        self.Mfccs_var = np.var(mfccs)
        
        delta = lr.feature.delta(mfccs)
        self.mfcc_delta_mean = np.mean(delta)
        self.mfcc_delta_var = np.var(delta)
        
        # Contrast
        contrast=lr.feature.spectral_contrast(y=y_harmonic,sr=samplingFrequency)
        self.Contrast = np.mean(contrast)
        self.Contrast_var = np.var(contrast)
        
        # Rolloff
        rolloff = lr.feature.spectral_rolloff(y=wavdata, sr=samplingFrequency)
        self.Rolloff = np.mean(rolloff)
        self.Rolloff_var = np.var(rolloff)

        # Z-rate
        zrate=lr.feature.zero_crossing_rate(y_harmonic)
        self.Zrate = np.mean(zrate)
        self.Zrate_var = np.var(zrate)

        # Centroid
        cent = lr.feature.spectral_centroid(y=wavdata, sr=samplingFrequency)
        self.Cent = np.mean(cent)
        self.Cent_var = np.var(cent)

        tonnetz = lr.feature.tonnetz(y=wavdata, sr=samplingFrequency)
        self.tonnetz_mean = np.mean(tonnetz)
        self.tonnetz_var = np.var(tonnetz)
        
        # Poly
        poly_features = lr.feature.poly_features(S=self.spectrogram, sr=samplingFrequency)
        self.poly_features_mean = np.mean(poly_features)
        self.poly_features_var = np.var(poly_features)
        
        # Bandwidth
        spec_bw = lr.feature.spectral_bandwidth(y=wavdata, sr=samplingFrequency)
        self.spec_bw_mean = np.mean(spec_bw)
        self.spec_bw_var = np.var(spec_bw)
        
        # RMSE
        rmse = lr.feature.rms(y=wavdata)
        self.rmse_mean = np.mean(rmse)
        self.rmse_var = np.var(rmse)
        
        # Melspectogram
        melspectrogram = lr.feature.melspectrogram(y=wavdata, sr=samplingFrequency)
        self.melspec_mean = np.mean(melspectrogram)
        self.melspec_var = np.var(melspectrogram)

        
    # Features with Hashing
    def getSpectrogram(self):
        f, t, Sxx = signal.spectrogram(self.wavdata, 44100)
#         return np.abs(librosa.stft(self.wavdata))
        return Sxx

    def getHashedSpectrogram(self):
        outputFile = mktemp('.png')  # use temporary file
        pylab.axis('off')  # no axis
        pylab.axes([0., 0., 1., 1.], frameon=False, xticks=[], yticks=[])  # Remove the white edge
        librosa.display.specshow(librosa.amplitude_to_db(self.spectrogram, ref=np.max), y_axis='linear')
        pylab.savefig(outputFile, bbox_inches=None, pad_inches=0)
        pylab.close()
        return int(str(imagehash.phash(Image.open(outputFile))), 16)

    def getFeature_centroid(self):
        outputFile = mktemp('.png')  # use temporary file
        pylab.axis('off')  # no axis
        pylab.axes([0., 0., 1., 1.], frameon=False, xticks=[], yticks=[])  # Remove the white edge
        spectral_centroids = librosa.feature.spectral_centroid(S = self.spectrogram, sr = self.samplingFrequency)
        DSP.specshow(spectral_centroids)
        pylab.savefig(outputFile, bbox_inches=None, pad_inches=0)
        pylab.close()
        return int(str(imagehash.phash(Image.open(outputFile))), 16)

    def getFeature_rolloff(self):
        outputFile = mktemp('.png')  # use temporary file
        pylab.axis('off')  # no axis
        pylab.axes([0., 0., 1., 1.], frameon=False, xticks=[], yticks=[])  # Remove the white edge
        spectral_rolloffs = librosa.feature.spectral_rolloff(S = self.spectrogram, sr = self.samplingFrequency)
        librosa.display.specshow(spectral_rolloffs)
        pylab.savefig(outputFile, bbox_inches=None, pad_inches=0)
        pylab.close()
        return int(str(imagehash.phash(Image.open(outputFile))), 16)

    def getFeature_chroma(self):
        outputFile = mktemp('.png')  # use temporary file
        pylab.axis('off')  # no axis
        pylab.axes([0., 0., 1., 1.], frameon=False, xticks=[], yticks=[])  # Remove the white edge
        spectral_chroma = librosa.feature.chroma_stft(y=self.y_harmonic, sr = self.samplingFrequency)
        librosa.display.specshow(spectral_chroma)
        pylab.savefig(outputFile, bbox_inches=None, pad_inches=0)
        pylab.close()
        return int(str(imagehash.phash(Image.open(outputFile))), 16)
    
    def getFeature_chroma_cqt(self):
        outputFile = mktemp('.png')  # use temporary file
        pylab.axis('off')  # no axis
        pylab.axes([0., 0., 1., 1.], frameon=False, xticks=[], yticks=[])  # Remove the white edge
        spectral_chroma_cqt = librosa.feature.chroma_cqt(y=self.y_harmonic, sr=self.samplingFrequency)
        librosa.display.specshow(spectral_chroma_cqt)
        pylab.savefig(outputFile, bbox_inches=None, pad_inches=0)
        pylab.close()
        return int(str(imagehash.phash(Image.open(outputFile))), 16)
    
    def getFeature_mfcc(self):
        outputFile = mktemp('.png')  # use temporary file
        pylab.axis('off')  # no axis
        pylab.axes([0., 0., 1., 1.], frameon=False, xticks=[], yticks=[])  # Remove the white edge
        spectral_mfcc = librosa.feature.mfcc(y = self.y_harmonic, sr = self.samplingFrequency)
        librosa.display.specshow(spectral_mfcc)
        pylab.savefig(outputFile, bbox_inches=None, pad_inches=0)
        pylab.close()
        return int(str(imagehash.phash(Image.open(outputFile))), 16)
    
    def getFeature_mels_spectorgram(self):
        outputFile = mktemp('.png')  # use temporary file
        pylab.axis('off')  # no axis
        pylab.axes([0., 0., 1., 1.], frameon=False, xticks=[], yticks=[])  # Remove the white edge
        spectral_melspectrogram = librosa.feature.melspectrogram(S = self.spectrogram, sr = self.samplingFrequency)
        librosa.display.specshow(spectral_melspectrogram)
        pylab.savefig(outputFile, bbox_inches=None, pad_inches=0)
        pylab.close()
        return int(str(imagehash.phash(Image.open(outputFile))), 16)
    

## Data Pre-processing

In [3]:
X = []
y = []

lstFolders = ['Open the door', 'Close the door']
for lst in lstFolders:
    listFiles = glob.glob(r"C://Users/kamel/Downloads/Data/Training Data/*/{}/*/*".format(lst))
    for file in listFiles:
        try:
            data, sampleRate = librosa.load(file)
            song = Song(data, sampleRate)
            lst1 = [song.max_pitch, song.avg_pitch, song.var_pitch, song.max_lpc, song.avg_lpc, song.var_lpc, song.harmonic, \
                   song.harmonic_var, song.percussive, song.percussive_var, song.chromaAvg, song.chromaVar, \
                   song.chroma_stft_mean, song.chroma_stft_var, song.chroma_cqt_mean, song.chroma_cqt_var, \
                   song.Mfccs, song.Mfccs_var, song.mfcc_delta_mean, song.mfcc_delta_var, song.Contrast, song.Contrast_var, \
                   song.Rolloff, song.Rolloff_var, song.Zrate, song.Zrate_var, song.Cent, song.Cent_var, song.tonnetz_mean, \
                   song.tonnetz_var, song.poly_features_mean, song.poly_features_var, song.spec_bw_mean, song.spec_bw_var, \
                   song.rmse_mean, song.rmse_var, song.getFeature_chroma(), \
                   song.getFeature_mfcc(), song.getFeature_mels_spectorgram()]
            
            features = np.array(lst1)
            X.append(features)
            y.append(lstFolders.index(lst))
        except:
            continue

  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **

  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


In [4]:
df = pd.DataFrame(columns = [
    'max_pitch', 'avg_pitch', 'var_pitch', 'max_lpc', 'avg_lpc', 'var_lpc', 'harmonic', 'harmonic_var', 'percussive', \
    'percussive_var', 'chromaAvg', 'chromaVar', 'chroma_stft_mean', 'chroma_stft_var', 'chroma_cqt_mean', 'chroma_cqt_var' \
    'Mfccs', 'Mfccs_var', 'mfcc_delta_mean', 'mfcc_delta_var', 'Contrast', 'Contrast_var', 'Rolloff', 'Rolloff_var', 'Zrate', \
    'Zrate_var', 'Cent', 'Cent_var', 'tonnetz_mean', 'tonnetz_var', 'poly_features_mean', 'poly_features_var', 'spec_bw_mean', \
    'spec_bw_var', 'rmse_mean', 'rmse_var', 'melspec_mean', 'melspec_var', 'chroma_hashcode', \
    'mfcc hashcode' 'mel hashcode'
    ], data = X)

In [5]:
df['y'] = y

In [6]:
X = np.array(X)
y = np.array(y)

In [7]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, shuffle=True)

In [8]:
scaler = StandardScaler()

In [9]:
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

## Model Training
### SVM

In [10]:
clf = SVC(gamma='auto')
clf.fit(X_train, y_train)

SVC(gamma='auto')

In [11]:
clf.score(X_train, y_train)

0.95

In [12]:
clf.score(X_test, y_test)

0.7777777777777778

### Random Forest

In [13]:
r_clf = RandomForestClassifier(max_depth=5)
r_clf.fit(X_train, y_train)

RandomForestClassifier(max_depth=5)

In [14]:
r_clf.score(X_train, y_train)

1.0

In [15]:
r_clf.score(X_test, y_test)

0.8444444444444444

## Saving Data to CSV

In [16]:
df.to_csv("data.csv")