In [5]:
from scipy import signal, linalg
import os,sys,scipy.io.wavfile, numpy as np

class AudioFile():
    
    spectrogram = None
    data = None
    rate = None
    title = None
    
    def __init__(self,rate,data,title):
        self.data = data
        self.rate = rate
        self.title = title
        self.spectrogram = self.spectrogram()
    
    def spectrogram(self):
        if not self.spectrogram == None: return self.spectrogram
        
        f, t, Sxx = signal.spectrogram(self.data, self.rate,'hamming',512,160,512)
        self.spectrogram = 10*np.log10(Sxx+sys.float_info.min)  
        return self.spectrogram
    
    def getFeatureMean(self,base):
        mean_vect = np.zeros(base().shape[1])
        coeff_on_base = np.dot(self.spectrogram().T,base)
        mean_vect += np.mean(coeff_on_base,0)
        return mean_vect
    
    def errorOnBase(self,base):
        coeff_on_base = np.dot(self.spectrogram().T,base)
        spectrogram_ric = np.dot(coeff_on_base,base.T).T
        error = np.linalg.norm(self.spectrogram()-spectrogram_ric)
        return error
    
    def __str__(self):
        return 'Audio file:'+self.title
        
def AudioFileUnknown(AudioFile):
    list_error = []
    type_map = {}
    
    def __init__(self,rate,data,title,list_type):
        AudioFile(self,rate,data,title)
        for i in range(len(list_type)): type_map.update({list_type[i]:i})
    
class AudioCollection():
    collection = []
    _spectrogram = None
    _base = None
    
    def __init__(self, path,unk=False):
        for filename in os.listdir(path):
            rate, data = scipy.io.wavfile.read(path+filename)
            if not unk: self.collection.append(AudioFile(rate,data,filename))
            else: self.collection.append(AudioFileUnknown(rate,data,filename))
    
    def __str__(self):
        toStr = 'AUDIO_COLLECTION ['+str(len(self.collection))+']:\n'
        for audio in self.collection:
            toStr += 'AUDIO: '+str(audio)+'\n' 
        return toStr
    
    def spectrogram(self):
        if not self._spectrogram == None: return self._spectrogram
        _spg = None
        for file in self.collection:
            if not _spg == None: _spg = file.spectrogram  
            else: scipy.hstack((_spg,file.spectrogram))
        self.spectrogram = _spg
        return self._spectrogram
    
    def base(self, percent):
        if not self._base == None: return self._base
        U, s, V= np.linalg.svd(self.spectrogram())
        ErgEvalue = np.sum(s)
        current_sum = 0
        percent_sum = ErgEvalue*percent/100
        for i in range(len(s)):
            current_sum += s[i]
            if current_sum >= percent_sum:
                break
        self._base = U[:,range(0,i)]
        return self._base
    
    def getFeatureMean(self):
        mean_vect = np.zeros(self.base().shape[1])
        for i in range(len(self.collection)):
            mean_vect += self.collection[i].getFeatureMean(self._base()) 
        return mean_vect/i
        

class AudioClassifier:
    
    collections = []
    unk_collection = []
    
    def __init__(self,path_colletions,path_unk):
        for path in path_colletions:
            self.collections.append(AudioCollection(path))
        unk_collection = AudioCollection(path_unk)
            
    def __str__(self):
        toStr = ''
        for elm in self.collections:
            toStr += str(elm)
        return toStr
    
    def classifyByRecostructionError(self):
        for unk in self.unk_collection:
            for t_c in self.collections:
                unk.errorOnBase(t_c.base())
        return 0
    def classifyByFeatureMeanValue():
        return 0
   

In [6]:
path_db = '05_AudioClassifier_Pdf/05_AudioClassifier_Pdf/database/'
path_music = path_db+'music/'
path_speech = path_db+'speech/'
path_unknowns = path_db.replace('database/','')+'unknownSounds/'

aclass = AudioClassifier([path_music,path_speech],path_unknowns)
print(aclass)

[<__main__.AudioCollection object at 0x000002A7FCBE9CF8>]
[<__main__.AudioCollection object at 0x000002A7FCBE9CF8>, <__main__.AudioCollection object at 0x000002A7FAD79F98>]
AUDIO_COLLECTION [48]:
AUDIO: Audio file:002_ABBA-IDo,IDo,IDo,IDo,IDo-10s-A.wav
AUDIO: Audio file:004_AnnieLennox&AlGreen-PutALittleLoveintoYourHeart-10s-A.wav
AUDIO: Audio file:006_ArethaFranklin-Who'sZoomin'Who-10s-B.wav
AUDIO: Audio file:007_RyCooder-BuenoVistaSocialClub-10s-B.wav
AUDIO: Audio file:008_Anouk-It'sSoHard-10s-B.wav
AUDIO: Audio file:008_BobSchneider-MoonSong-10s-B.wav
AUDIO: Audio file:010_TitoPuente&Santana-OyeComoVa-10s-A.wav
AUDIO: Audio file:014_AlanisMorissette-21Things-10s-A.wav
AUDIO: Audio file:015_DavidBowie-PanicInDetroit-10s-B.wav
AUDIO: Audio file:016_DireStraits-SultansOfSwing-10s-B.wav
AUDIO: Audio file:016_EverythingButTheGirl-TroubledMind-10s-B.wav
AUDIO: Audio file:023_FrankSinatra-SummerWind-10s-B.wav
AUDIO: Audio file:023_JimiHendrix-VoodooChile(SlightReturn)-10s-B.wav
AUDIO: Audi