In [2]:
import numpy as np
import os
import sys
import scipy.io.wavfile
from scipy import signal


class AudioFile:
    spectrogram = None
    data = None
    rate = None
    title = None

    def __init__(self, rate, data, title):
        self.data = data
        self.rate = rate
        self.title = title
        self.spectrogram = None

    def get_spectrogram(self):
        if self.spectrogram is not None: return self.spectrogram

        f, t, sxx = signal.spectrogram(self.data, self.rate, 'hamming', 512, 160, 512)
        self.spectrogram = 10 * np.log10(sxx + sys.float_info.min)
        return self.spectrogram

    def get_feature_mean(self, base):
        mean_vector = np.zeros(base().shape[1])
        coefficient_on_base = np.dot(self.spectrogram().T, base)
        mean_vector += np.mean(coefficient_on_base, 0)
        return mean_vector

    def get_error_base(self, base):
        coefficient_on_base = np.dot(self.spectrogram().T, base)
        spectrogram_ric = np.dot(coefficient_on_base, base.T).T
        error = np.linalg.norm(self.spectrogram() - spectrogram_ric)
        return error


class AudioCollection:
    collection = []
    spectrogram = None
    base = None

    def __init__(self, path):
        for filename in os.listdir(path):
            rate, data = scipy.io.wavfile.read(path + filename)
            self.collection.append([AudioFile(rate, data, filename)])

    def __str__(self):
        to_str = 'AUDIO_COLLECTION {id:'+str(id(self))+',len:'+str(len(self.collection))+'}:\n'
        for audio in self.collection:
            to_str += str(audio) + '\n'
        return to_str

    def get_spectrogram(self):
        if self.spectrogram is not None:
            return self.spectrogram
        spectrogram = None
        for file in self.collection:
            if spectrogram is not None:
                spectrogram = file.spectrogram
            else:
                scipy.hstack((spectrogram, file.spectrogram))
        self.spectrogram = spectrogram
        return self.spectrogram

    def get_base(self, percent):
        if self.base is not None:
            return self.base
        u, s, v = np.linalg.svd(self.get_spectrogram())
        energy_e_value = np.sum(s)
        current_sum = 0
        percent_sum = energy_e_value * percent / 100
        i = 0
        for i in range(len(s)):
            current_sum += s[i]
            if current_sum >= percent_sum:
                break
        self.base = u[:, range(0, i)]
        return self.base

    def get_feature_mean(self, energy_percent):
        i = 0
        mean_vector = np.zeros(self.get_base(energy_percent).shape[1])
        for i in range(len(self.collection)):
            mean_vector += self.collection[i].getFeatureMean(self.get_base(energy_percent))
        return mean_vector / i


class AudioClassifier:
    collections = []
    unk_collection = []

    def __init__(self, path_collections, path_unk):
        print(path_collections)
        for path in path_collections:
            self.collections.append(AudioCollection(path))
        self.unk_collection = AudioCollection(path_unk)

        print(self.collections)

    def __str__(self):
        to_str = ''
        to_str += 'DATABASE:\n'
        for collection in self.collections:
            to_str += str(collection)

        to_str += 'UNKNOWNS:\n'
        to_str += str(self.unk_collection)
        return to_str

    def classify_by_reconstruction_error(self):
        for unk in self.unk_collection.collection:
            for t_c in self.collections:
                unk.errorOnBase(t_c.base())
        return 0
    
    def classify_by_feature_mean_value(self):
        for unk in self.unk_collection.collection:
            for t_c in self.collections:
                unk.errorOnBase(t_c.base())
        return 0

In [3]:
path_db = 'm.conte/05_AudioClassifier_Pdf/05_AudioClassifier_Pdf/database/'
path_music = path_db+'music/'
path_speech = path_db+'speech/'
path_unknowns = path_db.replace('database/', '')+'unknownSounds/'

audio_classifier = AudioClassifier([path_music, path_speech], path_unknowns)
print(audio_classifier)

['m.conte/05_AudioClassifier_Pdf/05_AudioClassifier_Pdf/database/music/', 'm.conte/05_AudioClassifier_Pdf/05_AudioClassifier_Pdf/database/speech/']
[<__main__.AudioCollection object at 0x0000017AF950D780>, <__main__.AudioCollection object at 0x0000017AF9502358>]
DATABASE:
AUDIO_COLLECTION {id:1627680462720,len:48}:
[<__main__.AudioFile object at 0x0000017AF95022E8>]
[<__main__.AudioFile object at 0x0000017AF950DA20>]
[<__main__.AudioFile object at 0x0000017AF950DB70>]
[<__main__.AudioFile object at 0x0000017AF950DCC0>]
[<__main__.AudioFile object at 0x0000017AF950D128>]
[<__main__.AudioFile object at 0x0000017AF950D0F0>]
[<__main__.AudioFile object at 0x0000017AF950D6D8>]
[<__main__.AudioFile object at 0x0000017AF950DC18>]
[<__main__.AudioFile object at 0x0000017AF950DC50>]
[<__main__.AudioFile object at 0x0000017AF950DBA8>]
[<__main__.AudioFile object at 0x0000017AF950DDD8>]
[<__main__.AudioFile object at 0x0000017AF950DE48>]
[<__main__.AudioFile object at 0x0000017AF950DEF0>]
[<__mai