In [151]:
import pandas as pd
import numpy as np
import math
import scipy as sp
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.svm import SVC 
import random
from sklearn.mixture import GaussianMixture
from sklearn.model_selection import GridSearchCV
import librosa
import librosa.display

import os

from itertools import product
from tqdm import tqdm

import warnings
warnings.filterwarnings('ignore')

In [141]:
class Audio:
    """MFCC frame dataset built from a directory of audio clips, with GMM/SVC helpers.

    Every file in ``dir_path`` is treated as one class; its label is the file's
    position in the *sorted* directory listing. From the first ``clip_seconds``
    of each file a contiguous ``test_seconds`` window is held out for testing
    and the remaining samples are used for training. Both parts are converted
    to MFCC frames, and each frame becomes one sample.

    Attributes:
        dir_path: Directory the audio files are read from.
        sampling_rate: Sampling rate (Hz) the files are loaded at.
        mfcc_num: Number of MFCC coefficients per frame.
        seed: ``random_state`` handed to the scikit-learn estimators.
        x_train, y_train, x_test, y_test: Shuffled frame features / labels
            (NumPy arrays once ``dataShuffle`` has run).
    """

    # Seconds of audio used from the start of every file.
    clip_seconds = 10
    # Length in seconds of the contiguous window held out for testing.
    test_seconds = 2
    # The held-out window cycles through this many positions (label % test_slots).
    test_slots = 4
    # Default number of classes; __init__ replaces it with the number of files read.
    class_num = 10

    def __init__(self, dir_path, sampling_rate, mfcc_num):
        """Load every file in ``dir_path``, split it, extract MFCCs and shuffle.

        Args:
            dir_path: Directory containing one audio file per class.
            sampling_rate: Integer sampling rate (Hz) passed to ``librosa.load``.
            mfcc_num: Number of MFCC coefficients to compute per frame.
        """
        self.dir_path = dir_path
        self.sampling_rate = sampling_rate
        self.mfcc_num = mfcc_num
        self.seed = 42

        self.x_train, self.y_train, self.x_test, self.y_test = [], [], [], []

        # os.listdir returns names in arbitrary order; sorting keeps the
        # file -> label mapping (and the held-out window) stable across runs.
        file_names = sorted(os.listdir(dir_path))
        self.class_num = len(file_names)

        for label, file_name in enumerate(file_names):
            samples, _ = librosa.load(os.path.join(self.dir_path, file_name), sr=self.sampling_rate)

            train, test = self.fileCut(samples[:self.sampling_rate * self.clip_seconds], label)

            for segment, features, labels in (
                (train, self.x_train, self.y_train),
                (test, self.x_test, self.y_test),
            ):
                # A clip shorter than the held-out window position yields an
                # empty segment; there is nothing to extract from it.
                if len(segment) == 0:
                    continue
                for frame in self.toMfcc(segment):
                    features.append(frame)
                    labels.append(label)

        self.dataShuffle()

    def fileCut(self, data, index):
        """Split one clip into a training part and a held-out test window.

        The test window is ``test_seconds`` long and starts at
        ``(index % test_slots) * test_seconds`` seconds. The training part is
        everything before and after the window, concatenated. Clips shorter
        than ``clip_seconds`` are accepted; the window is simply clipped to
        the available samples (and may be empty).

        Args:
            data: 1-D sequence of audio samples.
            index: Class index; selects which window position is held out.

        Returns:
            ``(train, test)`` as NumPy arrays.
        """
        data = np.asarray(data)[:self.sampling_rate * self.clip_seconds]

        slot = index % self.test_slots
        window = self.sampling_rate * self.test_seconds
        start = int(window * slot)
        stop = int(window * (slot + 1))

        # Copy so the returned arrays never alias the caller's buffer.
        test = data[start:stop].copy()
        train = np.concatenate((data[:start], data[stop:]))

        return train, test

    def toMfcc(self, data):
        """Return the MFCCs of ``data`` transposed so each row is one frame.

        The hop length is ``sampling_rate / 100`` samples, i.e. one frame
        every 10 ms.
        """
        data_mfcc = librosa.feature.mfcc(y=data, sr=self.sampling_rate, n_mfcc=self.mfcc_num, hop_length=int(self.sampling_rate/100))
        return data_mfcc.T

    @staticmethod
    def _shuffled(features, labels):
        """Return ``features`` and ``labels`` as arrays under one shared random permutation."""
        order = list(range(len(labels)))
        # Shuffling the index list draws from the global `random` state exactly
        # as shuffling a list of (feature, label) pairs of the same length would.
        random.shuffle(order)
        order = np.asarray(order, dtype=np.intp)
        return np.array(features)[order], np.array(labels)[order]

    def dataShuffle(self):
        """Shuffle the train and test sets in place (train first, then test).

        Uses the global ``random`` module, so call ``random.seed`` beforehand
        for a reproducible order. Afterwards all four attributes are NumPy
        arrays. Empty splits are handled without raising.
        """
        self.x_train, self.y_train = self._shuffled(self.x_train, self.y_train)
        self.x_test, self.y_test = self._shuffled(self.x_test, self.y_test)

    def _classMeans(self):
        """Return the mean training frame of every class, stacked in label order."""
        return np.array([self.x_train[self.y_train == i].mean(axis=0) for i in range(self.class_num)])

    def _newGmm(self, params, means_init):
        """Build an unfitted GaussianMixture with one component per class."""
        estimator = GaussianMixture(n_components=self.class_num, max_iter=10, random_state=self.seed, **params)
        # Component i starts at the mean of class i so that the component
        # index returned by predict() can be read as a class label.
        estimator.means_init = means_init
        return estimator

    def gmmModel(self, params=None):
        """Fit a GaussianMixture on the training frames and predict the test frames.

        Args:
            params: Extra ``GaussianMixture`` keyword arguments. ``None``
                selects ``covariance_type='tied'``.

        Returns:
            Array of predicted component indices for ``x_test``.
        """
        if params is None:
            params = {'covariance_type': 'tied'}

        estimator = self._newGmm(params, self._classMeans())
        estimator.fit(self.x_train)

        return estimator.predict(self.x_test)

    def svcModel(self, params=None):
        """Fit an SVC on the training frames and predict the test frames.

        Args:
            params: ``SVC`` keyword arguments. ``None`` selects
                ``C=1.0, kernel='rbf'``.

        Returns:
            List of predicted labels for ``x_test``.
        """
        if params is None:
            params = {'C': 1.0, 'kernel': 'rbf'}

        model = SVC(random_state=self.seed, **params)
        model.fit(self.x_train, self.y_train)

        return list(model.predict(self.x_test))

    def scoreMatrix(self, predict):
        """Compare predictions with ``y_test``.

        Args:
            predict: Sequence of predicted labels, aligned with ``y_test``.

        Returns:
            ``(conf_mat, accuracy)`` where ``conf_mat[p][t]`` counts frames
            predicted as ``p`` whose true label is ``t`` (rows are predictions,
            columns are true labels), and ``accuracy`` is the diagonal sum
            divided by ``len(predict)``.
        """
        predicted = np.asarray(predict, dtype=np.intp)
        actual = np.asarray(self.y_test, dtype=np.intp)[:len(predicted)]

        conf_mat = np.zeros((self.class_num, self.class_num))
        # add.at accumulates repeated (row, col) pairs, unlike fancy-index +=.
        np.add.at(conf_mat, (predicted, actual), 1)

        accuracy = np.trace(conf_mat) / len(predicted)
        return conf_mat, accuracy

    def scoring(self, predict):
        """Return only the accuracy of ``predict`` against ``y_test``."""
        return self.scoreMatrix(predict)[1]

    def tuning(self, model_type:str, params):
        """Exhaustively search ``params`` and return the best accuracy and setting.

        NOTE(review): candidates are scored on the test split, so the returned
        accuracy is an optimistic estimate; a separate validation split would
        be needed for an unbiased figure.

        Args:
            model_type: ``'gmm'`` or ``'svc'``.
            params: Mapping of keyword name -> iterable of candidate values;
                every combination is tried.

        Returns:
            ``(best_acc, best_params)``; ties go to the combination tried
            last. Returns ``None`` when ``model_type`` is not recognised.
        """
        grid_list = [dict(zip(params, v)) for v in product(*params.values())]

        if model_type == 'gmm':
            # The class means do not depend on the grid; compute them once.
            means_array = self._classMeans()

            def fit_predict(grid):
                estimator = self._newGmm(grid, means_array)
                estimator.fit(self.x_train)
                return estimator.predict(self.x_test)

        elif model_type == 'svc':

            def fit_predict(grid):
                model = SVC(random_state=self.seed, **grid)
                model.fit(self.x_train, self.y_train)
                return model.predict(self.x_test)

        else:
            return None

        best_acc = 0
        best_params = None

        for grid in tqdm(grid_list):
            acc = self.scoring(fit_predict(grid))
            if best_acc <= acc:
                best_acc = acc
                best_params = grid

        return best_acc, best_params

In [153]:
# Experiment: ./test_data2, 16 kHz, 40 MFCC coefficients.
# Seed the global `random` module first: Audio.dataShuffle shuffles with random.shuffle.
random.seed(42)
audio_obj = Audio('./test_data2', 16000, 40)
    
# Search grids; Audio.tuning tries every combination (4 x 4 = 16 candidates each).
svc_parameters = {'kernel': ('linear','rbf','poly','sigmoid'), 'C':[0.5,1,2,3]}
gmm_parmas = {'covariance_type':('full','diag','tied','spherical'), 'tol':(1e-1, 1e-2, 1e-3, 1e-4)}

# NOTE(review): Audio.tuning scores each candidate on the test split, so the
# "best_acc" values printed below are selected on the same data they report on.
print('svc hyper parameter tuning ----')
svc_best_acc, svc_best_parmas = audio_obj.tuning('svc', svc_parameters)
print(f"best_acc : {svc_best_acc},\nbest_params : {svc_best_parmas}", end='\n\n')

print('gmm hyper parameter tuning ----')
gmm_best_acc, gmm_best_parmas = audio_obj.tuning('gmm', gmm_parmas)
print(f"best_acc : {gmm_best_acc},\nbest_params : {gmm_best_parmas}", end='\n\n')

# Refit with the winning settings and print (confusion matrix, accuracy).
# In the matrix, rows are predicted labels and columns are true labels.
print('svc fit ----')
svc_model_pred = audio_obj.svcModel(svc_best_parmas)
print(audio_obj.scoreMatrix(svc_model_pred), end='\n\n')

print('gmm fit ----')
gmm_model_pred = audio_obj.gmmModel(gmm_best_parmas)
print(audio_obj.scoreMatrix(gmm_model_pred))

svc hyper parameter tuning ----


100%|██████████| 16/16 [00:26<00:00,  1.64s/it]


best_acc : 0.9398009950248756,
best_params : {'kernel': 'rbf', 'C': 2}

gmm hyper parameter tuning ----


100%|██████████| 16/16 [00:12<00:00,  1.30it/s]


best_acc : 0.6756218905472637,
best_params : {'covariance_type': 'tied', 'tol': 0.1}

svc fit ----
(array([[173.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.],
       [  0., 197.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.],
       [  0.,   4., 201.,   0.,   0.,   3.,   0.,   0.,   0.,   0.],
       [  0.,   0.,   0., 198.,   7.,   9.,   0.,   0.,  30.,   0.],
       [  1.,   0.,   0.,   0., 180.,   0.,   0.,   0.,   0.,   0.],
       [  0.,   0.,   0.,   0.,   7., 174.,   2.,   0.,   0.,   1.],
       [ 10.,   0.,   0.,   0.,   0.,   0., 196.,   0.,   0.,   0.],
       [ 17.,   0.,   0.,   3.,   0.,   0.,   0., 201.,   0.,   0.],
       [  0.,   0.,   0.,   0.,   3.,   9.,   0.,   0., 169.,   0.],
       [  0.,   0.,   0.,   0.,   4.,   6.,   3.,   0.,   2., 200.]]), 0.9398009950248756)

gmm fit ----
(array([[142.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.],
       [  0.,  40.,   3.,   0.,  10.,   8.,   3.,   0.,   0.,   0.],
       [  0.,   0., 144.,   0.,   0

In [161]:
# Experiment: ./data, 16 kHz, 40 MFCC coefficients.
# Seed the global `random` module first: Audio.dataShuffle shuffles with random.shuffle.
random.seed(42)
audio_obj = Audio('./data', 16000, 40)
    
# Search grids; Audio.tuning tries every combination.
# SVC: 4 kernels x 4 C values = 16 candidates.
# GMM: only 'diag' and 'tied' covariances here, x 4 tolerances = 8 candidates.
svc_parameters = {'kernel': ('linear','rbf','poly','sigmoid'), 'C':[0.5,1,2,3]}
gmm_parmas = {'covariance_type':('diag','tied'), 'tol':(1e-1, 1e-2, 1e-3, 1e-4)}

# NOTE(review): Audio.tuning scores each candidate on the test split, so the
# "best_acc" values printed below are selected on the same data they report on.
print('svc hyper parameter tuning ----')
svc_best_acc, svc_best_parmas = audio_obj.tuning('svc', svc_parameters)
print(f"best_acc : {svc_best_acc},\nbest_params : {svc_best_parmas}", end='\n\n')

print('gmm hyper parameter tuning ----')
gmm_best_acc, gmm_best_parmas = audio_obj.tuning('gmm', gmm_parmas)
print(f"best_acc : {gmm_best_acc},\nbest_params : {gmm_best_parmas}", end='\n\n')

# Refit with the winning settings and print (confusion matrix, accuracy).
# In the matrix, rows are predicted labels and columns are true labels.
print('svc fit ----')
svc_model_pred = audio_obj.svcModel(svc_best_parmas)
print(audio_obj.scoreMatrix(svc_model_pred), end='\n\n')

print('gmm fit ----')
gmm_model_pred = audio_obj.gmmModel(gmm_best_parmas)
print(audio_obj.scoreMatrix(gmm_model_pred))

svc hyper parameter tuning ----


100%|██████████| 16/16 [01:08<00:00,  4.26s/it]


best_acc : 0.8940298507462686,
best_params : {'kernel': 'rbf', 'C': 3}

gmm hyper parameter tuning ----


100%|██████████| 8/8 [00:04<00:00,  1.97it/s]


best_acc : 0.6104477611940299,
best_params : {'covariance_type': 'tied', 'tol': 0.1}

svc fit ----
(array([[168.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.],
       [  0., 197.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.],
       [  0.,   4., 201.,   0.,   0.,   0.,   0.,   0.,   0.,   0.],
       [  0.,   0.,   0., 198.,  12.,  10.,   1.,   0.,   0.,   0.],
       [  0.,   0.,   0.,   0., 189.,   0.,   0.,   0.,   0.,   0.],
       [ 10.,   0.,   0.,   0.,   0., 190.,  13.,   1.,  25.,   0.],
       [ 13.,   0.,   0.,   0.,   0.,   0., 177.,   4.,   5.,   0.],
       [  5.,   0.,   0.,   0.,   0.,   1.,   0., 167.,  56.,   3.],
       [  0.,   0.,   0.,   0.,   0.,   0.,  10.,  29., 112.,   0.],
       [  5.,   0.,   0.,   3.,   0.,   0.,   0.,   0.,   3., 198.]]), 0.8940298507462686)

gmm fit ----
(array([[139.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.],
       [  0.,  76.,   0.,   0.,   3.,   0.,   0.,   0.,   0.,   0.],
       [  0.,   0., 146.,   0.,   8

In [162]:
# Experiment: ./test_data2, 16 kHz, 70 MFCC coefficients.
# Seed the global `random` module first: Audio.dataShuffle shuffles with random.shuffle.
random.seed(42)
audio_obj = Audio('./test_data2', 16000, 70)
    
# Search grids; Audio.tuning tries every combination (4 x 4 = 16 candidates each).
svc_parameters = {'kernel': ('linear','rbf','poly','sigmoid'), 'C':[0.5,1,2,3]}
gmm_parmas = {'covariance_type':('full','diag','tied','spherical'), 'tol':(1e-1, 1e-2, 1e-3, 1e-4)}

# NOTE(review): Audio.tuning scores each candidate on the test split, so the
# "best_acc" values printed below are selected on the same data they report on.
print('svc hyper parameter tuning ----')
svc_best_acc, svc_best_parmas = audio_obj.tuning('svc', svc_parameters)
print(f"best_acc : {svc_best_acc},\nbest_params : {svc_best_parmas}", end='\n\n')

print('gmm hyper parameter tuning ----')
gmm_best_acc, gmm_best_parmas = audio_obj.tuning('gmm', gmm_parmas)
print(f"best_acc : {gmm_best_acc},\nbest_params : {gmm_best_parmas}", end='\n\n')

# Refit with the winning settings and print (confusion matrix, accuracy).
# In the matrix, rows are predicted labels and columns are true labels.
print('svc fit ----')
svc_model_pred = audio_obj.svcModel(svc_best_parmas)
print(audio_obj.scoreMatrix(svc_model_pred), end='\n\n')

print('gmm fit ----')
gmm_model_pred = audio_obj.gmmModel(gmm_best_parmas)
print(audio_obj.scoreMatrix(gmm_model_pred))

svc hyper parameter tuning ----


100%|██████████| 16/16 [00:30<00:00,  1.89s/it]


best_acc : 0.9442786069651742,
best_params : {'kernel': 'rbf', 'C': 2}

gmm hyper parameter tuning ----


100%|██████████| 16/16 [00:20<00:00,  1.26s/it]


best_acc : 0.8159203980099502,
best_params : {'covariance_type': 'tied', 'tol': 0.01}

svc fit ----
(array([[177.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.],
       [  0., 197.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.],
       [  0.,   4., 201.,   0.,   0.,   0.,   0.,   0.,   0.,   0.],
       [  0.,   0.,   0., 198.,   8.,   6.,   0.,   0.,  25.,   0.],
       [  0.,   0.,   0.,   0., 170.,   0.,   0.,   0.,   0.,   0.],
       [  0.,   0.,   0.,   0.,  22., 190.,   4.,   0.,   0.,   0.],
       [ 10.,   0.,   0.,   0.,   0.,   0., 194.,   0.,   0.,   0.],
       [ 14.,   0.,   0.,   3.,   0.,   0.,   0., 201.,   0.,   0.],
       [  0.,   0.,   0.,   0.,   1.,   0.,   0.,   0., 176.,   7.],
       [  0.,   0.,   0.,   0.,   0.,   5.,   3.,   0.,   0., 194.]]), 0.9442786069651742)

gmm fit ----
(array([[153.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.],
       [ 13., 194.,  56.,   0.,  99.,   0.,   0.,   0.,   0.,   0.],
       [  0.,   0., 144.,   0.,   