# In [None]:
import pandas as pd
import numpy as np
import math
import scipy as sp
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.svm import SVC 
import random
from sklearn.mixture import GaussianMixture

import librosa
import librosa.display

import os

# Module-level state shared by the functions below through ``global``.
sampling_rate = 0   # placeholder; main() assigns the real rate before loading
path = './data'     # directory that getpath() joins file names onto

# Feature vectors and their labels, filled by load_wav_files_from_path().
train_data, train_label = [], []
test_data, test_label = [], []

# Predictions produced by the most recently run classifier.
result = []

def getpath(file):
    """Return ``file`` joined onto the module-level ``path`` directory."""
    full_path = os.path.join(path, file)
    return full_path

def cut_file(data, index):
    """Split the first ten seconds of ``data`` into a train and a test part.

    The ten-second window (``sampling_rate * 10`` samples) is treated as four
    consecutive 2.5-second quarters.  Quarter ``index % 4`` is held out as
    the test part; the remaining samples, in their original order, form the
    train part.

    Args:
        data: Sliceable 1-D sequence of samples (a list or a NumPy array).
        index: Any integer; only ``index % 4`` is used.

    Returns:
        A ``(train, test)`` tuple of lists of samples.  When ``data`` holds
        fewer than ``sampling_rate * 10`` samples, the missing samples are
        simply absent from the result, so either list may be shorter than
        usual (or empty) rather than an ``IndexError`` being raised.
    """
    window = max(sampling_rate * 10, 0)
    quarter = index % 4

    held_out_start = int(sampling_rate * 2.5 * quarter)
    held_out_stop = int(sampling_rate * 2.5 * (quarter + 1))

    # Slicing clamps to the available length, which is what makes short
    # clips safe; list() keeps the return type a plain list of samples.
    clip = list(data[:window])

    test = clip[held_out_start:held_out_stop]
    train = clip[:held_out_start] + clip[held_out_stop:]

    return train, test

def load_wav_files_from_path():
    """Load every file in ``path`` and append its MFCC frames to the globals.

    Each file is loaded at ``sampling_rate``, truncated to ten seconds and
    split by ``cut_file``.  The file's position ``i`` in the sorted listing
    is used twice: as the class label of all its frames, and (through
    ``cut_file``) to pick which quarter of the clip is held out for testing.

    MFCCs (80 coefficients, hop of ``sampling_rate / 100`` samples) are
    computed separately for the train and test parts; every frame becomes
    one entry of ``train_data`` / ``test_data`` with a matching entry in
    ``train_label`` / ``test_label``.
    """
    global train_data, test_data, train_label, test_label

    # List the same directory getpath() joins onto, and sort the names:
    # os.listdir returns them in arbitrary order, which would otherwise make
    # the label given to each file differ between runs or machines.
    files = sorted(os.listdir(path))

    hop = int(sampling_rate / 100)

    for i, file in enumerate(files):
        file_data, _ = librosa.load(getpath(file), sr=sampling_rate)
        file_data = file_data[:sampling_rate * 10]

        train, test = cut_file(file_data, i)

        # Transpose so that iterating the result yields one coefficient
        # vector per frame.
        mfcc_train = librosa.feature.mfcc(y=np.array(train), sr=sampling_rate,
                                          n_mfcc=80, hop_length=hop).T
        mfcc_test = librosa.feature.mfcc(y=np.array(test), sr=sampling_rate,
                                         n_mfcc=80, hop_length=hop).T

        train_data.extend(mfcc_train)
        train_label.extend([i] * len(mfcc_train))

        test_data.extend(mfcc_test)
        test_label.extend([i] * len(mfcc_test))
            
def _shuffled_pair(data, labels):
    """Return ``data`` and ``labels`` as arrays reordered by one permutation."""
    data = np.array(data)
    labels = np.array(labels)

    # Shuffle positions rather than (row, label) tuples: both arrays are then
    # reordered identically, and an empty set needs no special handling.
    order = list(range(min(len(data), len(labels))))
    random.shuffle(order)
    order = np.array(order, dtype=np.intp)

    return data[order], labels[order]


def shuffle_data():
    """Shuffle the train and test sets and rebind them as NumPy arrays.

    ``train_data``/``train_label`` are reordered together, as are
    ``test_data``/``test_label``, so every row keeps its own label.  The
    train set is shuffled before the test set, both with the ``random``
    module.  If a data sequence and its labels differ in length, both are
    cut to the shorter one.  Empty sets are returned as empty arrays.
    """
    global train_data, train_label, test_data, test_label

    train_data, train_label = _shuffled_pair(train_data, train_label)
    test_data, test_label = _shuffled_pair(test_data, test_label)
    
def try_svc():
    """Fit an RBF-kernel SVC on the training set and predict the test set.

    The predictions for ``test_data`` are stored, as a list, in the
    module-level ``result``.
    """
    global result

    classifier = SVC(kernel='rbf', C=1.0, random_state=0)
    predictions = classifier.fit(train_data, train_label).predict(test_data)

    result = list(predictions)
    
def try_gmm():
    """Fit a Gaussian mixture with one component per class and predict.

    The mixture is fitted on ``train_data`` without labels; labels are only
    used to seed each component's mean with the mean feature vector of one
    class.  Predictions for ``test_data`` are stored in the module-level
    ``result`` as an array of class labels.
    """
    global result

    features = np.asarray(train_data)
    labels = np.asarray(train_label)

    # Use the labels that actually occur instead of assuming exactly ten:
    # a label with no rows would produce a NaN mean and an unusable
    # initialisation, and labels beyond a fixed count would get no component.
    classes = np.unique(labels)

    class_means = np.array([features[labels == label].mean(axis=0)
                            for label in classes])

    estimator = GaussianMixture(n_components=len(classes),
                                covariance_type='tied', max_iter=10,
                                means_init=class_means)
    estimator.fit(features)

    # Component k was seeded from classes[k], so translate component indices
    # back to labels (a no-op when the labels are 0..n-1).
    result = classes[estimator.predict(test_data)]
    
def print_score():
    """Print the confusion matrix and accuracy of ``result`` vs ``test_label``.

    The matrix is indexed ``[predicted][actual]``.  It is at least 10x10 and
    grows when a larger label occurs, so labels of 10 or above no longer
    raise ``IndexError``.  Accuracy is the matrix diagonal sum divided by
    ``len(result)``.
    """
    pairs = list(zip(result, test_label))

    size = 10
    for predicted, actual in pairs:
        size = max(size, int(predicted) + 1, int(actual) + 1)

    conf = np.zeros((size, size))
    for predicted, actual in pairs:
        conf[predicted, actual] += 1
    print(conf)

    # Correct predictions sit on the diagonal.
    no_correct = np.trace(conf)
    accuracy = no_correct / len(result)

    print(accuracy)
    
def main():
    """Load the data once, then train and score classifiers on request.

    Sets the module-level ``sampling_rate`` and ``path``, loads and shuffles
    the data, and then prompts repeatedly.  Entering ``svc`` or ``gmm`` (in
    any letter case, surrounding whitespace ignored) runs that classifier
    and prints its score; ``exit`` ends the loop; anything else prompts
    again.
    """
    global sampling_rate, path

    sampling_rate = 16000
    path = './data'

    load_wav_files_from_path()
    shuffle_data()

    trainers = {'svc': try_svc, 'gmm': try_gmm}

    train_type = ''
    while train_type != 'exit':
        # Normalise once so the validity check, the dispatch and the exit
        # test all see the same spelling.  Otherwise an input such as "SVC"
        # would pass the check, train nothing, and score stale results.
        train_type = input('(svc | gmm): ').strip().lower()

        trainer = trainers.get(train_type)
        if trainer is None:
            continue

        trainer()
        print_score()

# Start the interactive loop only when this file is run as a script, not
# when it is imported.
if __name__ == '__main__':
    main()