In [26]:
# import librosa
# import python_speech_features
import numpy as np
import pandas as pd
import IPython.display as ipd
import scipy.io.wavfile as wav
from scipy.fftpack import dct
from zipfile import ZipFile
import matplotlib.pyplot as plt
from scipy.stats import skew
from sklearn import preprocessing, svm
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import SGDClassifier, LogisticRegression
from sklearn.metrics import accuracy_score, roc_auc_score
from sklearn.model_selection import GridSearchCV, train_test_split
from IPython.core.interactiveshell import InteractiveShell
# Ask IPython to echo the value of every top-level expression in a cell, not only the last one.
InteractiveShell.ast_node_interactivity = "all"

In [None]:
#data import
def audio_import(nclass, naudio):
    """Read one recording and build the name of its companion CSV file.

    Parameters
    ----------
    nclass : value used (via str()) as the suffix of the 'IDR' directory name.
    naudio : value used (via str()) as the base name of the file.

    Returns
    -------
    (rate, data, filename) where rate and data come from scipy's wav.read on
    'Read_Up/IDR<nclass>/<naudio>.wav' and filename is the string
    'Data/IDR<nclass>/<naudio>.csv' (the CSV itself is not opened here).
    """
    class_dir = 'IDR' + str(nclass)
    audio_name = str(naudio)
    wav_path = ''.join(('Read_Up/', class_dir, '/', audio_name, '.wav'))
    filename = ''.join(('Data/', class_dir, '/', audio_name, '.csv'))
    rate, data = wav.read(wav_path)
    return rate, data, filename

# Feature-extraction parameters, used as defaults by csv_data_gen.
framelength = 0.025  # multiplied by the sample rate in frames_gen to get samples per frame
framestride = 0.015  # multiplied by the sample rate in frames_gen to get the hop between frames
nfft = 512           # FFT size passed to np.fft.rfft
num_fbanks = 40      # number of mel filters
n_cep_coeff = 12     # number of cepstral coefficients kept per frame
N = 2                # window bound read by deltacoeff

#generate frames
def frames_gen(rate, data, framelength, framestride):
    """Slice a 1-D signal into overlapping, zero-padded frames.

    Parameters
    ----------
    rate : sample rate; rate * framelength and rate * framestride are rounded
        to whole sample counts.
    data : 1-D sequence of samples (converted to float).
    framelength : frame length, in the unit that makes rate * framelength a sample count.
    framestride : hop between consecutive frame starts, same unit as framelength.

    Returns
    -------
    (frames, frmlen) : frames is a float array of shape (nframes, frmlen);
        frmlen is the frame length in samples.

    Raises
    ------
    ValueError if the frame length or stride rounds to less than one sample.
    """
    frmlen = int(round(rate * framelength))
    frmstrd = int(round(rate * framestride))
    if frmlen < 1 or frmstrd < 1:
        raise ValueError("frame length and frame stride must each span at least one sample")

    signal = np.asarray(data, dtype = float)
    signallen = len(signal)

    if signallen <= frmlen:
        # A signal shorter than one frame is padded up to exactly one frame
        # (previously this produced zero frames).
        paddinglen = frmlen - signallen
    else:
        # Pad only up to the next whole stride; an already aligned signal gets
        # no padding (previously it received a full extra stride of zeros).
        paddinglen = (frmlen - signallen) % frmstrd

    paddedsig = np.concatenate((signal, np.zeros(paddinglen)), axis = 0)
    nframes = (len(paddedsig) - frmlen) // frmstrd + 1

    # Row i of `indices` holds the sample positions of frame i.
    starts = np.arange(nframes) * frmstrd
    indices = starts[:, np.newaxis] + np.arange(frmlen)
    return paddedsig[indices], frmlen

#apply hamming window to each frame
def hamming_window(frames, frmlen):
    """Return a Hamming-windowed copy of `frames`.

    Parameters
    ----------
    frames : array whose last axis has length frmlen.
    frmlen : window length passed to np.hamming.

    Returns
    -------
    A new array; the input is left untouched. The multiplication is done
    out of place so that the caller's array is not mutated and
    integer-dtype frames do not raise a casting error.
    """
    return frames * np.hamming(frmlen)

#convert each windowed frame into a power spectrum
def periodogram_gen(frames, nfft):
    """Turn each row of `frames` into a power spectrum.

    The real FFT of length nfft is taken along axis 1, and the squared
    magnitude is divided by nfft.
    """
    spectrum = np.fft.rfft(frames, n = nfft, axis = 1)
    magnitude = np.absolute(spectrum)
    return np.square(magnitude) / nfft

#helper functions
def freq_to_mel(freq):
    """Map a frequency (scalar or array) to the mel scale: 2595 * log10(1 + freq / 700)."""
    ratio = freq / 700
    return 2595 * np.log10(1 + ratio)
def mel_to_freq(mel):
    """Map a mel value (scalar or array) to a frequency: 700 * (10 ** (mel / 2595) - 1)."""
    exponent = mel / 2595
    return 700 * (np.power(10, exponent) - 1)

# making mel-scale filterbank
def filter_bank_gen(rate, num_fbanks, nfft):
    """Build a bank of triangular filters spaced evenly on the mel scale.

    num_fbanks + 2 mel points are laid out between 0 and freq_to_mel(rate/2),
    converted to frequencies and then to FFT bin numbers. Filter i rises from
    bin point i to bin point i+1 and falls from there to bin point i+2.

    Returns an array of shape (num_fbanks, floor(nfft/2 + 1)).
    """
    mel_points = np.linspace(0, freq_to_mel(rate/2), num_fbanks + 2)
    bins = np.floor((nfft + 1) * mel_to_freq(mel_points)/rate)
    fbank = np.zeros((num_fbanks, int(np.floor(nfft/2 + 1))))

    # Walk over consecutive (left, center, right) triples of bin points.
    for row, (left, center, right) in enumerate(zip(bins[:-2], bins[1:-1], bins[2:])):
        rising = np.arange(int(left), int(center))
        falling = np.arange(int(center), int(right))
        # Empty index arrays (coinciding bin points) leave the row untouched.
        fbank[row, rising] = (rising - left)/(center - left)
        fbank[row, falling] = (right - falling)/(right - center)
    return fbank

# filtered frames
def filtered_frame_gen(frame_periodogram, fbank):
    """Apply every filter to every frame and return 20*log10 of the result.

    frame_periodogram is multiplied by the transpose of fbank, giving one
    value per (frame, filter). Exact zeros are replaced by machine epsilon
    before the logarithm so that log10 never sees 0.
    """
    energies = np.dot(frame_periodogram, fbank.T)
    tiny = np.finfo(float).eps
    safe_energies = np.where(energies != 0, energies, tiny)
    return 20*np.log10(safe_energies)

#make mfcc coefficients
def mfcc_gen(filter_banks, n_cep_coeff):
    """Take an orthonormal type-2 DCT along axis 1 and keep coefficients 1..n_cep_coeff.

    Coefficient 0 of each row is discarded.
    """
    cepstra = dct(filter_banks, type = 2, axis = 1, norm = 'ortho')
    first, last = 1, n_cep_coeff + 1
    return cepstra[:, first:last]

#make delta and delta-delta coefficients
def ctpn(n_cep_coeff, coeff_type, t, n):
    """Return column t+n of `coeff_type`, clamped to the range [0, n_cep_coeff-1].

    Parameters
    ----------
    n_cep_coeff : number of usable columns; indices past n_cep_coeff-1 are
        clamped to that last column.
    coeff_type : 2-D array indexed as coeff_type[:, column].
    t, n : the requested column is t + n.

    A negative t+n is clamped to column 0 (previously the function fell
    through and returned None in that case).
    """
    idx = t + n
    last = n_cep_coeff - 1
    if idx > last:
        return coeff_type[:, last]
    return coeff_type[:, max(idx, 0)]

def ctmn(n_cep_coeff, coeff_type, t, n):
    """Return column t-n of `coeff_type`, clamped to the range [0, n_cep_coeff-1].

    Parameters
    ----------
    n_cep_coeff : number of usable columns; indices past n_cep_coeff-1 are
        clamped to that last column.
    coeff_type : 2-D array indexed as coeff_type[:, column].
    t, n : the requested column is t - n.

    A t-n beyond the last column is clamped to the last column (previously
    the function fell through and returned None in that case).
    """
    idx = t - n
    if idx < 0:
        return coeff_type[:, 0]
    return coeff_type[:, min(idx, n_cep_coeff - 1)]
    
def deltacoeff(t, coeff_type):
    """Regression (delta) value at column t of `coeff_type`.

    Computes  sum_{n=1..N} n * (c[t+n] - c[t-n])  /  (2 * sum_{n=1..N} n**2),
    where c[k] is column k of coeff_type with out-of-range k clamped to the
    first/last column by ctpn / ctmn, and N is the module-level window size.

    Fixes relative to the previous version:
    * the denominator is 2 * sum(n**2); `/2*np.square(n)` divided by 2 and
      then multiplied by n**2;
    * n runs from 1 to N inclusive (range(1, N) skipped n = N);
    * the clamp bound is taken from coeff_type itself rather than from the
      global n_cep_coeff, so arrays of any width work.

    Returns a 1-D array with one value per row of coeff_type. Note that t
    indexes columns; which quantity the columns represent is the caller's choice.
    """
    width = coeff_type.shape[1]
    if N < 1:
        # No neighbours to compare against: the delta is identically zero.
        return np.zeros(coeff_type.shape[0])
    numerator = 0
    for n in range(1, N + 1):
        numerator = numerator + n*(ctpn(width, coeff_type, t, n) - ctmn(width, coeff_type, t, n))
    denominator = 2*sum(n*n for n in range(1, N + 1))
    return numerator/denominator

def deltacoeff_gen(coeff_type, n_cep_coeff, window = None):
    """Delta (first-order regression) coefficients along the frame axis.

    For every frame t (row) and every coefficient (column):

        d[t] = sum_{n=1..W} n * (c[t+n] - c[t-n])  /  (2 * sum_{n=1..W} n**2)

    with frames before the first / after the last row replaced by the first /
    last row. The previous implementation ran this regression across the
    columns, i.e. between neighbouring cepstral coefficients of the same
    frame, instead of between neighbouring frames.

    Parameters
    ----------
    coeff_type : 2-D array, one row per frame and one column per coefficient.
    n_cep_coeff : kept for backward compatibility; the number of coefficients
        is taken from coeff_type's shape.
    window : regression half-width W; defaults to the module-level N.

    Returns
    -------
    Float array with the same shape as coeff_type.
    """
    if window is None:
        window = N
    window = int(window)

    coeffs = np.asarray(coeff_type, dtype = float)
    numerator = np.zeros(coeffs.shape)
    if window < 1:
        # No neighbours to compare against: the delta is identically zero.
        return numerator

    nframes = coeffs.shape[0]
    frame_idx = np.arange(nframes)
    for n in range(1, window + 1):
        # Clamp the shifted frame indices so the edges reuse the first/last frame.
        ahead = coeffs[np.minimum(frame_idx + n, nframes - 1)]
        behind = coeffs[np.maximum(frame_idx - n, 0)]
        numerator += n*(ahead - behind)
    denominator = 2*sum(n*n for n in range(1, window + 1))
    return numerator/denominator

def deltadeltacoeff_gen(deltacoef, n_cep_coeff, window = None):
    """Delta-delta coefficients: the delta regression applied to the deltas.

    Delegates to deltacoeff_gen so both orders share one implementation.
    Parameters and return value are as for deltacoeff_gen.
    """
    return deltacoeff_gen(deltacoef, n_cep_coeff, window = window)

In [None]:
def csv_data_gen(Data, framelength = framelength, framestride = framestride, nfft = nfft, num_fbanks = num_fbanks, n_cep_coeff = n_cep_coeff, class_ids = range(1, 10), audio_ids = range(1, 68)):
    """Extract one row of summary features per recording.

    For every (nclass, naudio) pair the recording is loaded with audio_import,
    framed, windowed and converted to mean-normalised MFCCs; delta and
    delta-delta coefficients are derived from them. Each of the three matrices
    is summarised per coefficient by mean, max, min, std, median and skew
    (taken over axis 0), and nclass is appended as the last value of the row.

    Parameters
    ----------
    Data : DataFrame to extend. An empty frame (the usual case) or None simply
        yields the new feature table; otherwise the new rows are concatenated
        below it.
    framelength, framestride, nfft, num_fbanks, n_cep_coeff : extraction
        settings forwarded to the helper functions.
    class_ids, audio_ids : iterables of class numbers and per-class file
        numbers to process (defaults reproduce the former hard-coded ranges).

    Returns
    -------
    DataFrame with 18 * n_cep_coeff feature columns plus the label column 'Speaker'.
    """
    def _summary(matrix):
        # Per-column statistics, in the order used by the column names below.
        return [np.mean(matrix, axis = 0), np.max(matrix, axis = 0), np.min(matrix, axis = 0),
                np.std(matrix, axis = 0), np.median(matrix, axis = 0), skew(matrix, axis = 0)]

    stat_names = ('mean', 'max', 'min', 'std', 'median', 'skew')
    columns = [prefix + '_' + stat + str(x)
               for prefix in ('MFCC', 'DEL', 'DELDEL')
               for stat in stat_names
               for x in range(0, n_cep_coeff)] + ['Speaker']

    rows = []
    fbank_by_rate = {}  # the filter bank depends only on the sample rate, so build it once per rate
    for nclass in class_ids:
        for naudio in audio_ids:
            rate, data, _ = audio_import(nclass, naudio)
            frames, frmlen = frames_gen(rate, data, framelength, framestride)
            frames = hamming_window(frames, frmlen)
            frame_periodogram = periodogram_gen(frames, nfft)
            if rate not in fbank_by_rate:
                fbank_by_rate[rate] = filter_bank_gen(rate, num_fbanks, nfft)
            filter_banks = filtered_frame_gen(frame_periodogram, fbank_by_rate[rate])
            mfcc = mfcc_gen(filter_banks, n_cep_coeff)
            # Remove the per-recording mean of every coefficient.
            mfcc = mfcc - np.mean(mfcc, axis = 0)
            delta_coef = deltacoeff_gen(mfcc, n_cep_coeff)
            deltadelta_coef = deltadeltacoeff_gen(delta_coef, n_cep_coeff)
            print(mfcc.shape,nclass,naudio)
            rows.append(np.hstack(_summary(mfcc) + _summary(delta_coef) + _summary(deltadelta_coef) + [nclass]))

    # Build the table once; DataFrame.append was removed in pandas 2.0 and
    # growing a frame row by row is quadratic.
    if rows:
        features = pd.DataFrame(np.vstack(rows), columns = columns)
    else:
        features = pd.DataFrame(columns = columns)

    if Data is None or Data.empty:
        return features
    return pd.concat([Data, features], ignore_index = True)

In [None]:
# Run the full feature extraction, starting from an empty table.
Data = csv_data_gen(pd.DataFrame())

In [None]:
# Write the feature table to data.csv without the row index.
Data.to_csv('data.csv', index = False)

In [None]:
# Load the feature table from data.csv; the last column is used as the label further down.
data = pd.read_csv('data.csv')

In [None]:
# Hold out 15% of the rows for testing. The split is seeded so that a
# Restart & Run All reproduces the same partition (and the same scores).
features = data.iloc[:, :-1]  # every column except the last
labels = data.iloc[:, -1]     # last column holds the class label
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size = 0.15, random_state = 200)

In [None]:
def normalize(df):
    """Min-max scale every column of `df` to the range [0, 1].

    Each column becomes (x - min) / (max - min) using that column's own
    minimum and maximum. A constant column (max == min) is mapped to 0.0
    instead of dividing by zero and producing NaN.

    Returns a new DataFrame; `df` is not modified.
    """
    result = df.copy()
    for feature_name in df.columns:
        column = df[feature_name]
        min_value = column.min()
        value_range = column.max() - min_value
        if value_range == 0:
            # Constant feature: every shifted value is already 0.
            result[feature_name] = (column - min_value).astype(float)
        else:
            result[feature_name] = (column - min_value) / value_range
    return result

In [None]:
# Fit the min-max scaling on the training split only and apply the same
# transform to the test split. Scaling the test split with its own min/max
# would put the two splits on different scales and leak test statistics.
scaler = preprocessing.MinMaxScaler().fit(X_train)
X_train = pd.DataFrame(scaler.transform(X_train), columns = X_train.columns, index = X_train.index)
X_test = pd.DataFrame(scaler.transform(X_test), columns = X_test.columns, index = X_test.index)

In [None]:
# lr_clf = LogisticRegression(
#     random_state = 200,
#     max_iter = 1000,
#     verbose = 1,
#     n_jobs = -1,
#     solver = 'newton-cg'
# )
# lr_clf.fit(X_train, y_train)
# predicted = lr_clf.predict_proba(X_test)

# knn_clf = KNeighborsClassifier(
#     n_neighbors = 5,
#     n_jobs = -1,
#     leaf_size = 100
# )
# knn_clf.fit(X_train, y_train)
# predicted = knn_clf.predict_proba(X_test)

# svc_clf = svm.SVC(
#     kernel = 'linear',
#     verbose = True,
#     random_state = True
# )
# svc_clf.fit(X_train, y_train)
# pred_labels = svc_clf.predict(X_test)

# Linear SVM. `gamma` is omitted because it has no effect with a linear kernel.
# random_state seeds the internal cross-validation that probability=True runs,
# so repeated runs give the same probabilities.
clf = svm.SVC(kernel = 'linear', probability = True, C = 10, random_state = 200)
clf.fit(X_train, y_train)
# accuracy_score expects (y_true, y_pred).
print(accuracy_score(y_test, clf.predict(X_test)))
# C_grid = [0.001, 0.01, 0.1, 1, 10]
# gamma_grid = [0.001, 0.01, 0.1, 1, 10]
# param_grid = {'C': C_grid, 'gamma' : gamma_grid}

# grid = GridSearchCV(svm.SVC(kernel='rbf'), param_grid, cv = 3, scoring = "accuracy")
# grid.fit(X_train, y_train)

# # Find the best model
# print(grid.best_score_)

# print(grid.best_params_)

# print(grid.best_estimator_)

In [None]:
# `predicted` was only assigned in commented-out code, so compute it here from the fitted classifier.
predicted = clf.predict_proba(X_test)
# argmax yields a column position; map it through clf.classes_ to get the actual class label.
pred_labels = clf.classes_[predicted.argmax(axis = 1)]
pred_labels

In [None]:
# Accuracy of the labels derived from the predicted probabilities.
score = accuracy_score(y_test, pred_labels)
print("Accuracy score" + str(score))