In [1]:
import pickle
import numpy as np
import matplotlib.pyplot as plt
import librosa
import collections 

from sklearn.metrics import confusion_matrix, f1_score, accuracy_score

In [2]:
# Windowing configuration, expressed in audio samples.
# One "unit" is 22050 samples, i.e. one second at librosa's default load rate
# (the file is loaded later without an explicit `sr`).
unit_len = 22050
# Length of one analysis window fed to the model: 10 units.
sub_len = unit_len * 10
# Hop between consecutive windows: 8 units, so neighbours overlap by 2 units.
step = unit_len * 8
# Number of MFCC coefficients extracted per frame.
bands = 20
# Class names; the position in this list is used as the class id when decoding
# model output. NOTE(review): order presumably matches the training label
# encoding - confirm against the training notebook.
languages = [
    'english',
    'hindi',
    'urdu',
    'telugu',
    'punjabi',
    'malayalam',
    'assamese',
    'bengali',
    'marathi',
    'odiya',
    'tamil',
    'kannada',
]

In [3]:
def feature_normalize(dataset):
    """Centre `dataset` on zero and scale it into the range [-1, 1].

    The mean is subtracted along axis 0, then the result is divided by the
    maximum absolute value along axis 0 (so each column of a 2-D input is
    normalised independently; a 1-D signal is normalised as a whole).

    Parameters
    ----------
    dataset : array-like
        Input values. The caller's array is NOT modified.

    Returns
    -------
    numpy.ndarray
        New array with the same shape as `dataset`. Columns that are constant
        (peak deviation of zero) come back as all zeros instead of NaN.
    """
    values = np.asarray(dataset)
    # Out-of-place subtraction: the previous `dataset -= mu` silently modified
    # the caller's array and raised on integer input.
    centered = values - np.mean(values, axis=0)
    peak = np.max(np.abs(centered), axis=0)
    # Guard against division by zero for constant input; `ones_like` keeps the
    # dtype of `peak` so the result dtype is unchanged.
    safe_peak = np.where(peak == 0, np.ones_like(peak), peak)
    return centered / safe_peak

In [4]:
def energy(samples):
    """Return the mean of the squared values in `samples`.

    The sum of squares is divided by `len(samples)`, giving the average
    power of the chunk.
    """
    sum_of_squares = np.sum(np.power(samples, 2.))
    n_samples = float(len(samples))
    return sum_of_squares / n_samples

In [5]:
def remove_silence(sound_clip, silence_len=1, sr=22050, threshold=0.001):
    """Drop low-energy chunks from `sound_clip` and return what is left.

    The clip is cut into consecutive chunks of `silence_len * sr` samples
    (the last chunk may be shorter). A chunk is kept only when its mean
    squared amplitude, as computed by `energy`, exceeds `threshold`.

    Parameters
    ----------
    sound_clip : numpy.ndarray
        1-D array of audio samples.
    silence_len : float, optional
        Chunk length in seconds (default 1).
    sr : int, optional
        Sample rate used to convert `silence_len` to samples (default 22050,
        the value that was previously hard-coded).
    threshold : float, optional
        Energy above which a chunk is kept (default 0.001, the value that was
        previously hard-coded).

    Returns
    -------
    numpy.ndarray
        The kept chunks joined in their original order; an empty array when
        every chunk is below the threshold.
    """
    chunk_len = int(silence_len * sr)
    # Seed with an empty float64 array, exactly as the old accumulator did, so
    # the result dtype and the "nothing kept" result stay the same.
    kept = [np.asarray([])]
    for start in range(0, len(sound_clip), chunk_len):
        chunk = sound_clip[start:start + chunk_len]
        if energy(chunk) > threshold:
            kept.append(chunk)
    # Join once at the end: concatenating inside the loop re-copied the whole
    # accumulated clip on every kept chunk (quadratic in clip length).
    return np.concatenate(kept)

In [6]:
def CountFrequency(arr):
    """Tally how often each distinct element occurs in `arr`.

    Returns a `collections.Counter` mapping element -> number of occurrences.
    """
    tally = collections.Counter()
    tally.update(arr)
    return tally

## Load the model and evaluate it on the test set

In [75]:
# Load the saved Keras model that is used for prediction further down
# (the file name suggests a CNN).
# NOTE(review): this import lives mid-notebook; consider moving it to the
# import cell at the top so all dependencies are visible in one place.
from keras.models import load_model
model = load_model('./data-pickle/model_3_cnn.h5')

In [76]:
# Load the pickled test-set labels; they are passed as the "true" labels to
# confusion_matrix below.
# NOTE: pickle.load can execute arbitrary code - only load files you trust.
with open("./data-pickle/y_test.pkl","rb") as f:
    y_test = pickle.load(f)

In [80]:
# Load the pickled model outputs for the test set.
# NOTE: pickle.load can execute arbitrary code - only load files you trust.
with open("./data-pickle/result_y_pred.pkl","rb") as f:
    y_pred = pickle.load(f)
# Collapse each row to the index of its largest entry, i.e. the predicted
# class id. Assumes the pickle holds a 2-D (samples, classes) array of
# scores - TODO confirm.
y_pred = np.argmax(y_pred,axis = 1)

In [85]:
# Confusion matrix of the test set: per scikit-learn's convention, row i /
# column j counts samples whose true class is i and predicted class is j.
# NOTE(review): class ids presumably index into `languages` - confirm.
cm = confusion_matrix(y_test,y_pred)
cm

array([[324,  29,  57,   0,   0,   2,   0,   0,   0,   0,   0,   0],
       [107, 179, 102,   0,   0,   5,   0,   1,   0,   0,   0,   0],
       [ 35,   3, 322,   0,   0,   0,   0,   0,   0,   0,   0,   0],
       [  0,   0,   0, 205,   1,  15,   6,   0,   0,   4,   2,   0],
       [  1,   0,   0,   0, 272,   2,   6,   1,   3,   0,   0,   0],
       [  8,   3,   1,   1,   0, 149,  20,   0,  10,   0,  35,   3],
       [  0,   0,   0,   1,   6,   3, 120,   0,   3,   0,  11,   0],
       [  8,   4,   0,   2,   7,   0,   0, 204,   2,   0,   3,   0],
       [  0,   0,   0,   6,   6,   3,   6,   2, 186,   0,   5,   0],
       [  0,   0,   0,   0,   1,   0,   0,   0,   3, 221,   0,   0],
       [  0,   0,   0,   0,   0,   9,   0,   0,   5,   0, 214,   1],
       [  0,   0,   0,   0,   0,   2,   0,   0,   1,   1,   2, 231]])

## Predict the language of a single audio file

In [148]:
# Alternative input, kept for convenience:
# file_path = './unknown-data/unknown_wav/punjabi-2.wav'
file_path = './data/punjabi_wav/Regional-Chandigarh-Punjabi-1820-2019120194228.wav'

mfccs = []
# No `sr` is passed, so librosa uses its default rate; `unit_len` (22050
# samples) assumes that default.
sound_clip,sr = librosa.load(file_path)
# Normalisation runs on the whole recording, i.e. before the leading part is
# dropped and before silence is removed.
sound_clip = feature_normalize(sound_clip)
# Skip the first 30 units of audio, then drop low-energy chunks.
# NOTE(review): the reason for the 30-unit skip is not visible here
# (presumably an intro/jingle) - confirm.
sound_clip = remove_silence(sound_clip[unit_len*30:])
# Slide a `sub_len`-sample window over the clip in hops of `step` samples.
# The `+ 1` makes the range include a window that ends exactly on the last
# sample; without it that final full window was skipped (and a clip exactly
# `sub_len` long produced no windows at all).
for i in range(0, len(sound_clip) - sub_len + 1, step):
    # Transpose so each window is laid out as (frames, coefficients).
    mfcc = librosa.feature.mfcc(y=sound_clip[i: i + sub_len], sr=sr, n_mfcc = bands).T
    mfccs.append(mfcc)

In [149]:
# Stack the per-window MFCC matrices into one array.
X = np.asarray(mfccs)
# Add a trailing axis of size 1 and fix the per-window shape to 431 x 20.
# 20 equals `bands`; 431 is presumably the number of MFCC frames librosa
# produces for one `sub_len` window with its default hop - TODO confirm.
# NOTE(review): the shape presumably has to match the model's input layer.
X = X.reshape(-1,431,20,1)
X.shape

(63, 431, 20, 1)

In [150]:
# Run the model on every window; one output row per window in X.
y = model.predict(X)

In [151]:
# Replace each output row by the index of its largest entry (the predicted
# class id).
# NOTE(review): this cell overwrites its own input, so it is not idempotent -
# running it a second time without re-running the predict cell fails because
# `y` is then 1-D. Consider a separate name for the class ids.
y = np.argmax(y,axis = 1)

In [152]:
# Translate each predicted class id into its language name (one per window).
result = [languages[i] for i in y]
result

['punjabi',
 'punjabi',
 'punjabi',
 'punjabi',
 'punjabi',
 'punjabi',
 'punjabi',
 'punjabi',
 'punjabi',
 'punjabi',
 'punjabi',
 'punjabi',
 'punjabi',
 'punjabi',
 'punjabi',
 'punjabi',
 'punjabi',
 'punjabi',
 'punjabi',
 'punjabi',
 'punjabi',
 'punjabi',
 'punjabi',
 'punjabi',
 'punjabi',
 'punjabi',
 'punjabi',
 'punjabi',
 'punjabi',
 'punjabi',
 'punjabi',
 'punjabi',
 'punjabi',
 'punjabi',
 'punjabi',
 'punjabi',
 'punjabi',
 'punjabi',
 'punjabi',
 'punjabi',
 'punjabi',
 'punjabi',
 'punjabi',
 'punjabi',
 'punjabi',
 'punjabi',
 'punjabi',
 'punjabi',
 'punjabi',
 'punjabi',
 'punjabi',
 'punjabi',
 'punjabi',
 'punjabi',
 'punjabi',
 'punjabi',
 'punjabi',
 'punjabi',
 'punjabi',
 'punjabi',
 'punjabi',
 'punjabi',
 'punjabi']

In [153]:
# Count how many windows were assigned to each language; the keys are the
# predicted languages and the values are the matching window counts.
counter = CountFrequency(result) 
print(counter.keys())
print(counter.values())

dict_keys(['punjabi'])
dict_values([63])
