In [1]:
import os
import glob
import numpy as np
import time
from tqdm import tqdm
from scipy import signal
from scipy.io import wavfile
import matplotlib.pyplot as plt
%matplotlib inline
import pickle
import librosa

In [2]:
unit_len = 1*22050
sub_len = 10*unit_len
step = 8*unit_len
labels = []
mfccs = []
bands = 20

In [3]:
def feature_normalize(dataset):
    mu = np.mean(dataset, axis=0)
    dataset -= mu 
    maxx = np.max(abs(dataset), axis=0)
    return dataset / maxx

In [4]:
def energy(samples):
    return np.sum(np.power(samples, 2.)) / float(len(samples))

In [5]:
def remove_silence(sound_clip, silence_len=1):
    new_clip = np.asarray([])
    sub_len = int(silence_len*22050)
    for i in range(0, len(sound_clip), sub_len):
        e = energy(sound_clip[i:i+sub_len])
        if e>0.001:
            new_clip=  np.concatenate((new_clip,sound_clip[i:i+sub_len]))
    return new_clip

In [6]:
path = './data/'
languages = ['english','hindi','urdu','telugu','punjabi','malayalam','assamese','bengali','marathi','odiya','tamil','kannada']

for j in range(len(languages)):
    language = languages[j]
    files_path = glob.glob(path + language + "_wav/*.wav")    
    files_path = files_path[:25]
    
    # skip size
    skip = 180
    if(language == 'assamese'):
        skip= 30
    elif(language == 'punjabi'):
        skip= 30
    
    print('loading files of :', language)
    time.sleep(0.4)
    for file_path in tqdm(files_path):
        sound_clip,sr = librosa.load(file_path)
        sound_clip = feature_normalize(sound_clip)
        sound_clip = remove_silence(sound_clip[unit_len*30:-unit_len*skip])
        for i in range(0, len(sound_clip) - sub_len, step):
            mfcc = librosa.feature.mfcc(y=sound_clip[i: i + sub_len], sr=sr, n_mfcc = bands).T 
            mfccs.append(mfcc)
            labels.append(j)

loading files of : english


100%|██████████| 25/25 [02:43<00:00,  6.55s/it]


loading files of : hindi


100%|██████████| 25/25 [02:45<00:00,  6.71s/it]


loading files of : urdu


100%|██████████| 25/25 [02:16<00:00,  5.68s/it]


loading files of : telugu


100%|██████████| 25/25 [01:00<00:00,  2.44s/it]


loading files of : punjabi


100%|██████████| 25/25 [01:39<00:00,  4.03s/it]


loading files of : malayalam


100%|██████████| 25/25 [01:08<00:00,  2.78s/it]


loading files of : assamese


100%|██████████| 25/25 [00:29<00:00,  1.15s/it]


loading files of : bengali


100%|██████████| 25/25 [01:06<00:00,  2.68s/it]


loading files of : marathi


100%|██████████| 25/25 [01:05<00:00,  2.68s/it]


loading files of : odiya


100%|██████████| 25/25 [01:06<00:00,  2.63s/it]


loading files of : tamil


100%|██████████| 25/25 [01:09<00:00,  2.82s/it]


loading files of : kannada


100%|██████████| 25/25 [01:13<00:00,  2.86s/it]


In [7]:
features = np.asarray(mfccs)

In [8]:
features.shape

(15963, 431, 20)

In [9]:
labels = np.asarray(labels)
labels.shape

(15963,)

In [10]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
        features, labels, test_size=0.2, random_state=10)

In [11]:
with open("./data-pickle/X_train.pkl","wb") as f:
    pickle.dump(X_train,f)

In [12]:
with open("./data-pickle/X_test.pkl","wb") as f:
    pickle.dump(X_test,f)

In [13]:
with open("./data-pickle/y_train.pkl","wb") as f:
    pickle.dump(y_train,f)

In [14]:
with open("./data-pickle/y_test.pkl","wb") as f:
    pickle.dump(y_test,f)