In [4]:
import numpy as np
import pandas as pd
import keras
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from keras.utils.np_utils import to_categorical
from keras.layers import TimeDistributed, Bidirectional, Dense,Dropout, GRU
from keras.models import Sequential, load_model
from keras.layers.core import Dense, Dropout, Activation

MAX_SEQUENCE_NUM = 777


def mapPhones():
    phone2num = dict()
    with open('./48phone_char.map') as f:
        lines = f.readlines()
        for line in lines:
            line = line.rstrip('\n')           
            phones = line.split('\t')
            phone2num[phones[0]] = phones[1]
    phone39 = dict()
    with open('./48_39.map') as f:
        
        lines = f.readlines()
        for line in lines:
            line = line.rstrip('\n')
            phones = line.split('\t')
            phone39[phones[0]] = phones[1]
    
    labelList = []
    for key, value in phone39.items():
        label = phone2num[value]
        if(label not in labelList):
            labelList.append(label)
   
    
    return phone2num, phone39, labelList


def loadData(mfcc_path, labels_path):
    phone2num, phone39, labelList  = mapPhones()

    # fbank_features = ['fb_' + str(i) for i in range(0,69predict_classes)]
    mfcc_features = ['mfcc_' + str(i) for i in range(0,39)]
   

    df = None
    if(labels_path):
         # fbank_train = pd.read_csv('./fbank/train.ark', sep=' ', header = None, index_col=0, names = ['id'] + fbank_features)
        mfcc_data = pd.read_csv(mfcc_path, sep=' ', header = None, index_col=0, names = ['id'] + mfcc_features)
        labels = pd.read_csv(labels_path, sep=',', header = None, index_col=0, names = ['id', 'label'])
        df = pd.concat([mfcc_data, labels], axis=1)  
        # map label frome phone48 -> phone39 -> number -> index of number in labelList -> +1
        for key, value in phone39.items():        
            df.loc[df['label'] == key, 'label'] = labelList.index(phone2num[value]) + 1
    else:
        df = pd.read_csv(mfcc_path, sep=' ', header = None, index_col=0, names = ['id'] + mfcc_features)
    
    df['f_id'] = df.index
    df['fid'] = df['f_id'].apply(lambda x: x.split('_')[2])
    df[['fid']]= df[['fid']].apply(pd.to_numeric)
    df['f_name'] =  df['f_id'].apply(lambda x: x.split('_')[0] + '_' + x.split('_')[1])
    del df['f_id']

    df = df.sort_values(by=['f_name', 'fid'])
    df_g = df.groupby('f_name')
    df_g = np.array(list(df_g))
    
    df_g = np.delete(df_g, 0, 1)

    if(labels_path):
        X_data = []
        y_data = []

        for rows in (df_g):
            labels =  rows[0].as_matrix(['label'])        
            labels = to_categorical(labels, num_classes = 40)      
            mfcc = rows[0].as_matrix(mfcc_features)    
            # mfcc = preprocessing.scale(mfcc)
            padding_num = MAX_SEQUENCE_NUM - mfcc.shape[0]
            padding_zeros = np.zeros((padding_num, 39))
            
            padding_labels = np.zeros(padding_num)
            padding_labels = to_categorical(padding_labels, num_classes = 40)
            
            mfcc = np.concatenate((mfcc, padding_zeros), axis = 0)
            labels = np.concatenate((labels, padding_labels), axis = 0)
            X_data.append(mfcc)
            y_data.append(labels)
        X_data = np.asarray(X_data)
        y_data = np.asarray(y_data)
        # print(X_data[0].shape)
        # print(y_data[0].shape)
    
        return X_data, y_data, df_g
    else:
        X_data = []
        for rows in (df_g):
            
            mfcc = rows[0].as_matrix(mfcc_features)    
            # mfcc = preprocessing.scale(mfcc)
            padding_num = MAX_SEQUENCE_NUM - mfcc.shape[0]
            padding_zeros = np.zeros((padding_num, 39))   
            mfcc = np.concatenate((mfcc, padding_zeros), axis = 0)
            X_data.append(mfcc)
            return X_data, df  

def genModel(input_shape):
    model = Sequential()
    model.add(Bidirectional(GRU(200, return_sequences=True, activation='relu', dropout=0.4), input_shape=input_shape,))
    model.add(Bidirectional(GRU(200, return_sequences=True, activation='relu', dropout=0.4)))
    model.add(TimeDistributed(Dense(1024, activation='relu')))
    model.add(Dropout(0.3))
    model.add(TimeDistributed(Dense(512, activation='relu')))
    model.add(Dropout(0.3))
    model.add(TimeDistributed(Dense(40, activation='softmax')))
    model.summary()

    opt = keras.optimizers.adam(lr = 0.0001)
    model.compile(optimizer= opt,
                loss='categorical_crossentropy',
                metrics = ['accuracy']
                )
    return model

def train():
    X_data, y_data, df = loadData('./mfcc/train.ark', './label/train.lab')
    
    input_shape = (X_data.shape[1], X_data.shape[2])

    # test size and random seed
    tsize = 0.1
    rnState = 0
    X_train, X_valid, y_train, y_valid = train_test_split(X_data, y_data, test_size= tsize, random_state=rnState)
    
    model = genModel(input_shape)


    batchSize = 100
    epoch = 50
    model.fit(X_train, y_train, batch_size = batchSize,epochs = epoch)  
    model.save('model2.h5') 
    scores = model.evaluate(X_valid, y_valid, verbose=0)
    print(scores)




In [6]:
def testPredict():
    X_data, df = loadData('./mfcc/test.ark', None)
    print(X_data)
    return (df)
df = testPredict()

[array([[ 34.92973  , -29.39845  ,  -6.566813 , ...,  -1.174628 ,
         -0.9207578,  -0.4079923],
       [ 34.54451  , -28.62595  ,  -3.350186 , ...,  -2.003879 ,
         -1.750714 ,  -1.586505 ],
       [ 35.70015  , -29.0122   ,  -1.293243 , ...,  -1.74848  ,
         -1.748082 ,  -2.136411 ],
       ..., 
       [  0.       ,   0.       ,   0.       , ...,   0.       ,
          0.       ,   0.       ],
       [  0.       ,   0.       ,   0.       , ...,   0.       ,
          0.       ,   0.       ],
       [  0.       ,   0.       ,   0.       , ...,   0.       ,
          0.       ,   0.       ]])]


In [18]:
phone2num, phone39, labelList  = mapPhones()

# fbank_features = ['fb_' + str(i) for i in range(0,69predict_classes)]
mfcc_features = ['mfcc_' + str(i) for i in range(0,39)]


# fbank_train = pd.read_csv('./fbank/train.ark', sep=' ', header = None, index_col=0, names = ['id'] + fbank_features)
df = pd.read_csv('./mfcc/test.ark', sep=' ', header = None,  names = ['id'] + mfcc_features)
# map label frome phone48 -> phone39 -> number -> index of number in labelList -> +1




df['fid'] = df['id'].apply(lambda x: x.split('_')[2])
df[['fid']]= df[['fid']].apply(pd.to_numeric)
df['f_name'] =  df['id'].apply(lambda x: x.split('_')[0] + '_' + x.split('_')[1])
# del df['f_id']



df_g = df.groupby('f_name')

df_g = np.array(list(df_g))

df_g = np.delete(df_g, 0, 1)
df_g[0]
X_data = []
  

for rows in (df_g):
    mfcc = rows[0].as_matrix(mfcc_features)    
    # mfcc = preprocessing.scale(mfcc)
    padding_num = MAX_SEQUENCE_NUM - mfcc.shape[0]
    padding_zeros = np.zeros((padding_num, 39))
    mfcc = np.concatenate((mfcc, padding_zeros), axis = 0)
    X_data.append(mfcc)
X_data = np.asarray(X_data)
print(X_data.shape)
print(X_data[0])

# X_data = []
# y_data = []




(592, 777, 39)
[[ 34.92973   -29.39845    -6.566813  ...,  -1.174628   -0.9207578
   -0.4079923]
 [ 34.54451   -28.62595    -3.350186  ...,  -2.003879   -1.750714
   -1.586505 ]
 [ 35.70015   -29.0122     -1.293243  ...,  -1.74848    -1.748082
   -2.136411 ]
 ..., 
 [  0.          0.          0.        ...,   0.          0.          0.       ]
 [  0.          0.          0.        ...,   0.          0.          0.       ]
 [  0.          0.          0.        ...,   0.          0.          0.       ]]


In [17]:
df

Unnamed: 0,id,mfcc_0,mfcc_1,mfcc_2,mfcc_3,mfcc_4,mfcc_5,mfcc_6,mfcc_7,mfcc_8,...,mfcc_31,mfcc_32,mfcc_33,mfcc_34,mfcc_35,mfcc_36,mfcc_37,mfcc_38,fid,f_name
0,fadg0_si1279_1,34.92973,-29.398450,-6.566813,-11.463680,-6.701023,0.785069,4.721459,-11.765300,-4.242596,...,-1.440160,-0.722098,0.259285,-0.141215,-0.700422,-1.174628,-0.920758,-0.407992,1,fadg0_si1279
1,fadg0_si1279_2,34.54451,-28.625950,-3.350186,-5.842904,-2.534442,2.529059,2.233472,5.655792,15.280120,...,-1.246806,-0.973159,-1.468008,-1.353947,-0.409643,-2.003879,-1.750714,-1.586505,2,fadg0_si1279
2,fadg0_si1279_3,35.70015,-29.012200,-1.293243,-7.588605,3.205419,-0.870447,5.626181,0.992069,-0.217316,...,0.189605,-0.558246,-1.790936,-0.963664,0.099606,-1.748480,-1.748082,-2.136411,3,fadg0_si1279
3,fadg0_si1279_4,36.08536,-29.784690,1.057550,-1.260438,1.708064,-18.161400,6.304724,3.657054,7.035933,...,1.985665,0.262952,-1.518736,-1.142797,0.287845,0.048688,-0.267339,-1.019843,4,fadg0_si1279
4,fadg0_si1279_5,34.54451,-29.012200,-6.423817,-7.370393,-4.780475,-14.114580,-17.628430,-6.473778,-0.535101,...,1.958888,1.925121,-0.109968,-0.537114,0.608798,2.041040,2.187808,0.471789,5,fadg0_si1279
5,fadg0_si1279_6,34.92973,-31.715930,-6.852806,-7.152180,-6.874659,-8.044353,-2.968683,-1.302519,8.213675,...,1.009725,0.974232,0.503895,-0.139826,-0.216002,2.054288,3.032464,2.314511,6,fadg0_si1279
6,fadg0_si1279_7,36.85579,-26.825210,-6.995802,-9.104790,-4.530916,-3.261749,5.400002,-3.587494,-0.852886,...,-0.213378,-0.064100,0.820568,0.467953,-0.221637,1.525769,2.348624,2.928442,7,fadg0_si1279
7,fadg0_si1279_8,35.31494,-28.239710,-2.762488,-8.243243,-6.277830,-7.676460,-4.325768,-8.277706,-8.267876,...,-0.290635,-0.188044,1.153164,1.446219,0.418857,-0.102043,0.237793,1.987533,8,fadg0_si1279
8,fadg0_si1279_9,36.08536,-27.467210,-6.280819,-9.319235,-8.090097,-7.124621,2.007291,-1.543042,-2.018099,...,-0.009197,-0.506038,0.263240,0.338641,0.619833,-0.608604,-0.749795,0.102904,9,fadg0_si1279
9,fadg0_si1279_10,34.15930,-31.715930,-8.139772,-12.321460,-5.279593,-0.318609,-5.656034,-4.669850,2.913837,...,-0.424916,0.225761,-0.017955,-0.618986,0.839592,-0.437636,-0.150019,-1.800883,10,fadg0_si1279


In [24]:
mfcc_features = ['mfcc_' + str(i) for i in range(0,39)]
data = []
for rows in df:
    rows = rows[0].as_matrix(mfcc_features)
#         row = (row - np.mean(row, axis = 1)) / np.std(row, axis = 1)
    padding_num = MAX_SEQUENCE_NUM - rows.shape[0]
    padding_zeros = np.zeros((padding_num, 39))
    rows = np.concatenate((rows, padding_zeros), axis = 0)
   
        
  

    data.append(rows)
data = np.array(data)


print(data.shape)

(3696, 777, 39)


In [25]:
for idx, frame in enumerate(data):
    f_id = frame[0].split('_')
    f_name = f_id[0] + '_' + f_id[1]
    f_id = f_id[2]
    frame = np.append(frame, [f_id, f_name])
#     frame.append(f_id)
    print(frame)
    print(frame.shape)
    if idx > 3:
        break



['faem0_si1392_1' 38.3508 -31.04888 -10.46318 -9.166654 -11.21832
 -1.9986979999999999 18.50948 3.4476419999999997 6.247808999999999 -5.24096
 -4.613473 -14.820910000000001 -12.65494 -0.5591731 0.504303 1.61 0.3289518
 3.232636 1.165745 -6.0261830000000005 -0.39176669999999997 1.381758
 4.445412999999999 2.94172 2.511547 2.56285 -0.3261847 -0.16680770000000003
 -0.1314589 -0.4119736 0.2513791 -0.01998425 -1.283352 0.21725279999999997
 0.41205200000000003 0.6228664 0.7548792 1.063677 0.7848173 '37' '1'
 'faem0si1392']
(43,)
['faem0_si1392_10' 33.69101 -31.436809999999998 -8.395291 -11.67296
 -6.498817 -8.701862 -3.285625 -2.250786 7.1793309999999995 -3.129692
 -2.754778 -9.668961 -2.944329 0.04659748 0.7758503000000001 1.61 0.7362261
 0.890581 3.9399349999999997 3.404621 2.068165 0.7292350999999999 0.3920922
 0.45847829999999995 0.5244108000000001 2.290709 3.78355 1.9003290000000002
 -1.535199 0.5012606 -2.272216 -0.44922690000000004 -0.1553739 -2.146168
 1.071831 -0.150805 1.183868 -1.

In [None]:
df['f_id'] = df.index
df['fid'] = df['f_id'].apply(lambda x: x.split('_')[2])
df['f_name'] =  df['f_id'].apply(lambda x: x.split('_')[0] + '_' + x.split('_')[1])
y = df
g = y.groupby('f_name')
y = np.array(list(g))
y = y
data = []
for d in y:
    data.append(d[1])
data


In [5]:
d = dict()
for index, row in df.iterrows():
    d[row['f_name']] = row

