In [1]:
from urllib.request import urlopen, urlretrieve
from bs4 import BeautifulSoup
from scipy.io import wavfile
from python_speech_features import mfcc
from tqdm import tqdm

import re
import os
import numpy as np
import tensorflow as tf

In [2]:
# Hyper-parameters shared by the input pipeline and the model functions below.
PARAMS = dict(
    num_epochs=30,   # how many times the training dataset is repeated
    batch_size=30,   # examples per training batch
    rnn_size=100,    # number of units in the GRU cell
    clip_norm=5.0,   # global-norm threshold used when clipping gradients
)

In [3]:
def download():
    """Scrape the TSpace handle pages and save every linked ``.wav`` recording.

    The handle pages ``1807/24488`` through ``1807/24501`` are fetched one by
    one. For every anchor whose ``href`` points at a ``/bitstream/...wav``
    file, the audio is stored under ``./data/`` and a sibling ``.txt``
    transcript of the form ``'say the word <word>'`` is written, where
    ``<word>`` is the second-to-last underscore-separated token of the link
    (e.g. ``youth`` for ``OAF_youth_neutral.wav``).

    Recordings that are already on disk are reported and not fetched again,
    so the function can be re-run after an interrupted download.
    """
    prefix = 'https://tspace.library.utoronto.ca'
    save_dir = './data/'
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(save_dir, exist_ok=True)

    base_url = 'https://tspace.library.utoronto.ca/handle/1807/24'
    urls = [base_url+str(i) for i in range(488, 502)]

    # The dot is escaped so that only genuine ".wav" links match.
    wav_link = re.compile(r'/bitstream/.*\.wav')

    count = 1
    for url in urls:
        # Close the HTTP response as soon as the page body has been read.
        with urlopen(url) as response:
            soup = BeautifulSoup(response.read(), 'html5lib')
        for a in soup.findAll('a', href=wav_link):
            link = a['href']
            print(count, link)

            audio_save_loc = save_dir + link.split('/')[-1]
            if os.path.isfile(audio_save_loc):
                # Skip the transfer; the transcript below is still (re)written
                # so that every recording keeps a matching .txt file.
                print("File Already Exists")
            else:
                urlretrieve(prefix+link, audio_save_loc)

            with open(audio_save_loc.replace('.wav', '.txt'), 'w') as f:
                f.write('say the word ' + link.split('_')[-2])

            count += 1

In [4]:
def sparse_tuple_from(sequences, dtype=np.int32):
    """Create a sparse representation of a batch of variable-length sequences.

    Args:
        sequences: a list of sequences (lists) whose elements are of type
            ``dtype``; sequences may have different lengths and may be empty.
        dtype: numpy dtype used for the returned values array.
    Returns:
        A tuple ``(indices, values, shape)`` where
        ``indices`` is an int64 array of shape ``(num_values, 2)`` holding the
        ``(sequence_index, position)`` of every value,
        ``values`` is a 1-D array of the concatenated sequence elements, and
        ``shape`` is the int64 array ``[len(sequences), longest_sequence_length]``.
    """
    indices = []
    values = []
    max_len = 0

    for n, seq in enumerate(sequences):
        seq_len = len(seq)
        indices.extend((n, t) for t in range(seq_len))
        values.extend(seq)
        max_len = max(max_len, seq_len)

    # reshape keeps the (N, 2) layout even when there are no values at all,
    # where a plain asarray([]) would collapse to shape (0,).
    indices = np.asarray(indices, dtype=np.int64).reshape(-1, 2)
    values = np.asarray(values, dtype=dtype)
    # The longest length is tracked directly instead of taking max() over the
    # index array, which raises on an empty batch or all-empty sequences.
    shape = np.asarray([len(sequences), max_len], dtype=np.int64)

    return (indices, values, shape)

def train_input_fn(X, y):
    """Build the training input pipeline and return its next-element tensors.

    ``(X, y)`` is sliced example-wise, shuffled with a 10000-element buffer,
    grouped into batches of ``PARAMS['batch_size']`` and repeated
    ``PARAMS['num_epochs']`` times.

    Args:
        X: the input features, sliced along their first dimension.
        y: the matching targets, sliced along their first dimension.
    Returns:
        The ``get_next()`` result of a one-shot iterator over the dataset.
    """
    examples = tf.data.Dataset.from_tensor_slices((X, y))
    shuffled = examples.shuffle(10000)
    batched = shuffled.batch(PARAMS['batch_size'])
    repeated = batched.repeat(PARAMS['num_epochs'])
    return repeated.make_one_shot_iterator().get_next()

def rnn_cell():
    """Return a GRU cell with ``PARAMS['rnn_size']`` units and an orthogonal kernel initializer."""
    num_units = PARAMS['rnn_size']
    kernel_init = tf.orthogonal_initializer()
    return tf.nn.rnn_cell.GRUCell(num_units, kernel_initializer=kernel_init)

def clip_grads(loss_op):
    """Compute gradients of ``loss_op`` and clip them by global norm.

    Args:
        loss_op: the scalar loss tensor to differentiate.
    Returns:
        An iterator of ``(clipped_gradient, variable)`` pairs covering all
        trainable variables, clipped to ``PARAMS['clip_norm']``.
    """
    trainable = tf.trainable_variables()
    raw_grads = tf.gradients(loss_op, trainable)
    # clip_by_global_norm also returns the pre-clipping norm, unused here.
    clipped, _ = tf.clip_by_global_norm(raw_grads, PARAMS['clip_norm'])
    return zip(clipped, trainable)

def model_fn(features, labels, mode, params):
    """Estimator model function: GRU encoder trained with CTC loss.

    Args:
        features: float tensor of padded feature sequences; assumed to be
            laid out as (batch, time, feature) with all-zero padding frames,
            which is what the data-preparation cell produces.
        labels: int32 ``tf.SparseTensor`` of target label sequences; not
            used in PREDICT mode.
        mode: one of the ``tf.estimator.ModeKeys`` values.
        params: accepted for the Estimator API; hyper-parameters are read
            from the module-level ``PARAMS`` dict instead.
    Returns:
        A ``tf.estimator.EstimatorSpec`` for the requested mode:
        dense greedy-decoded label ids for PREDICT, loss plus train op (with
        an edit-distance logging hook) for TRAIN, and loss plus a mean
        edit-distance metric for EVAL.
    """
    # A frame is treated as real data when its features do not sum to zero;
    # counting such frames per example yields the unpadded sequence length.
    seq_lens = tf.count_nonzero(tf.reduce_sum(features, -1), 1, dtype=tf.int32)

    outputs, _ = tf.nn.dynamic_rnn(rnn_cell(), features, seq_lens, dtype=tf.float32)
    logits = tf.layers.dense(outputs, PARAMS['num_classes'])

    # The CTC ops used below consume (time, batch, classes) tensors.
    time_major = tf.transpose(logits, [1,0,2])
    decoded, _ = tf.nn.ctc_greedy_decoder(time_major, seq_lens)
    decoded = tf.to_int32(decoded[0])

    if mode == tf.estimator.ModeKeys.PREDICT:
        preds = tf.sparse_tensor_to_dense(decoded)
        return tf.estimator.EstimatorSpec(mode, predictions=preds)

    # Loss and edit distance are needed by both TRAIN and EVAL.
    loss_op = tf.reduce_mean(tf.nn.ctc_loss(labels, time_major, seq_lens))
    edit_dists = tf.edit_distance(decoded, labels)
    edit_dist_op = tf.reduce_mean(edit_dists)

    if mode == tf.estimator.ModeKeys.TRAIN:
        lth = tf.train.LoggingTensorHook({'edit_dist': edit_dist_op}, every_n_iter=100)

        train_op = tf.train.AdamOptimizer().apply_gradients(
            clip_grads(loss_op), global_step=tf.train.get_global_step())

        return tf.estimator.EstimatorSpec(
            mode=mode, loss=loss_op, train_op=train_op, training_hooks=[lth])

    # EVAL: previously the function fell through and returned None here,
    # which made estimator.evaluate() fail. The metric averages per-example
    # edit distances so that a smaller final batch is weighted correctly.
    return tf.estimator.EstimatorSpec(
        mode=mode, loss=loss_op,
        eval_metric_ops={'edit_dist': tf.metrics.mean(edit_dists)})

In [5]:
# Fetch the recordings into ./data/ and write a transcript file next to each one.
download()

1 /bitstream/1807/24488/1/OAF_youth_neutral.wav
2 /bitstream/1807/24488/2/OAF_young_neutral.wav
3 /bitstream/1807/24488/3/OAF_yes_neutral.wav
4 /bitstream/1807/24488/4/OAF_yearn_neutral.wav
5 /bitstream/1807/24488/5/OAF_witch_neutral.wav
6 /bitstream/1807/24488/6/OAF_wire_neutral.wav
7 /bitstream/1807/24488/7/OAF_wife_neutral.wav
8 /bitstream/1807/24488/8/OAF_white_neutral.wav
9 /bitstream/1807/24488/9/OAF_whip_neutral.wav
10 /bitstream/1807/24488/10/OAF_which_neutral.wav
11 /bitstream/1807/24488/11/OAF_when_neutral.wav
12 /bitstream/1807/24488/12/OAF_wheat_neutral.wav
13 /bitstream/1807/24488/13/OAF_week_neutral.wav
14 /bitstream/1807/24488/14/OAF_wash_neutral.wav
15 /bitstream/1807/24488/15/OAF_walk_neutral.wav
16 /bitstream/1807/24488/16/OAF_wag_neutral.wav
17 /bitstream/1807/24488/17/OAF_vote_neutral.wav
18 /bitstream/1807/24488/18/OAF_void_neutral.wav
19 /bitstream/1807/24488/19/OAF_voice_neutral.wav
20 /bitstream/1807/24488/20/OAF_vine_neutral.wav
21 /bitstream/1807/24488/21/OAF_

166 /bitstream/1807/24488/166/OAF_dog_neutral.wav
167 /bitstream/1807/24488/167/OAF_dodge_neutral.wav
168 /bitstream/1807/24488/168/OAF_ditch_neutral.wav
169 /bitstream/1807/24488/169/OAF_dip_neutral.wav
170 /bitstream/1807/24488/170/OAF_dime_neutral.wav
171 /bitstream/1807/24488/171/OAF_deep_neutral.wav
172 /bitstream/1807/24488/172/OAF_death_neutral.wav
173 /bitstream/1807/24488/173/OAF_dead_neutral.wav
174 /bitstream/1807/24488/174/OAF_date_neutral.wav
175 /bitstream/1807/24488/175/OAF_dab_neutral.wav
176 /bitstream/1807/24488/176/OAF_cool_neutral.wav
177 /bitstream/1807/24488/177/OAF_choice_neutral.wav
178 /bitstream/1807/24488/178/OAF_chief_neutral.wav
179 /bitstream/1807/24488/179/OAF_cheek_neutral.wav
180 /bitstream/1807/24488/180/OAF_check_neutral.wav
181 /bitstream/1807/24488/181/OAF_chat_neutral.wav
182 /bitstream/1807/24488/182/OAF_chalk_neutral.wav
183 /bitstream/1807/24488/183/OAF_chair_neutral.wav
184 /bitstream/1807/24488/184/OAF_chain_neutral.wav
185 /bitstream/1807/244

337 /bitstream/1807/24489/137/YAF_home_fear.wav
338 /bitstream/1807/24489/138/YAF_hole_fear.wav
339 /bitstream/1807/24489/139/YAF_hit_fear.wav
340 /bitstream/1807/24489/140/YAF_hire_fear.wav
341 /bitstream/1807/24489/141/YAF_haze_fear.wav
342 /bitstream/1807/24489/142/YAF_have_fear.wav
343 /bitstream/1807/24489/143/YAF_hate_fear.wav
344 /bitstream/1807/24489/144/YAF_hash_fear.wav
345 /bitstream/1807/24489/145/YAF_hall_fear.wav
346 /bitstream/1807/24489/146/YAF_half_fear.wav
347 /bitstream/1807/24489/147/YAF_gun_fear.wav
348 /bitstream/1807/24489/148/YAF_goose_fear.wav
349 /bitstream/1807/24489/149/YAF_good_fear.wav
350 /bitstream/1807/24489/150/YAF_goal_fear.wav
351 /bitstream/1807/24489/151/YAF_gin_fear.wav
352 /bitstream/1807/24489/152/YAF_get_fear.wav
353 /bitstream/1807/24489/153/YAF_germ_fear.wav
354 /bitstream/1807/24489/154/YAF_gaze_fear.wav
355 /bitstream/1807/24489/155/YAF_gas_fear.wav
356 /bitstream/1807/24489/156/YAF_gap_fear.wav
357 /bitstream/1807/24489/157/YAF_food_fear.w

508 /bitstream/1807/24490/108/YAF_lose_angry.wav
509 /bitstream/1807/24490/109/YAF_lore_angry.wav
510 /bitstream/1807/24490/110/YAF_long_angry.wav
511 /bitstream/1807/24490/111/YAF_loaf_angry.wav
512 /bitstream/1807/24490/112/YAF_live_angry.wav
513 /bitstream/1807/24490/113/YAF_limb_angry.wav
514 /bitstream/1807/24490/114/YAF_life_angry.wav
515 /bitstream/1807/24490/115/YAF_lid_angry.wav
516 /bitstream/1807/24490/116/YAF_lease_angry.wav
517 /bitstream/1807/24490/117/YAF_learn_angry.wav
518 /bitstream/1807/24490/118/YAF_lean_angry.wav
519 /bitstream/1807/24490/119/YAF_laud_angry.wav
520 /bitstream/1807/24490/120/YAF_late_angry.wav
521 /bitstream/1807/24490/121/YAF_knock_angry.wav
522 /bitstream/1807/24490/122/YAF_kite_angry.wav
523 /bitstream/1807/24490/123/YAF_king_angry.wav
524 /bitstream/1807/24490/124/YAF_kill_angry.wav
525 /bitstream/1807/24490/125/YAF_kick_angry.wav
526 /bitstream/1807/24490/126/YAF_keg_angry.wav
527 /bitstream/1807/24490/127/YAF_keep_angry.wav
528 /bitstream/1807

682 /bitstream/1807/24491/82/OAF_pass_ps.wav
683 /bitstream/1807/24491/83/OAF_pain_ps.wav
684 /bitstream/1807/24491/84/OAF_page_ps.wav
685 /bitstream/1807/24491/85/OAF_pad_ps.wav
686 /bitstream/1807/24491/86/OAF_numb_ps.wav
687 /bitstream/1807/24491/87/OAF_note_ps.wav
688 /bitstream/1807/24491/88/OAF_nice_ps.wav
689 /bitstream/1807/24491/89/OAF_neat_ps.wav
690 /bitstream/1807/24491/90/OAF_near_ps.wav
691 /bitstream/1807/24491/91/OAF_name_ps.wav
692 /bitstream/1807/24491/92/OAF_nag_ps.wav
693 /bitstream/1807/24491/93/OAF_mouse_ps.wav
694 /bitstream/1807/24491/94/OAF_mop_ps.wav
695 /bitstream/1807/24491/95/OAF_moon_ps.wav
696 /bitstream/1807/24491/96/OAF_mood_ps.wav
697 /bitstream/1807/24491/97/OAF_mode_ps.wav
698 /bitstream/1807/24491/98/OAF_mob_ps.wav
699 /bitstream/1807/24491/99/OAF_mill_ps.wav
700 /bitstream/1807/24491/100/OAF_met_ps.wav
701 /bitstream/1807/24491/101/OAF_mess_ps.wav
702 /bitstream/1807/24491/102/OAF_merge_ps.wav
703 /bitstream/1807/24491/103/OAF_match_ps.wav
704 /bit

859 /bitstream/1807/24492/59/OAF_rot_fear.wav
860 /bitstream/1807/24492/60/OAF_rose_fear.wav
861 /bitstream/1807/24492/61/OAF_room_fear.wav
862 /bitstream/1807/24492/62/OAF_road_fear.wav
863 /bitstream/1807/24492/63/OAF_ripe_fear.wav
864 /bitstream/1807/24492/64/OAF_ring_fear.wav
865 /bitstream/1807/24492/65/OAF_red_fear.wav
866 /bitstream/1807/24492/66/OAF_read_fear.wav
867 /bitstream/1807/24492/67/OAF_reach_fear.wav
868 /bitstream/1807/24492/68/OAF_rat_fear.wav
869 /bitstream/1807/24492/69/OAF_raise_fear.wav
870 /bitstream/1807/24492/70/OAF_rain_fear.wav
871 /bitstream/1807/24492/71/OAF_raid_fear.wav
872 /bitstream/1807/24492/72/OAF_rag_fear.wav
873 /bitstream/1807/24492/73/OAF_puff_fear.wav
874 /bitstream/1807/24492/74/OAF_pool_fear.wav
875 /bitstream/1807/24492/75/OAF_pole_fear.wav
876 /bitstream/1807/24492/76/OAF_pike_fear.wav
877 /bitstream/1807/24492/77/OAF_pick_fear.wav
878 /bitstream/1807/24492/78/OAF_phone_fear.wav
879 /bitstream/1807/24492/79/OAF_perch_fear.wav
880 /bitstrea

1030 /bitstream/1807/24493/30/YAF_third_happy.wav
1031 /bitstream/1807/24493/31/YAF_thin_happy.wav
1032 /bitstream/1807/24493/32/YAF_tell_happy.wav
1033 /bitstream/1807/24493/33/YAF_team_happy.wav
1034 /bitstream/1807/24493/34/YAF_tape_happy.wav
1035 /bitstream/1807/24493/35/YAF_talk_happy.wav
1036 /bitstream/1807/24493/36/YAF_take_happy.wav
1037 /bitstream/1807/24493/37/YAF_sure_happy.wav
1038 /bitstream/1807/24493/38/YAF_such_happy.wav
1039 /bitstream/1807/24493/39/YAF_sub_happy.wav
1040 /bitstream/1807/24493/40/YAF_south_happy.wav
1041 /bitstream/1807/24493/41/YAF_sour_happy.wav
1042 /bitstream/1807/24493/42/YAF_soup_happy.wav
1043 /bitstream/1807/24493/43/YAF_soap_happy.wav
1044 /bitstream/1807/24493/44/YAF_size_happy.wav
1045 /bitstream/1807/24493/45/YAF_shout_happy.wav
1046 /bitstream/1807/24493/46/YAF_should_happy.wav
1047 /bitstream/1807/24493/47/YAF_shirt_happy.wav
1048 /bitstream/1807/24493/48/YAF_sheep_happy.wav
1049 /bitstream/1807/24493/49/YAF_shawl_happy.wav
1050 /bitstre

1195 /bitstream/1807/24493/195/YAF_beg_happy.wav
1196 /bitstream/1807/24493/196/YAF_bean_happy.wav
1197 /bitstream/1807/24493/197/YAF_bath_happy.wav
1198 /bitstream/1807/24493/198/YAF_base_happy.wav
1199 /bitstream/1807/24493/199/YAF_bar_happy.wav
1200 /bitstream/1807/24493/200/YAF_back_happy.wav
1201 /bitstream/1807/24494/1/OAF_youth_disgust.wav
1202 /bitstream/1807/24494/2/OAF_young_disgust.wav
1203 /bitstream/1807/24494/3/OAF_yes_disgust.wav
1204 /bitstream/1807/24494/4/OAF_yearn_disgust.wav
1205 /bitstream/1807/24494/5/OAF_witch_disgust.wav
1206 /bitstream/1807/24494/6/OAF_wire_disgust.wav
1207 /bitstream/1807/24494/7/OAF_wife_disgust.wav
1208 /bitstream/1807/24494/8/OAF_white_disgust.wav
1209 /bitstream/1807/24494/9/OAF_whip_disgust.wav
1210 /bitstream/1807/24494/10/OAF_which_disgust.wav
1211 /bitstream/1807/24494/11/OAF_when_disgust.wav
1212 /bitstream/1807/24494/12/OAF_wheat_disgust.wav
1213 /bitstream/1807/24494/13/OAF_week_disgust.wav
1214 /bitstream/1807/24494/14/OAF_wash_dis

1355 /bitstream/1807/24494/155/OAF_gas_disgust.wav
1356 /bitstream/1807/24494/156/OAF_gap_disgust.wav
1357 /bitstream/1807/24494/157/OAF_food_disgust.wav
1358 /bitstream/1807/24494/158/OAF_five_disgust.wav
1359 /bitstream/1807/24494/159/OAF_fit_disgust.wav
1360 /bitstream/1807/24494/160/OAF_fat_disgust.wav
1361 /bitstream/1807/24494/161/OAF_far_disgust.wav
1362 /bitstream/1807/24494/162/OAF_fall_disgust.wav
1363 /bitstream/1807/24494/163/OAF_fail_disgust.wav
1364 /bitstream/1807/24494/164/OAF_door_disgust.wav
1365 /bitstream/1807/24494/165/OAF_doll_disgust.wav
1366 /bitstream/1807/24494/166/OAF_dog_disgust.wav
1367 /bitstream/1807/24494/167/OAF_dodge_disgust.wav
1368 /bitstream/1807/24494/168/OAF_ditch_disgust.wav
1369 /bitstream/1807/24494/169/OAF_dip_disgust.wav
1370 /bitstream/1807/24494/170/OAF_dime_disgust.wav
1371 /bitstream/1807/24494/171/OAF_deep_disgust.wav
1372 /bitstream/1807/24494/172/OAF_death_disgust.wav
1373 /bitstream/1807/24494/173/OAF_dead_disgust.wav
1374 /bitstream/

1527 /bitstream/1807/24495/127/YAF_keep_ps.wav
1528 /bitstream/1807/24495/128/YAF_keen_ps.wav
1529 /bitstream/1807/24495/129/YAF_juice_ps.wav
1530 /bitstream/1807/24495/130/YAF_jug_ps.wav
1531 /bitstream/1807/24495/131/YAF_judge_ps.wav
1532 /bitstream/1807/24495/132/YAF_join_ps.wav
1533 /bitstream/1807/24495/133/YAF_jar_ps.wav
1534 /bitstream/1807/24495/134/YAF_jail_ps.wav
1535 /bitstream/1807/24495/135/YAF_hush_ps.wav
1536 /bitstream/1807/24495/136/YAF_hurl_ps.wav
1537 /bitstream/1807/24495/137/YAF_home_ps.wav
1538 /bitstream/1807/24495/138/YAF_hole_ps.wav
1539 /bitstream/1807/24495/139/YAF_hit_ps.wav
1540 /bitstream/1807/24495/140/YAF_hire_ps.wav
1541 /bitstream/1807/24495/141/YAF_haze_ps.wav
1542 /bitstream/1807/24495/142/YAF_have_ps.wav
1543 /bitstream/1807/24495/143/YAF_hate_ps.wav
1544 /bitstream/1807/24495/144/YAF_hash_ps.wav
1545 /bitstream/1807/24495/145/YAF_hall_ps.wav
1546 /bitstream/1807/24495/146/YAF_half_ps.wav
1547 /bitstream/1807/24495/147/YAF_gun_ps.wav
1548 /bitstream

1694 /bitstream/1807/24496/94/YAF_mop_neutral.wav
1695 /bitstream/1807/24496/95/YAF_moon_neutral.wav
1696 /bitstream/1807/24496/96/YAF_mood_neutral.wav
1697 /bitstream/1807/24496/97/YAF_mode_neutral.wav
1698 /bitstream/1807/24496/98/YAF_mob_neutral.wav
1699 /bitstream/1807/24496/99/YAF_mill_neutral.wav
1700 /bitstream/1807/24496/100/YAF_met_neutral.wav
1701 /bitstream/1807/24496/101/YAF_mess_neutral.wav
1702 /bitstream/1807/24496/102/YAF_merge_neutral.wav
1703 /bitstream/1807/24496/103/YAF_match_neutral.wav
1704 /bitstream/1807/24496/104/YAF_make_neutral.wav
1705 /bitstream/1807/24496/105/YAF_luck_neutral.wav
1706 /bitstream/1807/24496/106/YAF_love_neutral.wav
1707 /bitstream/1807/24496/107/YAF_lot_neutral.wav
1708 /bitstream/1807/24496/108/YAF_lose_neutral.wav
1709 /bitstream/1807/24496/109/YAF_lore_neutral.wav
1710 /bitstream/1807/24496/110/YAF_long_neutral.wav
1711 /bitstream/1807/24496/111/YAF_loaf_neutral.wav
1712 /bitstream/1807/24496/112/YAF_live_neutral.wav
1713 /bitstream/1807

1857 /bitstream/1807/24497/57/OAF_rush_sad.wav
1858 /bitstream/1807/24497/58/OAF_rough_sad.wav
1859 /bitstream/1807/24497/59/OAF_rot_sad.wav
1860 /bitstream/1807/24497/60/OAF_rose_sad.wav
1861 /bitstream/1807/24497/61/OAF_room_sad.wav
1862 /bitstream/1807/24497/62/OAF_road_sad.wav
1863 /bitstream/1807/24497/63/OAF_ripe_sad.wav
1864 /bitstream/1807/24497/64/OAF_ring_sad.wav
1865 /bitstream/1807/24497/65/OAF_red_sad.wav
1866 /bitstream/1807/24497/66/OAF_read_sad.wav
1867 /bitstream/1807/24497/67/OAF_reach_sad.wav
1868 /bitstream/1807/24497/68/OAF_rat_sad.wav
1869 /bitstream/1807/24497/69/OAF_raise_sad.wav
1870 /bitstream/1807/24497/70/OAF_rain_sad.wav
1871 /bitstream/1807/24497/71/OAF_raid_sad.wav
1872 /bitstream/1807/24497/72/OAF_rag_sad.wav
1873 /bitstream/1807/24497/73/OAF_puff_sad.wav
1874 /bitstream/1807/24497/74/OAF_pool_sad.wav
1875 /bitstream/1807/24497/75/OAF_pole_sad.wav
1876 /bitstream/1807/24497/76/OAF_pike_sad.wav
1877 /bitstream/1807/24497/77/OAF_pick_sad.wav
1878 /bitstrea

2028 /bitstream/1807/24498/28/YAF_thumb_disgust.wav
2029 /bitstream/1807/24498/29/YAF_thought_disgust.wav
2030 /bitstream/1807/24498/30/YAF_third_disgust.wav
2031 /bitstream/1807/24498/31/YAF_thin_disgust.wav
2032 /bitstream/1807/24498/32/YAF_tell_disgust.wav
2033 /bitstream/1807/24498/33/YAF_team_disgust.wav
2034 /bitstream/1807/24498/34/YAF_tape_disgust.wav
2035 /bitstream/1807/24498/35/YAF_talk_disgust.wav
2036 /bitstream/1807/24498/36/YAF_take_disgust.wav
2037 /bitstream/1807/24498/37/YAF_sure_disgust.wav
2038 /bitstream/1807/24498/38/YAF_such_disgust.wav
2039 /bitstream/1807/24498/39/YAF_sub_disgust.wav
2040 /bitstream/1807/24498/40/YAF_south_disgust.wav
2041 /bitstream/1807/24498/41/YAF_sour_disgust.wav
2042 /bitstream/1807/24498/42/YAF_soup_disgust.wav
2043 /bitstream/1807/24498/43/YAF_soap_disgust.wav
2044 /bitstream/1807/24498/44/YAF_size_disgust.wav
2045 /bitstream/1807/24498/45/YAF_shout_disgust.wav
2046 /bitstream/1807/24498/46/YAF_should_disgust.wav
2047 /bitstream/1807/24

2187 /bitstream/1807/24498/187/YAF_calm_disgust.wav
2188 /bitstream/1807/24498/188/YAF_cab_disgust.wav
2189 /bitstream/1807/24498/189/YAF_burn_disgust.wav
2190 /bitstream/1807/24498/190/YAF_bought_disgust.wav
2191 /bitstream/1807/24498/191/YAF_book_disgust.wav
2192 /bitstream/1807/24498/192/YAF_bone_disgust.wav
2193 /bitstream/1807/24498/193/YAF_boat_disgust.wav
2194 /bitstream/1807/24498/194/YAF_bite_disgust.wav
2195 /bitstream/1807/24498/195/YAF_beg_disgust.wav
2196 /bitstream/1807/24498/196/YAF_bean_disgust.wav
2197 /bitstream/1807/24498/197/YAF_bath_disgust.wav
2198 /bitstream/1807/24498/198/YAF_base_disgust.wav
2199 /bitstream/1807/24498/199/YAF_bar_disgust.wav
2200 /bitstream/1807/24498/200/YAF_back_disgust.wav
2201 /bitstream/1807/24499/1/OAF_youth_angry.wav
2202 /bitstream/1807/24499/2/OAF_young_angry.wav
2203 /bitstream/1807/24499/3/OAF_yes_angry.wav
2204 /bitstream/1807/24499/4/OAF_yearn_angry.wav
2205 /bitstream/1807/24499/5/OAF_witch_angry.wav
2206 /bitstream/1807/24499/6/O

2353 /bitstream/1807/24499/153/OAF_germ_angry.wav
2354 /bitstream/1807/24499/154/OAF_gaze_angry.wav
2355 /bitstream/1807/24499/155/OAF_gas_angry.wav
2356 /bitstream/1807/24499/156/OAF_gap_angry.wav
2357 /bitstream/1807/24499/157/OAF_food_angry.wav
2358 /bitstream/1807/24499/158/OAF_five_angry.wav
2359 /bitstream/1807/24499/159/OAF_fit_angry.wav
2360 /bitstream/1807/24499/160/OAF_fat_angry.wav
2361 /bitstream/1807/24499/161/OAF_far_angry.wav
2362 /bitstream/1807/24499/162/OAF_fall_angry.wav
2363 /bitstream/1807/24499/163/OAF_fail_angry.wav
2364 /bitstream/1807/24499/164/OAF_door_angry.wav
2365 /bitstream/1807/24499/165/OAF_doll_angry.wav
2366 /bitstream/1807/24499/166/OAF_dog_angry.wav
2367 /bitstream/1807/24499/167/OAF_dodge_angry.wav
2368 /bitstream/1807/24499/168/OAF_ditch_angry.wav
2369 /bitstream/1807/24499/169/OAF_dip_angry.wav
2370 /bitstream/1807/24499/170/OAF_dime_angry.wav
2371 /bitstream/1807/24499/171/OAF_deep_angry.wav
2372 /bitstream/1807/24499/172/OAF_death_angry.wav
2373

2524 /bitstream/1807/24500/124/YAF_kill_sad.wav
2525 /bitstream/1807/24500/125/YAF_kick_sad.wav
2526 /bitstream/1807/24500/126/YAF_keg_sad.wav
2527 /bitstream/1807/24500/127/YAF_keep_sad.wav
2528 /bitstream/1807/24500/128/YAF_keen_sad.wav
2529 /bitstream/1807/24500/129/YAF_juice_sad.wav
2530 /bitstream/1807/24500/130/YAF_jug_sad.wav
2531 /bitstream/1807/24500/131/YAF_judge_sad.wav
2532 /bitstream/1807/24500/132/YAF_join_sad.wav
2533 /bitstream/1807/24500/133/YAF_jar_sad.wav
2534 /bitstream/1807/24500/134/YAF_jail_sad.wav
2535 /bitstream/1807/24500/135/YAF_hush_sad.wav
2536 /bitstream/1807/24500/136/YAF_hurl_sad.wav
2537 /bitstream/1807/24500/137/YAF_home_sad.wav
2538 /bitstream/1807/24500/138/YAF_hole_sad.wav
2539 /bitstream/1807/24500/139/YAF_hit_sad.wav
2540 /bitstream/1807/24500/140/YAF_hire_sad.wav
2541 /bitstream/1807/24500/141/YAF_haze_sad.wav
2542 /bitstream/1807/24500/142/YAF_have_sad.wav
2543 /bitstream/1807/24500/143/YAF_hate_sad.wav
2544 /bitstream/1807/24500/144/YAF_hash_sa

2693 /bitstream/1807/24501/93/OAF_mouse_happy.wav
2694 /bitstream/1807/24501/94/OAF_mop_happy.wav
2695 /bitstream/1807/24501/95/OAF_moon_happy.wav
2696 /bitstream/1807/24501/96/OAF_mood_happy.wav
2697 /bitstream/1807/24501/97/OAF_mode_happy.wav
2698 /bitstream/1807/24501/98/OAF_mob_happy.wav
2699 /bitstream/1807/24501/99/OAF_mill_happy.wav
2700 /bitstream/1807/24501/100/OAF_met_happy.wav
2701 /bitstream/1807/24501/101/OAF_mess_happy.wav
2702 /bitstream/1807/24501/102/OAF_merge_happy.wav
2703 /bitstream/1807/24501/103/OAF_match_happy.wav
2704 /bitstream/1807/24501/104/OAF_make_happy.wav
2705 /bitstream/1807/24501/105/OAF_luck_happy.wav
2706 /bitstream/1807/24501/106/OAF_love_happy.wav
2707 /bitstream/1807/24501/107/OAF_lot_happy.wav
2708 /bitstream/1807/24501/108/OAF_lose_happy.wav
2709 /bitstream/1807/24501/109/OAF_lore_happy.wav
2710 /bitstream/1807/24501/110/OAF_long_happy.wav
2711 /bitstream/1807/24501/111/OAF_loaf_happy.wav
2712 /bitstream/1807/24501/112/OAF_live_happy.wav
2713 /bi

In [6]:
# --- Load recordings and transcripts -------------------------------------
# os.listdir returns entries in arbitrary order, so the transcript of each
# recording is derived from the recording's own file name (the same
# '.wav' -> '.txt' rule download() uses) instead of zipping two independently
# filtered listings, which could attach a transcript to the wrong audio.
# Sorting makes the example order, and therefore the held-out validation
# example, reproducible across runs.
data_dir = './data/'
wav_files = sorted(f for f in os.listdir(data_dir) if f.endswith('.wav'))
text_files = [f.replace('.wav', '.txt') for f in wav_files]

inputs, targets = [], []
skipped = []  # recordings left out because the audio or transcript was unusable
for (wav_file, text_file) in tqdm(zip(wav_files, text_files), total=len(wav_files), ncols=70):
    text_path = data_dir + text_file
    if not os.path.isfile(text_path):
        skipped.append(wav_file)
        continue
    try:
        fs, audio = wavfile.read(data_dir + wav_file)
    except Exception:
        # Best effort: an unreadable recording is skipped and reported below.
        # KeyboardInterrupt is no longer swallowed as it was by a bare except.
        skipped.append(wav_file)
        continue
    # Read the transcript before appending anything so inputs and targets
    # can never get out of step with each other.
    with open(text_path) as f:
        transcript = f.read()
    inputs.append(mfcc(audio, samplerate=fs, nfft=1024))
    targets.append(transcript)

if skipped:
    print('Skipped %d recording(s) with unreadable audio or a missing transcript' % len(skipped))
if len(inputs) < 2:
    raise RuntimeError('Need at least two usable recordings in ./data/ (one is held out for validation)')

# Zero-pad every MFCC sequence at the end up to the longest one.
inputs = tf.keras.preprocessing.sequence.pad_sequences(
    inputs, dtype='float32', padding='post')

# --- Character vocabulary -------------------------------------------------
# Sorted so that the char <-> index mapping is identical on every run.
chars = sorted(set(c for target in targets for c in target))
# One extra class on top of the characters (reserved for the CTC blank label).
PARAMS['num_classes'] = len(chars) + 1

idx2char = {idx: char for idx, char in enumerate(chars)}
char2idx = {char: idx for idx, char in idx2char.items()}

targets = [[char2idx[c] for c in target] for target in targets]

# --- Train / validation split: the last example is held out ---------------
inputs_val = np.expand_dims(inputs[-1], 0)
targets_val = targets[-1]

inputs_train = inputs[:-1]
targets_train = targets[:-1]
targets_train = tf.SparseTensor(*sparse_tuple_from(targets_train))

100%|████████████████████████████| 2800/2800 [00:16<00:00, 164.78it/s]


In [7]:
# Train the model on the training split.
estimator = tf.estimator.Estimator(model_fn)
estimator.train(input_fn=lambda: train_input_fn(inputs_train, targets_train))

# Decode the single held-out example with the trained model.
val_input_fn = tf.estimator.inputs.numpy_input_fn(inputs_val, shuffle=False)
preds = list(estimator.predict(input_fn=val_input_fn))

# Map label ids back to characters and compare with the reference transcript.
predicted_text = ''.join(idx2char[idx] for idx in preds[0])
print('Prediction:', predicted_text)
actual_text = ''.join(idx2char[idx] for idx in targets_val)
print('Actual:', actual_text)

INFO:tensorflow:Using default config.


INFO:tensorflow:Using default config.






INFO:tensorflow:Using config: {'_model_dir': '/var/folders/sx/fv0r97j96fz8njp14dt5g7940000gn/T/tmp74175gf2', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x12121a898>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


INFO:tensorflow:Using config: {'_model_dir': '/var/folders/sx/fv0r97j96fz8njp14dt5g7940000gn/T/tmp74175gf2', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x12121a898>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}






INFO:tensorflow:Calling model_fn.


INFO:tensorflow:Calling model_fn.


INFO:tensorflow:Done calling model_fn.


INFO:tensorflow:Done calling model_fn.


INFO:tensorflow:Create CheckpointSaverHook.


INFO:tensorflow:Create CheckpointSaverHook.


INFO:tensorflow:Graph was finalized.


INFO:tensorflow:Graph was finalized.


INFO:tensorflow:Running local_init_op.


INFO:tensorflow:Running local_init_op.


INFO:tensorflow:Done running local_init_op.


INFO:tensorflow:Done running local_init_op.


INFO:tensorflow:Saving checkpoints for 1 into /var/folders/sx/fv0r97j96fz8njp14dt5g7940000gn/T/tmp74175gf2/model.ckpt.


INFO:tensorflow:Saving checkpoints for 1 into /var/folders/sx/fv0r97j96fz8njp14dt5g7940000gn/T/tmp74175gf2/model.ckpt.


INFO:tensorflow:loss = 493.68723, step = 1


INFO:tensorflow:loss = 493.68723, step = 1


INFO:tensorflow:edit_dist = 3.3405912


INFO:tensorflow:edit_dist = 3.3405912


INFO:tensorflow:global_step/sec: 6.32391


INFO:tensorflow:global_step/sec: 6.32391


INFO:tensorflow:loss = 33.780342, step = 101 (15.814 sec)


INFO:tensorflow:loss = 33.780342, step = 101 (15.814 sec)


INFO:tensorflow:edit_dist = 0.9237767 (15.814 sec)


INFO:tensorflow:edit_dist = 0.9237767 (15.814 sec)


INFO:tensorflow:global_step/sec: 6.34815


INFO:tensorflow:global_step/sec: 6.34815


INFO:tensorflow:loss = 22.969511, step = 201 (15.753 sec)


INFO:tensorflow:loss = 22.969511, step = 201 (15.753 sec)


INFO:tensorflow:edit_dist = 0.64042753 (15.753 sec)


INFO:tensorflow:edit_dist = 0.64042753 (15.753 sec)


INFO:tensorflow:global_step/sec: 6.1292


INFO:tensorflow:global_step/sec: 6.1292


INFO:tensorflow:loss = 18.041271, step = 301 (16.315 sec)


INFO:tensorflow:loss = 18.041271, step = 301 (16.315 sec)


INFO:tensorflow:edit_dist = 0.4439488 (16.315 sec)


INFO:tensorflow:edit_dist = 0.4439488 (16.315 sec)


INFO:tensorflow:global_step/sec: 6.47447


INFO:tensorflow:global_step/sec: 6.47447


INFO:tensorflow:loss = 14.68385, step = 401 (15.445 sec)


INFO:tensorflow:loss = 14.68385, step = 401 (15.445 sec)


INFO:tensorflow:edit_dist = 0.33288398 (15.445 sec)


INFO:tensorflow:edit_dist = 0.33288398 (15.445 sec)


INFO:tensorflow:global_step/sec: 6.27819


INFO:tensorflow:global_step/sec: 6.27819


INFO:tensorflow:loss = 12.546645, step = 501 (15.928 sec)


INFO:tensorflow:loss = 12.546645, step = 501 (15.928 sec)


INFO:tensorflow:edit_dist = 0.26428378 (15.928 sec)


INFO:tensorflow:edit_dist = 0.26428378 (15.928 sec)


INFO:tensorflow:global_step/sec: 6.09566


INFO:tensorflow:global_step/sec: 6.09566


INFO:tensorflow:loss = 10.570291, step = 601 (16.406 sec)


INFO:tensorflow:loss = 10.570291, step = 601 (16.406 sec)


INFO:tensorflow:edit_dist = 0.23011984 (16.406 sec)


INFO:tensorflow:edit_dist = 0.23011984 (16.406 sec)


INFO:tensorflow:global_step/sec: 5.88395


INFO:tensorflow:global_step/sec: 5.88395


INFO:tensorflow:loss = 9.879487, step = 701 (16.995 sec)


INFO:tensorflow:loss = 9.879487, step = 701 (16.995 sec)


INFO:tensorflow:edit_dist = 0.23076124 (16.995 sec)


INFO:tensorflow:edit_dist = 0.23076124 (16.995 sec)


INFO:tensorflow:global_step/sec: 5.91025


INFO:tensorflow:global_step/sec: 5.91025


INFO:tensorflow:loss = 9.744004, step = 801 (16.920 sec)


INFO:tensorflow:loss = 9.744004, step = 801 (16.920 sec)


INFO:tensorflow:edit_dist = 0.23491286 (16.919 sec)


INFO:tensorflow:edit_dist = 0.23491286 (16.919 sec)


INFO:tensorflow:global_step/sec: 6.28194


INFO:tensorflow:global_step/sec: 6.28194


INFO:tensorflow:loss = 8.869291, step = 901 (15.919 sec)


INFO:tensorflow:loss = 8.869291, step = 901 (15.919 sec)


INFO:tensorflow:edit_dist = 0.23263688 (15.919 sec)


INFO:tensorflow:edit_dist = 0.23263688 (15.919 sec)


INFO:tensorflow:global_step/sec: 6.05579


INFO:tensorflow:global_step/sec: 6.05579


INFO:tensorflow:loss = 10.1293, step = 1001 (16.513 sec)


INFO:tensorflow:loss = 10.1293, step = 1001 (16.513 sec)


INFO:tensorflow:edit_dist = 0.23440975 (16.513 sec)


INFO:tensorflow:edit_dist = 0.23440975 (16.513 sec)


INFO:tensorflow:global_step/sec: 6.22978


INFO:tensorflow:global_step/sec: 6.22978


INFO:tensorflow:loss = 9.024219, step = 1101 (16.052 sec)


INFO:tensorflow:loss = 9.024219, step = 1101 (16.052 sec)


INFO:tensorflow:edit_dist = 0.22581771 (16.052 sec)


INFO:tensorflow:edit_dist = 0.22581771 (16.052 sec)


INFO:tensorflow:global_step/sec: 6.2092


INFO:tensorflow:global_step/sec: 6.2092


INFO:tensorflow:loss = 8.325614, step = 1201 (16.105 sec)


INFO:tensorflow:loss = 8.325614, step = 1201 (16.105 sec)


INFO:tensorflow:edit_dist = 0.22008513 (16.105 sec)


INFO:tensorflow:edit_dist = 0.22008513 (16.105 sec)


INFO:tensorflow:global_step/sec: 6.28781


INFO:tensorflow:global_step/sec: 6.28781


INFO:tensorflow:loss = 7.7545857, step = 1301 (15.904 sec)


INFO:tensorflow:loss = 7.7545857, step = 1301 (15.904 sec)


INFO:tensorflow:edit_dist = 0.21311821 (15.904 sec)


INFO:tensorflow:edit_dist = 0.21311821 (15.904 sec)


INFO:tensorflow:global_step/sec: 6.0732


INFO:tensorflow:global_step/sec: 6.0732


INFO:tensorflow:loss = 7.438212, step = 1401 (16.466 sec)


INFO:tensorflow:loss = 7.438212, step = 1401 (16.466 sec)


INFO:tensorflow:edit_dist = 0.20940976 (16.466 sec)


INFO:tensorflow:edit_dist = 0.20940976 (16.466 sec)


INFO:tensorflow:global_step/sec: 5.98418


INFO:tensorflow:global_step/sec: 5.98418


INFO:tensorflow:loss = 8.097485, step = 1501 (16.711 sec)


INFO:tensorflow:loss = 8.097485, step = 1501 (16.711 sec)


INFO:tensorflow:edit_dist = 0.22991702 (16.711 sec)


INFO:tensorflow:edit_dist = 0.22991702 (16.711 sec)


INFO:tensorflow:global_step/sec: 6.16989


INFO:tensorflow:global_step/sec: 6.16989


INFO:tensorflow:loss = 7.615379, step = 1601 (16.208 sec)


INFO:tensorflow:loss = 7.615379, step = 1601 (16.208 sec)


INFO:tensorflow:edit_dist = 0.21025613 (16.208 sec)


INFO:tensorflow:edit_dist = 0.21025613 (16.208 sec)


INFO:tensorflow:global_step/sec: 6.19937


INFO:tensorflow:global_step/sec: 6.19937


INFO:tensorflow:loss = 7.241102, step = 1701 (16.131 sec)


INFO:tensorflow:loss = 7.241102, step = 1701 (16.131 sec)


INFO:tensorflow:edit_dist = 0.21157408 (16.131 sec)


INFO:tensorflow:edit_dist = 0.21157408 (16.131 sec)


INFO:tensorflow:global_step/sec: 6.14893


INFO:tensorflow:global_step/sec: 6.14893


INFO:tensorflow:loss = 7.7785172, step = 1801 (16.263 sec)


INFO:tensorflow:loss = 7.7785172, step = 1801 (16.263 sec)


INFO:tensorflow:edit_dist = 0.2109874 (16.263 sec)


INFO:tensorflow:edit_dist = 0.2109874 (16.263 sec)


INFO:tensorflow:global_step/sec: 6.13687


INFO:tensorflow:global_step/sec: 6.13687


INFO:tensorflow:loss = 7.0281825, step = 1901 (16.295 sec)


INFO:tensorflow:loss = 7.0281825, step = 1901 (16.295 sec)


INFO:tensorflow:edit_dist = 0.20424908 (16.295 sec)


INFO:tensorflow:edit_dist = 0.20424908 (16.295 sec)


INFO:tensorflow:global_step/sec: 6.02177


INFO:tensorflow:global_step/sec: 6.02177


INFO:tensorflow:loss = 7.380404, step = 2001 (16.607 sec)


INFO:tensorflow:loss = 7.380404, step = 2001 (16.607 sec)


INFO:tensorflow:edit_dist = 0.20698601 (16.606 sec)


INFO:tensorflow:edit_dist = 0.20698601 (16.606 sec)


INFO:tensorflow:global_step/sec: 5.87731


INFO:tensorflow:global_step/sec: 5.87731


INFO:tensorflow:loss = 6.30469, step = 2101 (17.014 sec)


INFO:tensorflow:loss = 6.30469, step = 2101 (17.014 sec)


INFO:tensorflow:edit_dist = 0.18560728 (17.015 sec)


INFO:tensorflow:edit_dist = 0.18560728 (17.015 sec)


INFO:tensorflow:global_step/sec: 6.1825


INFO:tensorflow:global_step/sec: 6.1825


INFO:tensorflow:loss = 7.5258703, step = 2201 (16.175 sec)


INFO:tensorflow:loss = 7.5258703, step = 2201 (16.175 sec)


INFO:tensorflow:edit_dist = 0.21237889 (16.174 sec)


INFO:tensorflow:edit_dist = 0.21237889 (16.174 sec)


INFO:tensorflow:global_step/sec: 6.22797


INFO:tensorflow:global_step/sec: 6.22797


INFO:tensorflow:loss = 6.7131095, step = 2301 (16.057 sec)


INFO:tensorflow:loss = 6.7131095, step = 2301 (16.057 sec)


INFO:tensorflow:edit_dist = 0.19207518 (16.057 sec)


INFO:tensorflow:edit_dist = 0.19207518 (16.057 sec)


INFO:tensorflow:global_step/sec: 6.23884


INFO:tensorflow:global_step/sec: 6.23884


INFO:tensorflow:loss = 6.8590493, step = 2401 (16.028 sec)


INFO:tensorflow:loss = 6.8590493, step = 2401 (16.028 sec)


INFO:tensorflow:edit_dist = 0.19281046 (16.028 sec)


INFO:tensorflow:edit_dist = 0.19281046 (16.028 sec)


INFO:tensorflow:global_step/sec: 5.96235


INFO:tensorflow:global_step/sec: 5.96235


INFO:tensorflow:loss = 6.6002755, step = 2501 (16.772 sec)


INFO:tensorflow:loss = 6.6002755, step = 2501 (16.772 sec)


INFO:tensorflow:edit_dist = 0.18627524 (16.772 sec)


INFO:tensorflow:edit_dist = 0.18627524 (16.772 sec)


INFO:tensorflow:global_step/sec: 5.96816


INFO:tensorflow:global_step/sec: 5.96816


INFO:tensorflow:loss = 7.0147314, step = 2601 (16.756 sec)


INFO:tensorflow:loss = 7.0147314, step = 2601 (16.756 sec)


INFO:tensorflow:edit_dist = 0.18357845 (16.756 sec)


INFO:tensorflow:edit_dist = 0.18357845 (16.756 sec)


INFO:tensorflow:global_step/sec: 6.16312


INFO:tensorflow:global_step/sec: 6.16312


INFO:tensorflow:loss = 6.705098, step = 2701 (16.225 sec)


INFO:tensorflow:loss = 6.705098, step = 2701 (16.225 sec)


INFO:tensorflow:edit_dist = 0.18995169 (16.224 sec)


INFO:tensorflow:edit_dist = 0.18995169 (16.224 sec)


INFO:tensorflow:global_step/sec: 6.00773


INFO:tensorflow:global_step/sec: 6.00773


INFO:tensorflow:loss = 6.9078755, step = 2801 (16.646 sec)


INFO:tensorflow:loss = 6.9078755, step = 2801 (16.646 sec)


INFO:tensorflow:edit_dist = 0.21211128 (16.646 sec)


INFO:tensorflow:edit_dist = 0.21211128 (16.646 sec)


INFO:tensorflow:Saving checkpoints for 2820 into /var/folders/sx/fv0r97j96fz8njp14dt5g7940000gn/T/tmp74175gf2/model.ckpt.


INFO:tensorflow:Saving checkpoints for 2820 into /var/folders/sx/fv0r97j96fz8njp14dt5g7940000gn/T/tmp74175gf2/model.ckpt.


INFO:tensorflow:Loss for final step: 4.6740785.


INFO:tensorflow:Loss for final step: 4.6740785.


INFO:tensorflow:Calling model_fn.


INFO:tensorflow:Calling model_fn.


INFO:tensorflow:Done calling model_fn.


INFO:tensorflow:Done calling model_fn.


INFO:tensorflow:Graph was finalized.


INFO:tensorflow:Graph was finalized.


INFO:tensorflow:Restoring parameters from /var/folders/sx/fv0r97j96fz8njp14dt5g7940000gn/T/tmp74175gf2/model.ckpt-2820


INFO:tensorflow:Restoring parameters from /var/folders/sx/fv0r97j96fz8njp14dt5g7940000gn/T/tmp74175gf2/model.ckpt-2820


INFO:tensorflow:Running local_init_op.


INFO:tensorflow:Running local_init_op.


INFO:tensorflow:Done running local_init_op.


INFO:tensorflow:Done running local_init_op.


Prediction: say the word 
Actual: say the word youth
