In [1]:
import json
import pickle
import pprint
import os
from basics import *

In [41]:
model_cfg = {}
train_cfg = {}
model_cfg['data_path'] = "./both_fbank_out"
model_cfg['wavs_path'] = os.path.join(model_cfg['data_path'], "wavs")
config_name = "sp_config_1"
model_cfg['model_dir'] = config_name
model_cfg['model_fname'] = os.path.join(model_cfg['model_dir'], 'seq2seq.model')
model_cfg['train_log'] = os.path.join(model_cfg['model_dir'], 'train.log')
model_cfg['dev_log'] = os.path.join(model_cfg['model_dir'], 'dev.log')

# ------------------------------------
# FIXED params
# ------------------------------------
# encoder key
# 'es_w', 'es_c', or 'sp', and: # 'en_w', 'en_c', or 'sp'
model_cfg['enc_key'] = 'sp'
model_cfg['dec_key'] = 'en_w'
model_cfg['sp_dim'] = 40
model_cfg['stemmify'] = False
model_cfg['bi_rnn'] = False
model_cfg['train_set'] = 'fisher_train'
model_cfg['dev_set'] = 'fisher_dev'

model_cfg['rnn_unit'] = RNN_GRU
model_cfg['ln'] = True
model_cfg['bn'] = True
model_cfg['attn'] = SOFT_ATTN

model_cfg['l2']=1e-4
model_cfg['grad_clip']=10
model_cfg['rnn_dropout']=0.3
model_cfg['out_dropout']=0.3

model_cfg['hidden_units'] = 512
model_cfg['embedding_units'] = 256
model_cfg['attn_units'] = 128

model_cfg['enc_layers'] = 3
model_cfg['dec_layers'] = 3
model_cfg['highway_layers'] = 0

model_cfg['buckets_num'] = 15
model_cfg['buckets_width'] = 128
model_cfg['max_en_pred'] = 150
model_cfg['train_scale'] = 1
model_cfg['seed'] = "random_seed_{0:d}".format(model_cfg['train_scale'])

k_size_1, stride_1, filters_1 = 3, 2, 32
k_size_2, stride_2, filters_2 = 3, 2, 32
model_cfg['cnn_layers'] = [
        {"in_channels": None,
        "out_channels": 32,
        "ksize": (k_size_1,3),
        "stride": (stride_1,2),
        "pad": (k_size_1 // 2, 3 // 2)},
        {"in_channels": None,
        "out_channels": 32,
        "ksize": (k_size_2,3),
        "stride": (stride_2,2),
        "pad": (k_size_2 // 2, 3 // 2)},
]

# cnn_str = "cnn-{0:d}-{1:d}-{2:d}--{3:d}-{4:d}-{5:d}".format(k_size_1, stride_1, filters_1, 
#                                                             k_size_2, stride_2, filters_2)


# ------------------------------------
# VARIABLE params
# ------------------------------------
train_cfg['gpuid'] = 0
train_cfg['lr'] = 0.01
train_cfg['teach_ratio'] = 0.8
train_cfg['optimizer'] = OPT_ADAM
# default noise function is recommended to be either: 0.01, 0.3 or 1.0
train_cfg['grad_noise_eta'] = 0.01

train_cfg['iters_save_model'] = 5
train_cfg['speech_noise']=0.250
train_cfg['iter_weight_noise'] = 0
train_cfg['weight_noise_mean'] = 0.0
train_cfg['weight_noise_sigma'] = 0.001
train_cfg['batch_size'] = {'max': 256, 'med': 200, 'min': 100, 'scale':1}

# model_prep_buckets.buckets_main(model_cfg['data_path'], num_b, width_b, enc_key)
print("cnn details:")
for d in model_cfg['cnn_layers']:
    print(d)

cnn details:
{'in_channels': None, 'out_channels': 32, 'ksize': (3, 3), 'stride': (2, 2), 'pad': (1, 1)}
{'in_channels': None, 'out_channels': 32, 'ksize': (3, 3), 'stride': (2, 2), 'pad': (1, 1)}


In [42]:
if not os.path.exists(model_cfg['model_dir']):
    os.makedirs(model_cfg['model_dir'])

In [43]:
model_cfg_name = os.path.join(model_cfg['model_dir'], 'model_cfg.json')
train_cfg_name = os.path.join(model_cfg['model_dir'], 'train_cfg.json')

In [44]:
with open(model_cfg_name, "w") as model_f:
    json.dump(model_cfg, model_f, indent=4)

In [45]:
with open(train_cfg_name, "w") as train_f:
    json.dump(train_cfg, train_f, indent=4)

In [46]:
with open(model_cfg_name, "r") as model_f:
    model_cfg = json.load(model_f) 

In [8]:
import prep_buckets

In [18]:
prep_buckets.buckets_main(model_cfg['data_path'], 15, 128, 'sp', scale=2, 
                          seed=model_cfg['seed'])

--------------------------------------------------
loading info_dict from=./both_fbank_out/info.dict
--------------------------------------------------
creating buckets for: fisher_dev


100%|██████████| 3977/3977 [00:00<00:00, 533254.92it/s]
100%|██████████| 3959/3959 [00:00<00:00, 550845.90it/s]
100%|██████████| 3641/3641 [00:00<00:00, 377240.77it/s]
 31%|███       | 42707/138708 [00:00<00:00, 427065.32it/s]

creating buckets for key: sp
creating buckets for: fisher_dev2
creating buckets for key: sp
creating buckets for: fisher_test
creating buckets for key: sp
creating buckets for: fisher_train
creating buckets for key: sp


100%|██████████| 138708/138708 [00:00<00:00, 442929.20it/s]
100%|██████████| 3801/3801 [00:00<00:00, 512770.56it/s]
100%|██████████| 1824/1824 [00:00<00:00, 421115.79it/s]
100%|██████████| 14284/14284 [00:00<00:00, 490831.95it/s]


creating buckets for: callhome_devtest
creating buckets for key: sp
creating buckets for: callhome_evltest
creating buckets for key: sp
creating buckets for: callhome_train
creating buckets for key: sp
--------------------------------------------------
saving info dict in: ./both_fbank_out/buckets_sp.dict
all done ...
--------------------------------------------------
showing buckets for category: fisher_dev
number of buckets=15, width of each bucket=128
index | width | num   
    0 |     0 |    808
    1 |   128 |    996
    2 |   256 |    627
    3 |   384 |    420
    4 |   512 |    298
    5 |   640 |    261
    6 |   768 |    200
    7 |   896 |    148
    8 |  1024 |    107
    9 |  1152 |     51
   10 |  1280 |     40
   11 |  1408 |     13
   12 |  1536 |      5
   13 |  1664 |      1
   14 |  1792 |      2
--------------------------------------------------
showing buckets for category: fisher_dev2
number of buckets=15, width of each bucket=128
index | width | num   
    0 |   

In [19]:
bucket_dict_path = os.path.join(model_cfg['data_path'],'buckets_{0:s}.dict'.format('sp'))
bucket_dict = pickle.load(open(bucket_dict_path, "rb"))

In [20]:
bucket_dict.keys()

dict_keys(['fisher_dev', 'fisher_dev2', 'fisher_test', 'fisher_train', 'callhome_devtest', 'callhome_evltest', 'callhome_train'])

In [25]:
sum([len(b) for b in bucket_dict['fisher_dev']['buckets']])

3977

In [17]:
all_utts_2 = []
[all_utts_2.extend(b) for b in bucket_dict['fisher_train']['buckets']]

[None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None]

In [22]:
all_utts_3 = []
[all_utts_3.extend(b) for b in bucket_dict['fisher_train']['buckets']]

[None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None]

In [23]:
len(set(all_utts_3)), len(set(all_utts_2))

(69349, 69349)

In [24]:
set(all_utts_3) == set(all_utts_2)

True

In [None]:
os.chdir('..')
os.chdir('speech2text')

In [None]:
model_cfg['data_path']