In [3]:
# `display` and `HTML` are imported from the public `IPython.display` module;
# importing them from `IPython.core.display` is deprecated.
from IPython.display import display, HTML
# Inject a CSS rule so the notebook's `.container` element uses the full page width.
display(HTML("<style>.container { width:100% !important; }</style>"))
import json, os, torch, statistics, glob, librosa, pickle, torchaudio
from tqdm import tqdm
import numpy as np
import torchaudio.functional as F
import torchaudio.transforms as T

# Shared MFCC extractor used by every feature-extraction cell below.
# It is configured for 22050 Hz input and produces 39 coefficients, computed
# from a 256-band HTK-scale mel spectrogram (FFT size 2048, hop length 512).
mfcc_transform = T.MFCC(
    sample_rate=22050,
    n_mfcc=39,
    melkwargs=dict(
        n_fft=2048,
        n_mels=256,
        hop_length=512,
        mel_scale='htk',
    ),
)

# Root directory that the per-accent manifest directories are resolved against.
base_dir = './'

In [6]:
# Accent/speaker directories (relative to `base_dir`) whose manifests are processed.
dirs = [
    'assamese_female_english',
    'hindi_male_english',
    'kannada_male_english',
    'manipuri_female_english',
    'tamil_male_english',
    'gujarati_female_english',
    'malayalam_male_english',
    'rajasthani_male_english',
]

def extract_features(file_list, file_dir):
    """Compute one averaged MFCC array per audio file and pickle them to disk.

    The output file is ``<directory of file_dir>/39/<manifest name>_39.file``,
    where ``<manifest name>`` is the base name of ``file_dir`` with ``.json``
    removed. The arrays are written as consecutive ``pickle.dump`` records, in
    the order of ``file_list``; read them back with repeated ``pickle.load``
    calls on the same file handle.

    Args:
        file_list: Iterable of manifest entries; each entry must provide the
            audio path under the ``'audio_filepath'`` key.
        file_dir: Path of the manifest JSON file the entries came from. It is
            only used to derive the output location and for log messages.

    Raises:
        KeyError: If an entry has no ``'audio_filepath'`` key.
        OSError: If the output directory or file cannot be created.
    """
    # Use os.path instead of manual '/' splitting: with a bare file name
    # (no directory part) the old string join produced '/39/', i.e. a
    # directory at the filesystem root.
    manifest_dir, manifest_name = os.path.split(file_dir)
    file_type = manifest_name.replace('.json', '')
    feature_dir = os.path.join(manifest_dir, '39')
    os.makedirs(feature_dir, exist_ok=True)
    feature_file = os.path.join(feature_dir, file_type + '_39.file')

    # The transform is built for one fixed sample rate. Audio loaded at a
    # different rate is still processed exactly as before (no resampling),
    # but the mismatch is reported so it no longer goes unnoticed.
    expected_rate = getattr(mfcc_transform, 'sample_rate', None)
    mismatched_rates = set()

    with open(feature_file, 'wb') as f:
        for entry in tqdm(file_list):
            waveform, sample_rate = torchaudio.load(entry['audio_filepath'])
            if expected_rate is not None and sample_rate != expected_rate:
                mismatched_rates.add(sample_rate)
            # Average over axis 2 of the MFCC output (presumably the frame/time
            # axis, leaving one n_mfcc-sized vector per channel -- TODO confirm).
            mfcc_features = mfcc_transform(waveform).mean(2).detach().numpy()
            pickle.dump(mfcc_features, f)

    if mismatched_rates:
        print("warning: audio sample rate(s)", sorted(mismatched_rates),
              "differ from the MFCC transform sample rate", expected_rate,
              "for", file_dir)
    print("completed", file_dir)

# Extract MFCC features for the seed / selection / test manifests of every
# directory listed in `dirs`.
for _dir in dirs:
    manifests_path = base_dir + _dir + '/'
    print('_'*20)
    print(_dir)

    # All three manifests are parsed before any extraction starts, so a missing
    # or malformed manifest fails before feature files are written for this
    # directory. Each manifest is read as JSON lines (one object per line).
    # The `with` blocks close the handles (previously three files were left
    # open per directory), and blank lines are skipped instead of crashing
    # json.loads.
    seed_file_dir = manifests_path + 'seed.json'
    with open(seed_file_dir) as seed_file:
        seed_list = [json.loads(line.strip()) for line in seed_file if line.strip()]

    selection_file_dir = manifests_path + 'selection.json'
    with open(selection_file_dir) as selection_file:
        selection_list = [json.loads(line.strip()) for line in selection_file if line.strip()]

    test_file_dir = manifests_path + 'test.json'
    with open(test_file_dir) as test_file:
        test_list = [json.loads(line.strip()) for line in test_file if line.strip()]

    print('seed_file_starting')
    print(seed_file_dir)
    extract_features(seed_list, seed_file_dir)
    print(len(seed_list))
    print('seed_file_ending ...\n')

    print('selection_file_starting')
    extract_features(selection_list, selection_file_dir)
    print(len(selection_list))
    print('selection_file_ending ...\n\n')

    print('test_file_starting')
    extract_features(test_list, test_file_dir)
    print(len(test_list))
    print('test_file_ending ...\n\n')


____________________
assamese_female_english
seed_file_starting
./assamese_female_english/seed.json


100%|██████████| 50/50 [00:00<00:00, 116.57it/s]


completed ./assamese_female_english/seed.json
50
seed_file_ending ...

selection_file_starting


100%|██████████| 5864/5864 [00:49<00:00, 118.85it/s]


completed ./assamese_female_english/selection.json
5864
selection_file_ending ...


test_file_starting


100%|██████████| 2262/2262 [00:19<00:00, 117.84it/s]


completed ./assamese_female_english/test.json
2262
test_file_ending ...


____________________
hindi_male_english
seed_file_starting
./hindi_male_english/seed.json


100%|██████████| 50/50 [00:00<00:00, 113.87it/s]


completed ./hindi_male_english/seed.json
50
seed_file_ending ...

selection_file_starting


100%|██████████| 3591/3591 [00:30<00:00, 116.61it/s]


completed ./hindi_male_english/selection.json
3591
selection_file_ending ...


test_file_starting


100%|██████████| 1386/1386 [00:11<00:00, 118.44it/s]


completed ./hindi_male_english/test.json
1386
test_file_ending ...


____________________
kannada_male_english
seed_file_starting
./kannada_male_english/seed.json


100%|██████████| 50/50 [00:00<00:00, 110.59it/s]


completed ./kannada_male_english/seed.json
50
seed_file_ending ...

selection_file_starting


100%|██████████| 3343/3343 [00:30<00:00, 111.31it/s]


completed ./kannada_male_english/selection.json
3343
selection_file_ending ...


test_file_starting


100%|██████████| 1290/1290 [00:11<00:00, 112.60it/s]


completed ./kannada_male_english/test.json
1290
test_file_ending ...


____________________
manipuri_female_english
seed_file_starting
./manipuri_female_english/seed.json


100%|██████████| 50/50 [00:00<00:00, 130.77it/s]


completed ./manipuri_female_english/seed.json
50
seed_file_ending ...

selection_file_starting


100%|██████████| 7024/7024 [00:52<00:00, 133.55it/s]


completed ./manipuri_female_english/selection.json
7024
selection_file_ending ...


test_file_starting


100%|██████████| 2709/2709 [00:20<00:00, 132.28it/s]


completed ./manipuri_female_english/test.json
2709
test_file_ending ...


____________________
tamil_male_english
seed_file_starting
./tamil_male_english/seed.json


100%|██████████| 50/50 [00:00<00:00, 114.66it/s]


completed ./tamil_male_english/seed.json
50
seed_file_ending ...

selection_file_starting


100%|██████████| 5282/5282 [00:46<00:00, 114.49it/s]


completed ./tamil_male_english/selection.json
5282
selection_file_ending ...


test_file_starting


100%|██████████| 2037/2037 [00:17<00:00, 115.49it/s]


completed ./tamil_male_english/test.json
2037
test_file_ending ...


____________________
gujarati_female_english
seed_file_starting
./gujarati_female_english/seed.json


100%|██████████| 50/50 [00:00<00:00, 95.21it/s] 


completed ./gujarati_female_english/seed.json
50
seed_file_ending ...

selection_file_starting


100%|██████████| 3269/3269 [00:35<00:00, 92.69it/s] 


completed ./gujarati_female_english/selection.json
3269
selection_file_ending ...


test_file_starting


100%|██████████| 1260/1260 [00:13<00:00, 94.19it/s] 


completed ./gujarati_female_english/test.json
1260
test_file_ending ...


____________________
malayalam_male_english
seed_file_starting
./malayalam_male_english/seed.json


100%|██████████| 50/50 [00:00<00:00, 109.99it/s]


completed ./malayalam_male_english/seed.json
50
seed_file_ending ...

selection_file_starting


100%|██████████| 3590/3590 [00:32<00:00, 112.13it/s]


completed ./malayalam_male_english/selection.json
3590
selection_file_ending ...


test_file_starting


100%|██████████| 1386/1386 [00:12<00:00, 112.71it/s]


completed ./malayalam_male_english/test.json
1386
test_file_ending ...


____________________
rajasthani_male_english
seed_file_starting
./rajasthani_male_english/seed.json


100%|██████████| 50/50 [00:00<00:00, 114.09it/s]


completed ./rajasthani_male_english/seed.json
50
seed_file_ending ...

selection_file_starting


100%|██████████| 3366/3366 [00:29<00:00, 113.42it/s]


completed ./rajasthani_male_english/selection.json
3366
selection_file_ending ...


test_file_starting


100%|██████████| 1298/1298 [00:11<00:00, 111.96it/s]


completed ./rajasthani_male_english/test.json
1298
test_file_ending ...




In [13]:
# Show the directory list the next cell iterates over. The bare expression is
# enough in a notebook; the extra print() only repeated the same list.
# NOTE(review): the recorded output lists different names than the `dirs`
# defined earlier in this notebook, so `dirs` was presumably reassigned in a
# cell that is not shown -- confirm before relying on Restart & Run All.
dirs

['hindi', 'chinese', 'spanish', 'arabic', 'korean', 'vietnamese']


['hindi', 'chinese', 'spanish', 'arabic', 'korean', 'vietnamese']

In [6]:
# Same extraction as the earlier cell, for a layout where the manifests live in
# a `manifests/` sub-directory of each entry in `dirs`.
# NOTE(review): the recorded output shows an absolute `base_dir` and a different
# `dirs` than the earlier cells define -- presumably both were reassigned in a
# cell that is not shown; confirm before re-running.
for _dir in tqdm(dirs):
    manifests_path = base_dir + _dir + '/manifests/'
    print('_'*20)
    print(_dir)

    # All three manifests are parsed before any extraction starts. Each one is
    # read as JSON lines (one object per line). The `with` blocks close the
    # handles (previously three files were left open per directory), and blank
    # lines are skipped instead of crashing json.loads.
    seed_file_dir = manifests_path + 'seed.json'
    with open(seed_file_dir) as seed_file:
        seed_list = [json.loads(line.strip()) for line in seed_file if line.strip()]

    selection_file_dir = manifests_path + 'selection.json'
    with open(selection_file_dir) as selection_file:
        selection_list = [json.loads(line.strip()) for line in selection_file if line.strip()]

    test_file_dir = manifests_path + 'test.json'
    with open(test_file_dir) as test_file:
        test_list = [json.loads(line.strip()) for line in test_file if line.strip()]

    print('seed_file_starting')
    print(seed_file_dir)
    extract_features(seed_list, seed_file_dir)
    print(len(seed_list))
    print('seed_file_ending ...\n')

    print('selection_file_starting')
    extract_features(selection_list, selection_file_dir)
    print(len(selection_list))
    print('selection_file_ending ...\n\n')

    print('test_file_starting')
    extract_features(test_list, test_file_dir)
    print(len(test_list))
    print('test_file_ending ...\n\n')

  0%|          | 0/6 [00:00<?, ?it/s]
  0%|          | 0/200 [00:00<?, ?it/s][A

____________________
hindi
seed_file_starting
/home/mayank/MTP/begin_again/Error-Driven-ASR-Personalization/CMU_expts/accent/hindi/manifests/seed.json



  0%|          | 1/200 [00:00<00:45,  4.33it/s][A
  1%|          | 2/200 [00:00<00:55,  3.59it/s][A
  2%|▏         | 4/200 [00:00<00:45,  4.30it/s][A
  4%|▎         | 7/200 [00:01<00:35,  5.37it/s][A
  6%|▌         | 11/200 [00:01<00:29,  6.50it/s][A
  6%|▌         | 12/200 [00:01<00:31,  6.03it/s][A
  6%|▋         | 13/200 [00:01<00:36,  5.15it/s][A
  7%|▋         | 14/200 [00:02<00:52,  3.56it/s][A
  8%|▊         | 16/200 [00:02<00:42,  4.31it/s][A
 10%|█         | 20/200 [00:02<00:32,  5.49it/s][A
 12%|█▏        | 24/200 [00:03<00:25,  6.78it/s][A
 13%|█▎        | 26/200 [00:03<00:22,  7.75it/s][A
 16%|█▌        | 32/200 [00:03<00:17,  9.63it/s][A
 18%|█▊        | 36/200 [00:03<00:15, 10.74it/s][A
 20%|█▉        | 39/200 [00:04<00:14, 11.16it/s][A
 22%|██▏       | 43/200 [00:04<00:11, 14.06it/s][A
 23%|██▎       | 46/200 [00:04<00:12, 12.60it/s][A
 24%|██▍       | 48/200 [00:04<00:16,  9.37it/s][A
 26%|██▌       | 52/200 [00:05<00:13, 10.94it/s][A
 28%|██▊       

200
seed_file_ending ...

selection_file_starting



  0%|          | 5/2965 [00:00<02:20, 21.03it/s][A
  0%|          | 6/2965 [00:00<04:06, 12.02it/s][A
  0%|          | 7/2965 [00:00<06:42,  7.34it/s][A
  0%|          | 8/2965 [00:00<09:18,  5.30it/s][A
  0%|          | 9/2965 [00:01<10:07,  4.87it/s][A
  0%|          | 12/2965 [00:01<08:17,  5.93it/s][A
  0%|          | 14/2965 [00:01<07:57,  6.18it/s][A
  1%|          | 17/2965 [00:01<06:36,  7.43it/s][A
  1%|          | 21/2965 [00:02<05:31,  8.89it/s][A
  1%|          | 24/2965 [00:02<04:57,  9.89it/s][A
  1%|          | 29/2965 [00:02<03:58, 12.29it/s][A
  1%|          | 31/2965 [00:02<04:51, 10.06it/s][A
  1%|          | 33/2965 [00:03<06:03,  8.07it/s][A
  1%|          | 37/2965 [00:03<05:05,  9.58it/s][A
  1%|▏         | 40/2965 [00:03<04:37, 10.55it/s][A
  1%|▏         | 43/2965 [00:03<04:10, 11.66it/s][A
  2%|▏         | 45/2965 [00:04<04:54,  9.91it/s][A
  2%|▏         | 49/2965 [00:04<04:19, 11.24it/s][A
  2%|▏         | 51/2965 [00:04<05:36,  8.65it/s]

 25%|██▌       | 754/2965 [01:16<03:19, 11.07it/s][A
 25%|██▌       | 756/2965 [01:16<03:37, 10.17it/s][A
 26%|██▌       | 759/2965 [01:17<03:22, 10.88it/s][A
 26%|██▌       | 761/2965 [01:17<03:33, 10.33it/s][A
 26%|██▌       | 765/2965 [01:17<02:59, 12.26it/s][A
 26%|██▌       | 767/2965 [01:17<03:38, 10.06it/s][A
 26%|██▌       | 771/2965 [01:17<03:03, 11.97it/s][A
 26%|██▌       | 773/2965 [01:18<02:47, 13.07it/s][A
 26%|██▌       | 775/2965 [01:18<03:29, 10.45it/s][A
 26%|██▌       | 778/2965 [01:18<03:09, 11.56it/s][A
 26%|██▋       | 781/2965 [01:18<03:11, 11.40it/s][A
 26%|██▋       | 783/2965 [01:19<03:44,  9.73it/s][A
 27%|██▋       | 787/2965 [01:19<03:08, 11.58it/s][A
 27%|██▋       | 789/2965 [01:19<02:47, 13.00it/s][A
 27%|██▋       | 792/2965 [01:19<02:41, 13.43it/s][A
 27%|██▋       | 797/2965 [01:19<02:06, 17.14it/s][A
 27%|██▋       | 800/2965 [01:20<02:46, 13.03it/s][A
 27%|██▋       | 803/2965 [01:20<03:34, 10.06it/s][A
 27%|██▋       | 806/2965 [0

 51%|█████     | 1501/2965 [02:33<02:45,  8.84it/s][A
 51%|█████     | 1505/2965 [02:33<02:26,  9.94it/s][A
 51%|█████     | 1507/2965 [02:33<02:28,  9.79it/s][A
 51%|█████     | 1512/2965 [02:34<02:03, 11.76it/s][A
 51%|█████     | 1514/2965 [02:34<02:15, 10.68it/s][A
 51%|█████     | 1516/2965 [02:34<02:22, 10.16it/s][A
 51%|█████     | 1518/2965 [02:34<02:23, 10.08it/s][A
 51%|█████▏    | 1520/2965 [02:35<02:31,  9.56it/s][A
 51%|█████▏    | 1522/2965 [02:35<02:31,  9.55it/s][A
 51%|█████▏    | 1524/2965 [02:35<02:51,  8.39it/s][A
 51%|█████▏    | 1525/2965 [02:35<03:10,  7.55it/s][A
 52%|█████▏    | 1527/2965 [02:35<03:12,  7.46it/s][A
 52%|█████▏    | 1529/2965 [02:36<03:05,  7.73it/s][A
 52%|█████▏    | 1531/2965 [02:36<03:01,  7.90it/s][A
 52%|█████▏    | 1534/2965 [02:36<02:31,  9.42it/s][A
 52%|█████▏    | 1536/2965 [02:36<02:18, 10.33it/s][A
 52%|█████▏    | 1538/2965 [02:37<02:31,  9.39it/s][A
 52%|█████▏    | 1541/2965 [02:37<02:29,  9.55it/s][A
 52%|█████

 76%|███████▌  | 2241/2965 [03:47<01:25,  8.47it/s][A
 76%|███████▌  | 2243/2965 [03:47<01:24,  8.54it/s][A
 76%|███████▌  | 2246/2965 [03:47<01:16,  9.35it/s][A
 76%|███████▌  | 2248/2965 [03:48<01:23,  8.54it/s][A
 76%|███████▌  | 2250/2965 [03:48<01:17,  9.20it/s][A
 76%|███████▌  | 2255/2965 [03:48<00:58, 12.05it/s][A
 76%|███████▌  | 2257/2965 [03:48<01:00, 11.73it/s][A
 76%|███████▋  | 2261/2965 [03:48<00:54, 13.03it/s][A
 76%|███████▋  | 2266/2965 [03:49<00:49, 14.21it/s][A
 77%|███████▋  | 2269/2965 [03:49<00:49, 14.12it/s][A
 77%|███████▋  | 2271/2965 [03:49<00:59, 11.58it/s][A
 77%|███████▋  | 2275/2965 [03:49<00:53, 12.82it/s][A
 77%|███████▋  | 2277/2965 [03:50<01:17,  8.90it/s][A
 77%|███████▋  | 2279/2965 [03:50<01:12,  9.43it/s][A
 77%|███████▋  | 2283/2965 [03:50<01:03, 10.76it/s][A
 77%|███████▋  | 2286/2965 [03:50<00:59, 11.34it/s][A
 77%|███████▋  | 2289/2965 [03:51<00:56, 11.95it/s][A
 77%|███████▋  | 2291/2965 [03:51<01:21,  8.28it/s][A
 77%|█████

2965
selection_file_ending ...


test_file_starting



  0%|          | 6/1224 [00:00<00:44, 27.31it/s][A
  1%|          | 7/1224 [00:00<03:02,  6.67it/s][A
  1%|          | 8/1224 [00:00<03:38,  5.56it/s][A
  1%|▏         | 18/1224 [00:00<02:35,  7.75it/s][A
  3%|▎         | 31/1224 [00:01<01:50, 10.77it/s][A
  3%|▎         | 38/1224 [00:01<01:31, 12.93it/s][A
  4%|▎         | 43/1224 [00:01<01:25, 13.77it/s][A
  4%|▍         | 53/1224 [00:01<01:03, 18.53it/s][A
  5%|▌         | 65/1224 [00:01<00:46, 24.71it/s][A
  6%|▌         | 76/1224 [00:02<00:35, 32.20it/s][A
  7%|▋         | 88/1224 [00:02<00:27, 41.08it/s][A
  8%|▊         | 102/1224 [00:02<00:22, 49.90it/s][A
  9%|▉         | 114/1224 [00:02<00:18, 60.15it/s][A
 11%|█         | 129/1224 [00:02<00:15, 72.47it/s][A
 12%|█▏        | 142/1224 [00:02<00:12, 83.42it/s][A
 13%|█▎        | 156/1224 [00:02<00:11, 94.33it/s][A
 14%|█▍        | 169/1224 [00:02<00:10, 100.52it/s][A
 15%|█▌        | 184/1224 [00:02<00:09, 109.97it/s][A
 16%|█▌        | 198/1224 [00:02<00:08,

1224
test_file_ending ...


____________________
chinese
seed_file_starting
/home/mayank/MTP/begin_again/Error-Driven-ASR-Personalization/CMU_expts/accent/chinese/manifests/seed.json



 14%|█▎        | 27/200 [00:00<00:01, 126.55it/s][A
 20%|██        | 41/200 [00:00<00:01, 129.19it/s][A
 27%|██▋       | 54/200 [00:00<00:01, 127.94it/s][A
 34%|███▍      | 68/200 [00:00<00:01, 129.48it/s][A
 40%|███▉      | 79/200 [00:00<00:00, 121.72it/s][A
 46%|████▌     | 92/200 [00:00<00:00, 124.00it/s][A
 53%|█████▎    | 106/200 [00:00<00:00, 127.08it/s][A
 60%|██████    | 120/200 [00:00<00:00, 129.75it/s][A
 67%|██████▋   | 134/200 [00:01<00:00, 132.16it/s][A
 74%|███████▎  | 147/200 [00:01<00:00, 126.49it/s][A
 80%|████████  | 161/200 [00:01<00:00, 127.82it/s][A
 87%|████████▋ | 174/200 [00:01<00:00, 127.29it/s][A
 94%|█████████▎| 187/200 [00:01<00:00, 125.30it/s][A
100%|██████████| 200/200 [00:01<00:00, 126.88it/s][A

  0%|          | 0/2965 [00:00<?, ?it/s][A
  0%|          | 13/2965 [00:00<00:24, 122.89it/s][A

200
seed_file_ending ...

selection_file_starting



  1%|          | 28/2965 [00:00<00:22, 127.96it/s][A
  1%|▏         | 42/2965 [00:00<00:22, 130.65it/s][A
  2%|▏         | 54/2965 [00:00<00:23, 124.86it/s][A
  2%|▏         | 68/2965 [00:00<00:22, 127.62it/s][A
  3%|▎         | 82/2965 [00:00<00:22, 128.21it/s][A
  3%|▎         | 94/2965 [00:00<00:23, 122.51it/s][A
  4%|▎         | 108/2965 [00:00<00:22, 124.61it/s][A
  4%|▍         | 122/2965 [00:00<00:22, 127.30it/s][A
  5%|▍         | 135/2965 [00:01<00:22, 126.56it/s][A
  5%|▍         | 148/2965 [00:01<00:22, 124.81it/s][A
  5%|▌         | 162/2965 [00:01<00:22, 126.22it/s][A
  6%|▌         | 175/2965 [00:01<00:22, 124.42it/s][A
  6%|▋         | 188/2965 [00:01<00:22, 124.59it/s][A
  7%|▋         | 201/2965 [00:01<00:21, 126.16it/s][A
  7%|▋         | 214/2965 [00:01<00:22, 121.59it/s][A
  8%|▊         | 227/2965 [00:01<00:23, 114.79it/s][A
  8%|▊         | 241/2965 [00:01<00:22, 118.48it/s][A
  9%|▊         | 253/2965 [00:02<00:25, 106.66it/s][A
  9%|▉         

2965
selection_file_ending ...


test_file_starting



  1%|▏         | 16/1224 [00:00<00:37, 31.86it/s] [A
  2%|▏         | 29/1224 [00:00<00:29, 41.05it/s][A
  4%|▎         | 45/1224 [00:00<00:22, 52.60it/s][A
  4%|▍         | 55/1224 [00:00<00:19, 60.84it/s][A
  6%|▌         | 68/1224 [00:00<00:16, 71.98it/s][A
  7%|▋         | 81/1224 [00:00<00:13, 81.86it/s][A
  8%|▊         | 93/1224 [00:01<00:12, 90.47it/s][A
  9%|▊         | 106/1224 [00:01<00:11, 97.76it/s][A
 10%|▉         | 119/1224 [00:01<00:10, 103.62it/s][A
 11%|█         | 132/1224 [00:01<00:09, 109.23it/s][A
 12%|█▏        | 145/1224 [00:01<00:09, 113.50it/s][A
 13%|█▎        | 159/1224 [00:01<00:09, 117.91it/s][A
 14%|█▍        | 172/1224 [00:01<00:08, 119.94it/s][A
 15%|█▌        | 186/1224 [00:01<00:08, 122.71it/s][A
 16%|█▋        | 199/1224 [00:01<00:08, 123.87it/s][A
 17%|█▋        | 212/1224 [00:02<00:08, 118.28it/s][A
 19%|█▊        | 227/1224 [00:02<00:07, 125.04it/s][A
 20%|█▉        | 240/1224 [00:02<00:08, 120.77it/s][A
 21%|██        | 253/12

1224
test_file_ending ...


____________________
spanish
seed_file_starting
/home/mayank/MTP/begin_again/Error-Driven-ASR-Personalization/CMU_expts/accent/spanish/manifests/seed.json



  8%|▊         | 16/200 [00:00<00:07, 25.43it/s][A
 14%|█▍        | 28/200 [00:00<00:05, 33.08it/s][A
 20%|██        | 41/200 [00:00<00:03, 42.28it/s][A
 26%|██▋       | 53/200 [00:00<00:02, 52.47it/s][A
 33%|███▎      | 66/200 [00:00<00:02, 63.53it/s][A
 40%|███▉      | 79/200 [00:00<00:01, 74.54it/s][A
 45%|████▌     | 90/200 [00:00<00:01, 82.34it/s][A
 50%|█████     | 101/200 [00:01<00:01, 85.44it/s][A
 56%|█████▋    | 113/200 [00:01<00:00, 93.34it/s][A
 62%|██████▎   | 125/200 [00:01<00:00, 98.82it/s][A
 70%|██████▉   | 139/200 [00:01<00:00, 107.64it/s][A
 77%|███████▋  | 154/200 [00:01<00:00, 116.22it/s][A
 84%|████████▎ | 167/200 [00:01<00:00, 113.76it/s][A
 90%|█████████ | 180/200 [00:01<00:00, 112.06it/s][A
100%|██████████| 200/200 [00:01<00:00, 109.84it/s][A

  0%|          | 0/2879 [00:00<?, ?it/s][A
  0%|          | 10/2879 [00:00<00:29, 95.74it/s][A

200
seed_file_ending ...

selection_file_starting



  1%|          | 26/2879 [00:00<00:26, 107.93it/s][A
  1%|▏         | 41/2879 [00:00<00:24, 116.81it/s][A
  2%|▏         | 55/2879 [00:00<00:23, 121.57it/s][A
  2%|▏         | 68/2879 [00:00<00:22, 122.59it/s][A
  3%|▎         | 81/2879 [00:00<00:22, 123.35it/s][A
  3%|▎         | 93/2879 [00:00<00:23, 120.84it/s][A
  4%|▎         | 106/2879 [00:00<00:22, 120.71it/s][A
  4%|▍         | 118/2879 [00:00<00:23, 119.04it/s][A
  5%|▍         | 131/2879 [00:01<00:23, 119.45it/s][A
  5%|▌         | 144/2879 [00:01<00:22, 121.78it/s][A
  5%|▌         | 158/2879 [00:01<00:21, 124.68it/s][A
  6%|▌         | 171/2879 [00:01<00:22, 122.95it/s][A
  6%|▋         | 185/2879 [00:01<00:21, 126.61it/s][A
  7%|▋         | 198/2879 [00:01<00:21, 127.60it/s][A
  7%|▋         | 211/2879 [00:01<00:21, 125.28it/s][A
  8%|▊         | 225/2879 [00:01<00:20, 128.01it/s][A
  8%|▊         | 239/2879 [00:01<00:20, 129.79it/s][A
  9%|▉         | 252/2879 [00:01<00:20, 128.48it/s][A
  9%|▉         

2879
selection_file_ending ...


test_file_starting



  2%|▏         | 22/1191 [00:00<00:11, 99.93it/s][A
  3%|▎         | 36/1191 [00:00<00:10, 108.49it/s][A
  4%|▍         | 49/1191 [00:00<00:10, 112.96it/s][A
  5%|▌         | 62/1191 [00:00<00:09, 116.00it/s][A
  6%|▋         | 76/1191 [00:00<00:09, 120.08it/s][A
  7%|▋         | 88/1191 [00:00<00:09, 113.21it/s][A
  8%|▊         | 99/1191 [00:01<00:23, 47.34it/s] [A
  9%|▉         | 113/1191 [00:01<00:18, 58.47it/s][A
 10%|█         | 123/1191 [00:01<00:16, 65.22it/s][A
 11%|█         | 133/1191 [00:01<00:21, 50.36it/s][A
 12%|█▏        | 145/1191 [00:01<00:17, 59.90it/s][A
 13%|█▎        | 158/1191 [00:02<00:14, 70.99it/s][A
 14%|█▍        | 171/1191 [00:02<00:12, 81.22it/s][A
 15%|█▌        | 184/1191 [00:02<00:11, 90.42it/s][A
 16%|█▋        | 196/1191 [00:02<00:10, 97.57it/s][A
 18%|█▊        | 209/1191 [00:02<00:09, 103.43it/s][A
 19%|█▊        | 222/1191 [00:02<00:09, 107.04it/s][A
 20%|█▉        | 234/1191 [00:02<00:08, 109.41it/s][A
 21%|██        | 246/1191

1191
test_file_ending ...


____________________
arabic
seed_file_starting
/home/mayank/MTP/begin_again/Error-Driven-ASR-Personalization/CMU_expts/accent/arabic/manifests/seed.json



 13%|█▎        | 26/200 [00:00<00:01, 127.45it/s][A
 18%|█▊        | 37/200 [00:00<00:01, 119.40it/s][A
 26%|██▌       | 51/200 [00:00<00:01, 123.74it/s][A
 32%|███▏      | 64/200 [00:00<00:01, 124.56it/s][A
 38%|███▊      | 76/200 [00:00<00:01, 120.98it/s][A
 44%|████▍     | 89/200 [00:00<00:00, 122.34it/s][A
 51%|█████     | 102/200 [00:00<00:00, 124.30it/s][A
 57%|█████▋    | 114/200 [00:00<00:00, 120.24it/s][A
 64%|██████▎   | 127/200 [00:01<00:00, 121.66it/s][A
 70%|███████   | 140/200 [00:01<00:00, 119.79it/s][A
 76%|███████▌  | 152/200 [00:01<00:00, 113.87it/s][A
 82%|████████▏ | 164/200 [00:01<00:00, 113.65it/s][A
 88%|████████▊ | 177/200 [00:01<00:00, 116.84it/s][A
100%|██████████| 200/200 [00:01<00:00, 118.87it/s][A

  0%|          | 0/2854 [00:00<?, ?it/s][A
  0%|          | 11/2854 [00:00<00:26, 106.48it/s][A

200
seed_file_ending ...

selection_file_starting



  1%|          | 24/2854 [00:00<00:25, 111.16it/s][A
  1%|▏         | 37/2854 [00:00<00:24, 115.93it/s][A
  2%|▏         | 47/2854 [00:00<00:25, 108.45it/s][A
  2%|▏         | 58/2854 [00:00<00:26, 104.50it/s][A
  2%|▏         | 71/2854 [00:00<00:25, 109.43it/s][A
  3%|▎         | 85/2854 [00:00<00:23, 115.93it/s][A
  3%|▎         | 96/2854 [00:00<00:26, 103.73it/s][A
  4%|▍         | 109/2854 [00:00<00:25, 109.34it/s][A
  4%|▍         | 121/2854 [00:01<00:24, 111.04it/s][A
  5%|▍         | 135/2854 [00:01<00:23, 117.20it/s][A
  5%|▌         | 148/2854 [00:01<00:22, 119.84it/s][A
  6%|▌         | 161/2854 [00:01<00:24, 112.10it/s][A
  6%|▌         | 175/2854 [00:01<00:22, 116.90it/s][A
  7%|▋         | 188/2854 [00:01<00:22, 120.51it/s][A
  7%|▋         | 203/2854 [00:01<00:21, 125.57it/s][A
  8%|▊         | 216/2854 [00:01<00:21, 122.55it/s][A
  8%|▊         | 229/2854 [00:01<00:23, 111.78it/s][A
  8%|▊         | 242/2854 [00:02<00:22, 116.06it/s][A
  9%|▉         |

2854
selection_file_ending ...


test_file_starting



  2%|▏         | 18/1182 [00:00<00:17, 64.78it/s][A
  2%|▏         | 22/1182 [00:00<01:00, 19.06it/s][A
  3%|▎         | 34/1182 [00:00<00:45, 25.48it/s][A
  4%|▍         | 46/1182 [00:01<00:34, 33.26it/s][A
  5%|▍         | 56/1182 [00:01<00:27, 41.57it/s][A
  6%|▌         | 71/1182 [00:01<00:21, 52.64it/s][A
  7%|▋         | 82/1182 [00:01<00:17, 62.36it/s][A
  8%|▊         | 94/1182 [00:01<00:15, 72.39it/s][A
  9%|▉         | 108/1182 [00:01<00:12, 82.86it/s][A
 10%|█         | 122/1182 [00:01<00:11, 93.61it/s][A
 12%|█▏        | 137/1182 [00:01<00:09, 105.37it/s][A
 13%|█▎        | 150/1182 [00:01<00:09, 109.70it/s][A
 14%|█▍        | 163/1182 [00:02<00:08, 114.90it/s][A
 15%|█▍        | 177/1182 [00:02<00:08, 120.96it/s][A
 16%|█▌        | 191/1182 [00:02<00:08, 121.57it/s][A
 17%|█▋        | 204/1182 [00:02<00:07, 123.97it/s][A
 19%|█▊        | 220/1182 [00:02<00:07, 132.34it/s][A
 20%|█▉        | 234/1182 [00:02<00:07, 129.23it/s][A
 21%|██        | 248/1182 [

1182
test_file_ending ...


____________________
korean
seed_file_starting
/home/mayank/MTP/begin_again/Error-Driven-ASR-Personalization/CMU_expts/accent/korean/manifests/seed.json



 12%|█▏        | 23/200 [00:00<00:01, 102.79it/s][A
 18%|█▊        | 37/200 [00:00<00:01, 109.60it/s][A
 24%|██▍       | 49/200 [00:00<00:01, 111.86it/s][A
 30%|██▉       | 59/200 [00:00<00:01, 99.66it/s] [A
 34%|███▍      | 69/200 [00:00<00:01, 98.57it/s][A
 40%|████      | 81/200 [00:00<00:01, 103.57it/s][A
 46%|████▌     | 92/200 [00:00<00:01, 104.87it/s][A
 52%|█████▏    | 104/200 [00:00<00:00, 107.85it/s][A
 59%|█████▉    | 118/200 [00:01<00:00, 114.69it/s][A
 66%|██████▌   | 131/200 [00:01<00:00, 118.88it/s][A
 72%|███████▏  | 144/200 [00:01<00:00, 122.00it/s][A
 78%|███████▊  | 157/200 [00:01<00:00, 118.85it/s][A
 85%|████████▌ | 170/200 [00:01<00:00, 120.37it/s][A
 92%|█████████▏| 183/200 [00:01<00:00, 120.62it/s][A
100%|██████████| 200/200 [00:01<00:00, 115.97it/s][A

  0%|          | 0/2964 [00:00<?, ?it/s][A
  0%|          | 13/2964 [00:00<00:22, 128.90it/s][A

200
seed_file_ending ...

selection_file_starting



  1%|          | 28/2964 [00:00<00:22, 133.03it/s][A
  1%|▏         | 39/2964 [00:00<00:23, 123.61it/s][A
  2%|▏         | 52/2964 [00:00<00:23, 123.98it/s][A
  2%|▏         | 66/2964 [00:00<00:23, 125.69it/s][A
  3%|▎         | 80/2964 [00:00<00:22, 127.86it/s][A
  3%|▎         | 95/2964 [00:00<00:21, 131.32it/s][A
  4%|▎         | 108/2964 [00:00<00:24, 118.77it/s][A
  4%|▍         | 122/2964 [00:00<00:23, 123.37it/s][A
  5%|▍         | 135/2964 [00:01<00:22, 124.25it/s][A
  5%|▍         | 148/2964 [00:01<00:23, 121.59it/s][A
  5%|▌         | 161/2964 [00:01<00:22, 122.73it/s][A
  6%|▌         | 174/2964 [00:01<00:22, 124.74it/s][A
  6%|▋         | 187/2964 [00:01<00:22, 125.16it/s][A
  7%|▋         | 202/2964 [00:01<00:21, 129.86it/s][A
  7%|▋         | 216/2964 [00:01<00:21, 128.59it/s][A
  8%|▊         | 229/2964 [00:01<00:22, 123.29it/s][A
  8%|▊         | 243/2964 [00:01<00:21, 127.41it/s][A
  9%|▊         | 257/2964 [00:02<00:20, 129.14it/s][A
  9%|▉         

2964
selection_file_ending ...


test_file_starting



  2%|▏         | 25/1224 [00:00<00:10, 109.82it/s][A
  3%|▎         | 39/1224 [00:00<00:10, 116.23it/s][A
  4%|▍         | 52/1224 [00:00<00:09, 117.57it/s][A
  5%|▌         | 67/1224 [00:00<00:09, 124.77it/s][A
  7%|▋         | 81/1224 [00:00<00:08, 128.51it/s][A
  8%|▊         | 94/1224 [00:00<00:08, 127.44it/s][A
  9%|▉         | 108/1224 [00:00<00:08, 129.42it/s][A
 10%|▉         | 121/1224 [00:00<00:08, 128.16it/s][A
 11%|█         | 134/1224 [00:01<00:09, 119.93it/s][A
 12%|█▏        | 147/1224 [00:01<00:08, 120.27it/s][A
 13%|█▎        | 159/1224 [00:01<00:08, 118.76it/s][A
 14%|█▍        | 173/1224 [00:01<00:08, 121.73it/s][A
 15%|█▌        | 187/1224 [00:01<00:08, 122.80it/s][A
 16%|█▋        | 200/1224 [00:01<00:08, 123.47it/s][A
 17%|█▋        | 213/1224 [00:01<00:08, 123.68it/s][A
 18%|█▊        | 226/1224 [00:01<00:08, 121.53it/s][A
 20%|█▉        | 239/1224 [00:01<00:07, 123.63it/s][A
 21%|██        | 253/1224 [00:02<00:07, 126.42it/s][A
 22%|██▏       

1224
test_file_ending ...


____________________
vietnamese
seed_file_starting
/home/mayank/MTP/begin_again/Error-Driven-ASR-Personalization/CMU_expts/accent/vietnamese/manifests/seed.json



 12%|█▎        | 25/200 [00:00<00:01, 121.30it/s][A
 20%|██        | 40/200 [00:00<00:01, 127.41it/s][A
 26%|██▋       | 53/200 [00:00<00:01, 126.68it/s][A
 32%|███▏      | 64/200 [00:00<00:01, 121.16it/s][A
 38%|███▊      | 75/200 [00:00<00:01, 114.67it/s][A
 44%|████▍     | 88/200 [00:00<00:00, 116.54it/s][A
 50%|█████     | 100/200 [00:00<00:00, 117.32it/s][A
 56%|█████▋    | 113/200 [00:00<00:00, 118.84it/s][A
 64%|██████▎   | 127/200 [00:01<00:00, 123.82it/s][A
 70%|███████   | 141/200 [00:01<00:00, 127.98it/s][A
 77%|███████▋  | 154/200 [00:01<00:00, 125.72it/s][A
 84%|████████▎ | 167/200 [00:01<00:00, 121.11it/s][A
 90%|█████████ | 181/200 [00:01<00:00, 123.90it/s][A
100%|██████████| 200/200 [00:01<00:00, 123.74it/s][A

  0%|          | 0/2968 [00:00<?, ?it/s][A
  0%|          | 14/2968 [00:00<00:22, 134.15it/s][A

200
seed_file_ending ...

selection_file_starting



  1%|          | 26/2968 [00:00<00:22, 127.97it/s][A
  1%|▏         | 39/2968 [00:00<00:23, 126.90it/s][A
  2%|▏         | 53/2968 [00:00<00:22, 129.51it/s][A
  2%|▏         | 67/2968 [00:00<00:22, 131.04it/s][A
  3%|▎         | 79/2968 [00:00<00:23, 124.35it/s][A
  3%|▎         | 93/2968 [00:00<00:22, 128.50it/s][A
  4%|▎         | 106/2968 [00:00<00:22, 128.30it/s][A
  4%|▍         | 120/2968 [00:00<00:21, 131.39it/s][A
  4%|▍         | 133/2968 [00:01<00:22, 125.62it/s][A
  5%|▍         | 146/2968 [00:01<00:24, 114.95it/s][A
  5%|▌         | 160/2968 [00:01<00:23, 119.86it/s][A
  6%|▌         | 173/2968 [00:01<00:24, 114.10it/s][A
  6%|▌         | 185/2968 [00:02<01:13, 37.66it/s] [A
  7%|▋         | 194/2968 [00:02<01:02, 44.07it/s][A
  7%|▋         | 205/2968 [00:02<00:51, 53.43it/s][A
  7%|▋         | 216/2968 [00:02<00:43, 63.17it/s][A
  8%|▊         | 229/2968 [00:02<00:37, 73.67it/s][A
  8%|▊         | 242/2968 [00:02<00:32, 84.04it/s][A
  9%|▊         | 256

2968
selection_file_ending ...


test_file_starting



  2%|▏         | 24/1224 [00:00<00:11, 107.36it/s][A
  3%|▎         | 37/1224 [00:00<00:10, 111.78it/s][A
  4%|▍         | 51/1224 [00:00<00:10, 116.62it/s][A
  5%|▌         | 65/1224 [00:00<00:09, 120.24it/s][A
  6%|▋         | 79/1224 [00:00<00:09, 124.17it/s][A
  8%|▊         | 93/1224 [00:00<00:08, 125.82it/s][A
  9%|▊         | 106/1224 [00:00<00:08, 127.00it/s][A
 10%|▉         | 120/1224 [00:00<00:08, 129.26it/s][A
 11%|█         | 133/1224 [00:01<00:08, 129.47it/s][A
 12%|█▏        | 146/1224 [00:01<00:08, 129.62it/s][A
 13%|█▎        | 159/1224 [00:01<00:08, 126.58it/s][A
 14%|█▍        | 172/1224 [00:01<00:08, 126.19it/s][A
 15%|█▌        | 186/1224 [00:01<00:08, 127.22it/s][A
 16%|█▋        | 199/1224 [00:01<00:08, 127.99it/s][A
 17%|█▋        | 212/1224 [00:01<00:07, 127.45it/s][A
 18%|█▊        | 225/1224 [00:01<00:07, 126.81it/s][A
 19%|█▉        | 238/1224 [00:01<00:07, 127.35it/s][A
 21%|██        | 251/1224 [00:01<00:07, 126.20it/s][A
 22%|██▏       

1224
test_file_ending ...





