In [6]:
!pip install pydub -q
!pip install ipdb -q
!pip install lxml

Collecting lxml
  Downloading lxml-4.6.2-cp36-cp36m-manylinux1_x86_64.whl (5.5 MB)
[K     |████████████████████████████████| 5.5 MB 6.4 MB/s eta 0:00:01
[?25hInstalling collected packages: lxml
Successfully installed lxml-4.6.2


# Move the text and the speech into the same folder.

In [1]:
import os
import os.path as osp
import numpy as np
from collections import defaultdict
from pprint import pprint
import pydub
import shutil
import requests
from lxml import etree
from multiprocessing import Pool
import glob
import sys
import ipdb

# HANDLE AUDIO

In [2]:
def from_mp3(mp3_file_path):
    """Load an MP3 file through pydub.

    Args:
        mp3_file_path: Path of the MP3 file to read.

    Returns:
        Whatever ``pydub.AudioSegment.from_mp3`` returns for that path.
    """
    segment = pydub.AudioSegment.from_mp3(mp3_file_path)
    return segment

def mp3_to_wav_file(mp3, output_dir=None):
    """Convert an MP3 file into a single-channel WAV file.

    The result is written as ``<stem>_one_channel.wav``, where ``<stem>`` is
    the input file name without its extension.

    Args:
        mp3: Path of the source MP3 file.
        output_dir: Directory that receives the WAV file. When ``None``, the
            directory part of ``mp3`` is used.
    """
    # Identity check: ``None`` is a singleton, and ``==`` could be overridden.
    if output_dir is None:
        output_dir = get_path(mp3)
    mono = from_mp3(mp3).set_channels(1)
    wav_name = get_filename(mp3) + '_one_channel.wav'
    mono.export(os.path.join(output_dir, wav_name), format="wav")

def wav_one_channel(wav_file, output_dir=None):
    """Write a single-channel copy of a WAV file.

    The copy is written as ``<stem>_one_channel.wav``, where ``<stem>`` is the
    input file name without its extension; the input file itself is not
    modified.

    Args:
        wav_file: Path of the source WAV file.
        output_dir: Directory that receives the copy. When ``None``, the
            directory part of ``wav_file`` is used.
    """
    # Identity check: ``None`` is a singleton, and ``==`` could be overridden.
    if output_dir is None:
        output_dir = get_path(wav_file)

    mono = pydub.AudioSegment.from_wav(wav_file).set_channels(1)
    wav_name = get_filename(wav_file) + '_one_channel.wav'
    mono.export(os.path.join(output_dir, wav_name), format="wav")

# PATH UTILS


In [3]:
def get_path(path):
    """Return the directory part of ``path`` (everything before the last component).

    The result is an empty string when ``path`` has no directory part.
    """
    directory, _last_component = os.path.split(path)
    return directory

def get_filename(path):
    """Return the last component of ``path`` with its final extension removed."""
    base_name = os.path.basename(path)
    stem, _extension = os.path.splitext(base_name)
    return stem

In [4]:
# # !cp English text and all wav into dataset
# !mkdir ../dataset/english/maus_output
# !cp ../dataset/english/raw_txt/* ../dataset/english/wav/

In [5]:
# Run configuration. The trailing "sys.argv[...]" notes mark values that look
# like they once came from the command line.

# Folder holding the audio files and their transcripts; do_job also writes the
# TextGrid files and alignment_log.txt here.
outpath = '../dataset/english/wav/' #sys.argv[1]
# Extension of the input audio files; do_job only converts 'mp3' and 'wav'.
audio_ext = 'mp3' #'mp3' #sys.argv[2]
# Extension of the transcript files; anything other than 'txt' is renamed to
# '.txt' by do_job before upload.
text_ext = 'txt' #sys.argv[3]
# Value sent as the LANGUAGE field of the alignment request.
lang = 'eng-US' #sys.argv[4] 
# Requested number of worker processes; main() caps it at 16.
num_jobs = 7

In [6]:
os.path.exists(outpath)

True

In [7]:
os.cpu_count() 

8

In [8]:
import time

In [9]:
def do_job(tasks_to_accomplish):
    """Align one caption through the BAS ``runMAUSBasic`` web service.

    The caption's audio file in ``outpath`` is converted to a single-channel
    WAV, posted together with its ``.txt`` transcript, and the file behind the
    returned download link is saved as ``<caption_name>.TextGrid`` in
    ``outpath``. A caption that already has a TextGrid is skipped. A failed
    request, alignment, download or write is printed and appended to
    ``alignment_log.txt`` in ``outpath``.

    Reads the module-level settings ``outpath``, ``audio_ext``, ``text_ext``
    and ``lang``.

    Args:
        tasks_to_accomplish: ``(index_caption, caption_name)`` pair. The index
            only appears in progress messages; the name is the file stem
            shared by the audio file and the transcript.

    Raises:
        OSError: If the transcript or the converted WAV cannot be opened.
        requests.RequestException: If the alignment request itself fails
            (connection errors are not caught, so the inputs stay on disk).

    Note:
        Once a response has been received, the transcript and the source audio
        file are deleted whether or not the alignment succeeded; only the
        ``_one_channel.wav`` copy (plus the TextGrid on success) is kept.
        NOTE(review): deleting the inputs after a failed alignment prevents a
        retry -- confirm this is intended.
    """

    def write_log(caption_name, stage):
        """Append one ``caption<TAB>stage`` line to the alignment log."""
        with open(osp.join(outpath, 'alignment_log.txt'), 'a') as log_file:
            log_file.write('{}\t{}\n'.format(caption_name, stage))

    index_caption, caption_name = tasks_to_accomplish
    pid = os.getpid()

    def report_failure(stage):
        """Record a failed stage in the log file and on stdout."""
        write_log(caption_name, stage)
        print('{} [{}]: {} {}'.format(pid, index_caption, caption_name, stage))

    textgrid_path = osp.join(outpath, '{}.TextGrid'.format(caption_name))
    if os.path.exists(textgrid_path):
        print('{} [{}]: {} SKIP'.format(pid, index_caption, caption_name))
        return

    # Produce the mono WAV that is sent to the service.
    audio_path = osp.join(outpath, '{}.{}'.format(caption_name, audio_ext))
    if audio_ext == 'mp3':
        mp3_to_wav_file(audio_path, outpath)
    elif audio_ext == 'wav':
        wav_one_channel(audio_path, outpath)
    mono_wav_path = osp.join(outpath, '{}_one_channel.wav'.format(caption_name))

    # The transcript is always uploaded from a '.txt' file.
    txt_path = osp.join(outpath, '{}.txt'.format(caption_name))
    original_text_path = osp.join(outpath, '{}.{}'.format(caption_name, text_ext))
    if text_ext != 'txt' and os.path.exists(original_text_path):
        os.rename(original_text_path, txt_path)

    # build request
    url = 'https://clarin.phonetik.uni-muenchen.de/BASWebServices/services/runMAUSBasic'
    data = {'LANGUAGE': lang, 'OUTFORMAT': 'TextGrid'}

    print('Sending request ...')
    # Context managers guarantee both upload handles are closed, even when the
    # second open() or the request raises.
    with open(txt_path, 'rb') as text_file, open(mono_wav_path, 'rb') as signal_file:
        r = requests.post(url, files={'TEXT': text_file, 'SIGNAL': signal_file}, data=data)
    print('Processing results ...')

    if r.status_code == 200:
        # Parse the raw bytes: lxml refuses a str that carries an XML encoding
        # declaration.
        root = etree.fromstring(r.content)
        # findtext() yields None for a missing element instead of raising.
        success = root.findtext('success')
        download_url = root.findtext('downloadLink')

        if success != 'false' and download_url:
            request_download = requests.get(download_url, stream=True)
            if request_download.status_code == 200:
                try:
                    # Pull the streamed body before creating the file, so a
                    # broken download cannot leave an empty TextGrid that a
                    # later run would treat as done.
                    textgrid_bytes = request_download.content
                    with open(textgrid_path, 'wb') as f:
                        f.write(textgrid_bytes)
                    print('{} [{}]: {} OK'.format(pid, index_caption, caption_name))
                except OSError as err:
                    # requests' exceptions derive from IOError/OSError, so this
                    # covers both a broken download and a failed write without
                    # swallowing KeyboardInterrupt or programming errors.
                    print(err)
                    report_failure('FAIL Write TextGrid')
            else:
                report_failure('FAIL Download TextGrid')
        else:
            print(r.text)
            report_failure('FAIL Alignment')
    else:
        report_failure('FAIL Alignment Request')

    # delete the uploaded inputs; the transcript lives at txt_path even when
    # it was renamed from another extension above
    os.remove(txt_path)
    os.remove(audio_path)

In [10]:
def main():
    """Run ``do_job`` over every audio file in ``outpath`` using a process pool.

    Audio files are found by globbing ``*.<audio_ext>`` in ``outpath``. Names
    containing ``_one_channel`` are left out, since those are the mono copies
    written during conversion. Each task is an ``(index, file_stem)`` pair
    with indices starting at 1. The pool size is ``num_jobs`` capped at 16.
    """
    audio_pattern = osp.join(outpath, '*.{}'.format(audio_ext))
    # Sorted so that task indices are the same from run to run (glob order is
    # arbitrary).
    file_list = sorted(
        stem for stem in map(get_filename, glob.glob(audio_pattern))
        if '_one_channel' not in stem
    )
    number_of_processes = min(int(num_jobs), 16)
    tasks_to_accomplish = list(enumerate(file_list, 1))

    # The context manager shuts the workers down once map() has returned,
    # instead of leaving the pool open for the lifetime of the kernel.
    with Pool(number_of_processes) as p:
        p.map(do_job, tasks_to_accomplish)

In [11]:
# Print a banner listing the run parameters. The backslash continuations sit
# inside the string literal, so the leading spaces of each continued line are
# part of the printed text.
print('\n\n{}\nUsing the following parameters:\n{} \
           \n\tSource folder: {}\
           \n\tAudio format: {}\
           \n\tText format: {}\
           \n\tLanguage: {}\
           \n\tJobs: {}\
         \n{}'.format('*'*50, '-'*50,
                      outpath, #outpath
                      audio_ext, #audio_ext
                      text_ext, #text_ext
                      lang, #lang
                      min(int(num_jobs),16),  # same 16-process cap that main() applies
                      '*'*50))

# Start the alignment run over every audio file in outpath.
main()



**************************************************
Using the following parameters:
--------------------------------------------------            
	Source folder: ../dataset/english/wav/           
	Audio format: mp3           
	Text format: txt           
	Language: eng-US           
	Jobs: 7         
**************************************************


In [1]:
2+3

5

In [11]:
!mv ../dataset/english/wav/A04_007* ../dataset/english/temp/

In [12]:
!mv ../dataset/english/wav/A11_008* ../dataset/english/temp/

In [13]:
!mv ../dataset/english/wav/A19_119* ../dataset/english/temp/

mv: cannot stat '../dataset/english/wav/A19_119*': No such file or directory


In [14]:
!mv ../dataset/english/wav/A05_028* ../dataset/english/temp/

In [15]:
!mv ../dataset/english/wav/A26_016* ../dataset/english/temp/

In [None]:
# import IPython

# IPython.display.Audio(
#     "mass-dataset-mod/dataset/"+"English_World_English_Bible_NT_Drama/wav/B05___11_Acts________EN1WEBN2DA_one_channel.wav", rate= 1600)