In [4]:
"""
Use this script to reformat wav files from IEEE-FLOAT to PCM 16.
The converted files are written to a directory with the following hierarchy:
data_directory/group/speaker/chapter/[speaker-chapter-0000.wav, speaker-chapter-0001.wav, ...,
                                      speaker-chapter.trans.txt]
Note that speaker-chapter.trans.txt is generated with one file id per line and
will need to be edited to include the transcriptions.
"""
import os
import soundfile as sf
import argparse


def main(input_directory, data_directory, group, speaker, chapter):
    """Re-encode every ``.wav`` file in *input_directory* as 16-bit PCM.

    The converted files are written to
    ``data_directory/group/speaker/chapter/`` and named
    ``<speaker>-<chapter>-NNNN.wav``, where ``NNNN`` is a zero-padded counter
    starting at 0000. A transcript stub ``<speaker>-<chapter>.trans.txt`` is
    written next to them with one ``"<file id> "`` line per converted file;
    the transcription text has to be appended to each line by hand.

    Args:
        input_directory: Directory scanned (non-recursively) for ``.wav`` files.
        data_directory: Root of the output tree.
        group: Name of the group sub-directory.
        speaker: Speaker identifier; first component of every file id.
        chapter: Chapter identifier; second component of every file id.

    Raises:
        OSError: If *input_directory* cannot be listed or the output tree
            cannot be created or written.
    """
    save_path = os.path.join(data_directory, group, speaker, chapter)
    idtag = speaker + '-' + chapter
    transcript_path = os.path.join(save_path, idtag + '.trans.txt')
    os.makedirs(save_path, exist_ok=True)

    # os.listdir() returns entries in arbitrary order; sort them so the
    # mapping from source file to utterance number is reproducible.
    wav_names = sorted(name for name in os.listdir(input_directory)
                       if name.endswith(".wav"))

    # The context manager closes the transcript even if a conversion fails.
    with open(transcript_path, 'w') as outfile:
        for wav_file_count, name in enumerate(wav_names):
            data, samplerate = sf.read(os.path.join(input_directory, name))
            ident = idtag + '-' + '{:04d}'.format(wav_file_count)
            print(ident)
            # Write straight to the destination rather than through a scratch
            # file in the current directory, and request 16-bit PCM explicitly
            # instead of relying on the library's default subtype.
            sf.write(os.path.join(save_path, ident + '.wav'), data, samplerate,
                     subtype='PCM_16')
            outfile.write(ident + ' \n')


if __name__ == '__main__':
    # Command-line parsing is disabled: the triple-quoted string below is a
    # bare expression that is evaluated and discarded, so none of the argparse
    # code inside it runs. It documents the intended CLI arguments.
    '''parser = argparse.ArgumentParser()
    parser.add_argument('input_directory', type=str,
                        help='Path to input directory')
    parser.add_argument('data_directory', type=str,
                        help='Path to output data directory')
    parser.add_argument('group', type=str,
                        help='group')
    parser.add_argument('speaker', type=str,
                        help='speaker number')
    parser.add_argument('chapter', type=str,
                        help='chapter number')
    args = parser.parse_args()
    
    main(args.input_directory, args.data_directory, args.group, args.speaker, args.chapter)'''
    
    # Hard-coded arguments used in place of the CLI. With these values the
    # output is written under MySpeech/my_dev/001/000001 and every file id
    # starts with "001-000001-".
    input_directory = 'Testing_waves'
    data_directory = 'MySpeech'
    group = 'my_dev'
    speaker = '001'
    chapter = '000001'
       
    main(input_directory, data_directory, group, speaker, chapter)

001-000001-0000
001-000001-0001
001-000001-0002
001-000001-0003
001-000001-0004


In [11]:
"""
Use this script to create JSON-Line description files that can be used to
train deep-speech models through this library.
This works with data directories that are organized like LibriSpeech:
data_directory/group/speaker/[file_id1.wav, file_id2.wav, ...,
                              group-speaker.trans.txt]

where each line of group-speaker.trans.txt has the form: file_id transcription

NOTE: this file is from the https://github.com/baidu-research/ba-dls-deepspeech repository
"""

from __future__ import absolute_import, division, print_function

import argparse
import json
import os
import wave


def main(data_directory, output_file):
    """Write a JSON-lines description of every transcribed utterance.

    *data_directory* is walked two levels deep
    (``data_directory/<group>/<speaker>/``). In each second-level directory
    the transcript ``<group>-<speaker>.trans.txt`` is read; every non-blank
    line is ``file_id transcription...``. For each line the matching
    ``file_id.wav`` in the same directory is opened to compute its duration,
    and one JSON object with the keys ``key`` (path of the wav file),
    ``duration`` (seconds, float) and ``text`` (lower-cased transcription) is
    appended to *output_file*.

    Entries whose name starts with ``.`` and entries that are not directories
    are skipped at both levels; blank transcript lines are ignored.

    Args:
        data_directory: Root directory containing the group directories.
        output_file: Path of the JSON-lines file to create or overwrite.

    Raises:
        OSError: If a transcript or wav file is missing or unreadable.
        wave.Error: If a wav file is not in a format the ``wave`` module
            can read.
    """
    records = []
    # Sorted listings make the order of the output lines deterministic.
    for group in sorted(os.listdir(data_directory)):
        speaker_path = os.path.join(data_directory, group)
        if group.startswith(".") or not os.path.isdir(speaker_path):
            continue
        for speaker in sorted(os.listdir(speaker_path)):
            utterance_dir = os.path.join(speaker_path, speaker)
            if speaker.startswith(".") or not os.path.isdir(utterance_dir):
                continue
            labels_file = os.path.join(utterance_dir,
                                       '{}-{}.trans.txt'.format(group, speaker))
            with open(labels_file) as transcript:
                for line in transcript:
                    split = line.strip().split()
                    if not split:
                        # Blank line (e.g. trailing newline): nothing to index.
                        continue
                    file_id = split[0]
                    label = ' '.join(split[1:]).lower()
                    audio_file = os.path.join(utterance_dir, file_id) + '.wav'
                    # The context manager closes the reader even if reading
                    # the header values fails.
                    with wave.open(audio_file) as audio:
                        duration = float(audio.getnframes()) / audio.getframerate()
                    records.append({'key': audio_file, 'duration': duration,
                                    'text': label})
    with open(output_file, 'w') as out_file:
        for record in records:
            out_file.write(json.dumps(record) + '\n')


if __name__ == '__main__':
    # Command-line parsing is disabled: the triple-quoted string below is a
    # bare expression that is evaluated and discarded, so none of the argparse
    # code inside it runs. It documents the intended CLI arguments.
    '''parser = argparse.ArgumentParser()
    parser.add_argument('data_directory', type=str,
                        help='Path to data directory')
    parser.add_argument('output_file', type=str,
                        help='Path to output file')
    args = parser.parse_args()
    main(args.data_directory, args.output_file)'''
    
    # Hard-coded arguments used in place of the CLI: index everything under
    # MySpeech/my_dev and write the description to my_dev.json in the current
    # working directory.
    data_directory = 'MySpeech/my_dev'
    output_file = 'my_dev.json'
    
    main(data_directory, output_file)
