In [1]:
import os

# storage
from Storage.TFStorage import *

# Config
import Utils.Eva_config_consts as config

# Utilities
import Utils.folder_utils as folder_utils
import Utils.TIMIT_utils as TIMIT_utils
import Utils.image_utils as image_utils

# Sound utils
from Utils.nist_reader import NistReader
import Utils.sound_utils as sound_utils

# Spectrograms
from Utils.SpectrogramFactory import SpectrogramFactory
from Utils.Spectrogram import Spectrogram

import numpy as np

# for auto-reloading external modules
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%load_ext autoreload
%autoreload 2

In [2]:
def CutPhonemeIntoChunksAndSave(storage, phoneme_spectrums, chunkLength, phoneme, speaker):
    """
    Slide a fixed-width window over one phoneme's spectrogram and store every window.

    The spectrogram may contain any number of spectrums (columns, i.e. axis 1). A window
    of ``chunkLength`` consecutive spectrums is moved across it with a step of 1, so a
    spectrogram with N spectrums produces ``N - chunkLength + 1`` chunks (none at all when
    it is shorter than ``chunkLength``). Each chunk is split into its real and imaginary
    parts, which are stacked along a new last axis, and is then written to ``storage``
    together with the phoneme and speaker labels.

    :param storage: object exposing ``insert_row(row)`` (a TFStorage storage)
    :param phoneme_spectrums: 2-D spectrogram of a single cut phoneme; axis 1 indexes the spectrums
    :param chunkLength: number of consecutive spectrums in every stored chunk
    :param phoneme: Phoneme string value stored with every chunk
    :param speaker: Speaker string value stored with every chunk
    :return: None; the chunks are only written to ``storage``
    """
    # With a step of 1 there is one window per possible start column.
    window_count = phoneme_spectrums.shape[1] - chunkLength + 1

    for left_edge in range(window_count):
        right_edge = left_edge + chunkLength
        window = phoneme_spectrums[:, left_edge:right_edge]
        # Real and imaginary parts become the two entries of a new trailing axis.
        stacked_parts = np.stack((np.real(window), np.imag(window)), axis=-1)
        storage.insert_row((stacked_parts, phoneme, speaker))

In [3]:
# Collect the ".WAV" entries found under the TIMIT training folder.
# NOTE(review): presumably reverse_folder walks the folder recursively — confirm in Utils.folder_utils.
# NOTE(review): `paths` is reassigned in the following cell (restricted to dr1), so this full
# listing is overwritten before it is used — confirm whether it is still needed.
paths = folder_utils.reverse_folder(config.PATH_TO_TIMIT_TRAIN, ".WAV")
# Reader whose Nist2Wav method is used later to convert the NIST audio files to wav.
nistReader = NistReader()
# Spectrogram builder configured with the window size and window step from the project config.
spectrogramFactory = SpectrogramFactory(window_size=config.WINDOW_SIZE, window_step=config.WINDOW_STEP)

In [4]:
# One folder dr1 test
dr1_path = os.path.join(config.PATH_TO_TIMIT_TRAIN, "dr1")
paths = folder_utils.reverse_folder(dr1_path, ".WAV")
dr1_path

'Data/TIMIT/timit/train/dr1'

In [8]:
# Build the dataset: for every utterance in `paths`, cut each annotated phoneme out of the
# audio, compute its spectrogram and store all fixed-length spectrogram chunks in the
# TFStorage file opened for writing.
with TFStorage(config.DATESET_FILE_PATH, TFStorageOpenOptions.WRITE) as storage:
    for path in paths:
        # Progress output; the parenthesised form prints the same under Python 2 and Python 3.
        print(path)
        phonemes = TIMIT_utils.parse_phoneme_file(path)
        speaker = folder_utils.get_speaker_name(path)

        # temp_speaker_folder stores the audio files converted to wav.
        temp_speaker_folder = os.path.join(config.TEMP_DATA_FOLDER_PATH, speaker)
        if not os.path.exists(temp_speaker_folder):
            os.makedirs(temp_speaker_folder)

        # Convert the NIST file to a wav file.
        wav_file = nistReader.Nist2Wav(path, temp_speaker_folder)

        # Index of the final phoneme. The original compared against len(phonemes), which
        # range(len(phonemes)) never reaches, so the last phoneme was widened by mistake.
        last_index = len(phonemes) - 1
        for i, phoneme in enumerate(phonemes):
            # Each phoneme entry is indexed as (start, end, label).
            start = int(phoneme[0])
            end = int(phoneme[1])

            # Widen the cut by PHONEME_OFFSET on both sides, except for the first and the
            # last phoneme of the utterance, which keep their annotated boundaries.
            if i != 0 and i != last_index:
                start -= config.PHONEME_OFFSET
                end += config.PHONEME_OFFSET

            # TODO: create a phoneme object
            phone_file = sound_utils.cutPhonemeChunk(wav_file, config.TEMP_PHONEME_FOLDER_PATH, start, end, phoneme[2])
            phoneme_spectrogram = spectrogramFactory.create_spectrogram(phone_file)
            # The helper writes the chunks to `storage` and returns nothing, so its result
            # is not kept.
            CutPhonemeIntoChunksAndSave(storage, phoneme_spectrogram.spectrogram_values,
                                        config.SPECTROGRAM_CHUNK_LENGTH, phoneme[2], speaker)

Data/TIMIT/timit/train/dr1/fcjf0/SA1.WAV


Data/TIMIT/timit/train/dr1/fcjf0/SA2.WAV


Data/TIMIT/timit/train/dr1/fcjf0/SI1027.WAV


Data/TIMIT/timit/train/dr1/fcjf0/SI1657.WAV


Data/TIMIT/timit/train/dr1/fcjf0/SI648.WAV


Data/TIMIT/timit/train/dr1/fcjf0/SX127.WAV


Data/TIMIT/timit/train/dr1/fcjf0/SX217.WAV


Data/TIMIT/timit/train/dr1/fcjf0/SX307.WAV


KeyboardInterrupt: 