In [1]:
import os

# storage
from Storage.TFStorage import TFStorage

# Config
import Utils.Eva_config_consts as config

# Utilities
import Utils.folder_utils as folder_utils
import Utils.TIMIT_utils as TIMIT_utils
import Utils.image_utils as image_utils

# Sound utils
from Utils.nist_reader import NistReader
import Utils.sound_utils as sound_utils

# Spectrograms
from Utils.SpectrogramFactory import SpectrogramFactory
from Utils.Spectrogram import Spectrogram

import numpy as np
import tensorflow as tf
# for auto-reloading external modules
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%load_ext autoreload
%autoreload 2

In [3]:
def CutPhonemeIntoChunksAndSave(storage, phoneme_spectrums, chunkLength, phoneme, speaker, stepLength=1):
    """
    Slide a fixed-width window along a phoneme spectrogram and store every window.

    Each window is taken along axis 1 of ``phoneme_spectrums``. Its real and
    imaginary parts are stacked on a new trailing axis, and the result is
    written to ``storage`` as the row ``(window, phoneme, speaker)``.

    Parameters
    ----------
    storage : object
        Any object exposing ``InsertRow(row)``; it is called once per window.
    phoneme_spectrums : numpy.ndarray
        Array with at least two dimensions; windows are cut along axis 1.
        Presumably laid out as (frequency bins, time frames) - TODO confirm
        against the spectrogram class that produces it.
    chunkLength : int
        Width of every window along axis 1. Must be positive.
    phoneme : object
        Label stored next to every window (passed through unchanged).
    speaker : object
        Speaker identifier stored next to every window (passed through unchanged).
    stepLength : int, optional
        Distance between the starts of consecutive windows. Defaults to 1,
        which reproduces the original fully-overlapping behaviour.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If ``chunkLength`` or ``stepLength`` is not positive.
    """
    if chunkLength <= 0:
        raise ValueError("chunkLength must be positive, got %r" % (chunkLength,))
    if stepLength <= 0:
        raise ValueError("stepLength must be positive, got %r" % (stepLength,))

    totalNumberOfSpectrums = phoneme_spectrums.shape[1]

    # A spectrogram shorter than one window yields nothing to store.
    if totalNumberOfSpectrums < chunkLength:
        return

    # The last valid window starts at total - chunkLength (inclusive).
    lastStart = totalNumberOfSpectrums - chunkLength
    for start in range(0, lastStart + 1, stepLength):
        chunk = phoneme_spectrums[:, start:start + chunkLength]
        # Split the values into two real-valued channels on a new last axis.
        phone_item = np.stack((np.real(chunk), np.imag(chunk)), axis=-1)
        storage.InsertRow((phone_item, phoneme, speaker))

In [4]:
# Gather the input files for the run. Presumably reverse_folder walks
# PATH_TO_TIMIT_TRAIN and returns every path ending in ".WAV" - TODO confirm
# against Utils.folder_utils.
paths = folder_utils.reverse_folder(config.PATH_TO_TIMIT_TRAIN, ".WAV")
# Helpers used by the processing loop: nistReader converts each NIST file to
# wav (Nist2Wav) and spectrogramFactory builds a spectrogram per phoneme file.
nistReader = NistReader()
spectrogramFactory = SpectrogramFactory()
# Destination for the generated rows. The writer is opened here and must be
# closed with storage.StopWriting() once all rows have been inserted.
storage = TFStorage(config.DATESET_FILE)
storage.CreateWriter()

In [8]:
# Convert every TIMIT recording into per-phoneme spectrogram chunks and write
# them to `storage`.
#
# The writer is closed in `finally` so that an interrupted or failed run
# (e.g. KeyboardInterrupt) still finalizes the storage instead of leaving the
# writer open.
try:
    for path in paths:
        # Progress log; print(x) with a single argument behaves the same on
        # Python 2 and Python 3.
        print(path)
        phonemes = TIMIT_utils.parse_phoneme_file(path)
        speaker = folder_utils.get_speaker_name(path)

        # temp_speaker_folder is used for storing converted to wav audio files.
        temp_speaker_folder = os.path.join(config.TEMP_DATA_FOLDER_PATH, speaker)
        if not os.path.exists(temp_speaker_folder):
            os.makedirs(temp_speaker_folder)

        # convert a nist file to a wav file
        wav_file = nistReader.Nist2Wav(path, temp_speaker_folder)

        last_index = len(phonemes) - 1
        for index, phoneme in enumerate(phonemes):
            # Each entry is indexed as (start, end, label): items 0 and 1 are
            # converted to int boundaries, item 2 is passed on as the label.
            start = int(phoneme[0])
            end = int(phoneme[1])

            # Interior phonemes are widened by PHONEME_OFFSET on both sides.
            # The first and last phoneme of a recording keep their exact
            # boundaries; the original compared against len(phonemes), which
            # is never reached, so the last phoneme was wrongly widened.
            if index != 0 and index != last_index:
                start -= config.PHONEME_OFFSET
                end += config.PHONEME_OFFSET

            # TODO: create a phoneme object
            phone_file = sound_utils.cutPhonemeChunk(
                wav_file, config.TEMP_PHONEME_FOLDER_PATH, start, end, phoneme[2])
            phoneme_spectrogram = spectrogramFactory.create_spectrogram(phone_file)
            CutPhonemeIntoChunksAndSave(
                storage, phoneme_spectrogram.spectrogram_values,
                config.SPECTROGRAM_CHUNK_LENGTH, phoneme[2], speaker)
finally:
    storage.StopWriting()

Data/TIMIT/timit/train/dr1/fcjf0/SA1.WAV
Data/TIMIT/timit/train/dr1/fcjf0/SA2.WAV
Data/TIMIT/timit/train/dr1/fcjf0/SI1027.WAV
Data/TIMIT/timit/train/dr1/fcjf0/SI1657.WAV
Data/TIMIT/timit/train/dr1/fcjf0/SI648.WAV
Data/TIMIT/timit/train/dr1/fcjf0/SX127.WAV
Data/TIMIT/timit/train/dr1/fcjf0/SX217.WAV
Data/TIMIT/timit/train/dr1/fcjf0/SX307.WAV
Data/TIMIT/timit/train/dr1/fcjf0/SX37.WAV
Data/TIMIT/timit/train/dr1/fcjf0/SX397.WAV
Data/TIMIT/timit/train/dr1/fdaw0/SA1.WAV
Data/TIMIT/timit/train/dr1/fdaw0/SA2.WAV
Data/TIMIT/timit/train/dr1/fdaw0/SI1271.WAV
Data/TIMIT/timit/train/dr1/fdaw0/SI1406.WAV
Data/TIMIT/timit/train/dr1/fdaw0/SI2036.WAV
Data/TIMIT/timit/train/dr1/fdaw0/SX146.WAV
Data/TIMIT/timit/train/dr1/fdaw0/SX236.WAV
Data/TIMIT/timit/train/dr1/fdaw0/SX326.WAV
Data/TIMIT/timit/train/dr1/fdaw0/SX416.WAV
Data/TIMIT/timit/train/dr1/fdaw0/SX56.WAV
Data/TIMIT/timit/train/dr1/fdml0/SA1.WAV
Data/TIMIT/timit/train/dr1/fdml0/SA2.WAV
Data/TIMIT/timit/train/dr1/fdml0/SI1149.WAV
Data/TIMIT/timit/tr

KeyboardInterrupt: 