In [2]:
import numpy as np
import matplotlib.pyplot as plot
import tensorflow as tf
import dill as pickle

%matplotlib inline
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


### Download and Preprocess Audio Files in Surah Fatihah

This will take quite a bit of time, but the good news is that you only need to do it once! After you've done this once, the files will be saved locally and you can skip the cells in this section.

In [None]:
%run -i "../download.py" -s 1

In [None]:
%run -i "../audio_preprocessing/generate_features.py" -f mfcc -s 1 --local_download_dir "../.audio" --output_dir "../.outputs" 

In [6]:
%run -i "../audio_preprocessing/generate_one_hot_encoding.py" -i "../data/data-uthmani.json" -o "../data/one-hot.pkl" 

### Write Methods to Load the Dataset

In [75]:
"""
Inspired by: https://github.com/keras-team/keras/blob/master/examples/lstm_seq2seq.py
"""

def convert_list_of_arrays_to_padded_array(list_varying_sizes, pad_value=0):
    '''
    Converts a list of arrays of varying sizes to a single numpy array. The extra elements are set to 0
    '''
    max_shape = [0]*len(list_varying_sizes[0].shape)
    # first pass to compute the max size
    for arr in list_varying_sizes:
        shape = arr.shape
        max_shape = [max(s1, s2) for s1, s2 in zip(shape, max_shape)]
    padded_array = pad_value * np.ones((len(list_varying_sizes), *max_shape))
    
    # second pass to fill in the values in the array:
    for a, arr in enumerate(list_varying_sizes):
        r, c = arr.shape  # TODO(abidlabs): maybe make more general to more than just 2D arrays.
        padded_array[a, :r, :c] = arr
    
    return padded_array

def preprocess_encoder_input(arr):
    '''
    Simple method to handle the complex MFCC coefs that are produced during preprocessing. This means:
    1. (For now), discarding one of the channels of the MFCC coefs
    2. Collapsing any empty dimensions
    '''
    return arr.squeeze()[0]


# Load every one-hot-encoded output as a dictionary
with open('../data/one-hot.pkl', 'rb') as one_hot_quran_pickle_file:
    one_hot_obj = pickle.load(one_hot_quran_pickle_file)


def get_one_hot_encoded_verse(surah_num, ayah_num): 
    '''
    Converts a one-hot-encoded verse into forms that can be used by the LSTM decoder
    
    :param surah_num: an int designating the chapter number, one-indexed
    :param ayah_num: an int designating the verse number, one-indexed
    '''
    # Load the preprocessed one-hot encoding 
    one_hot_verse = one_hot_obj['quran']['surahs'][surah_num - 1]['ayahs'][ayah_num - 1]['text']
    num_chars_in_verse, num_unique_chars = one_hot_verse.shape
    
    # Generate decoder_input_data 
    decoder_input = np.zeros((num_chars_in_verse+2, num_unique_chars+2))
    decoder_input[0, :] = [0] * num_unique_chars + [1, 0] # START token
    decoder_input[1:num_chars_in_verse+1, :-2] = one_hot_verse # original verse
    decoder_input[-1, :] = [0] * num_unique_chars + [0, 1] # STOP token

    # Generate decoder_target_data 
    decoder_target = np.zeros((num_chars_in_verse+2, num_unique_chars+2))
    decoder_target[:num_chars_in_verse, :-2] = one_hot_verse # original verse
    decoder_target[-1, :] = [0] * num_unique_chars + [0, 1] # STOP token
    
    return decoder_input, decoder_target

    
def build_dataset(local_coefs_dir='../.outputs/mfcc', surahs=[1], n=100):
    '''
    Builds a dataset to be used with the sequence-to-sequence network.
    
    :param local_coefs_dir: a string with the path of the coefficients for prediction
    '''
    
    def get_encoder_and_decoder_data(n=100):
        count = 0
        encoder_input_data = []
        decoder_input_data = []
        decoder_target_data = []
        for surah_num in surahs:
            local_surah_dir = os.path.join(local_coefs_dir, "s" + str(surah_num))
            for _, ayah_directories, _ in os.walk(local_surah_dir):
                for ayah_directory in ayah_directories:
                    ayah_num = ayah_directory[1:]
                    local_ayah_dir = os.path.join(local_surah_dir, ayah_directory)
                    for _, _, recording_filenames in os.walk(local_ayah_dir):
                        for recording_filename in recording_filenames:
                            local_coefs_path = os.path.join(local_ayah_dir, recording_filename)
                            encoder_input = np.load(local_coefs_path)
                            encoder_input = preprocess_encoder_input(encoder_input)
                            encoder_input_data.append(encoder_input)

                            decoder_input, decoder_target = get_one_hot_encoded_verse(int(surah_num), int(ayah_num))
                            decoder_input_data.append(decoder_input)
                            decoder_target_data.append(decoder_target)
                            count += 1
                            if count == n:
                                return encoder_input_data, decoder_input_data, decoder_target_data
        return encoder_input_data, decoder_input_data, decoder_target_data
    
    
    encoder_input_data, decoder_input_data, decoder_target_data = get_encoder_and_decoder_data(n=n)
    encoder_input_data = convert_list_of_arrays_to_padded_array(encoder_input_data)
    decoder_input_data = convert_list_of_arrays_to_padded_array(decoder_input_data)
    decoder_target_data = convert_list_of_arrays_to_padded_array(decoder_target_data)
    return encoder_input_data, decoder_input_data, decoder_target_data

In [76]:
encoder_input_data, decoder_input_data, decoder_target_data = build_dataset(n=10)

In [82]:
[print(a.shape) for a in [encoder_input_data, decoder_input_data, decoder_target_data]]

(10, 914, 13)
(10, 40, 61)
(10, 40, 61)


[None, None, None]

### Build a Keras Model

In [None]:
# Define an input sequence and process it.
encoder_inputs = Input(shape=(None, num_encoder_tokens))
encoder = LSTM(latent_dim, return_state=True)
encoder_outputs, state_h, state_c = encoder(encoder_inputs)
# We discard `encoder_outputs` and only keep the states.
encoder_states = [state_h, state_c]

# Set up the decoder, using `encoder_states` as initial state.
decoder_inputs = Input(shape=(None, num_decoder_tokens))
# We set up our decoder to return full output sequences,
# and to return internal states as well. We don't use the
# return states in the training model, but we will use them in inference.
decoder_lstm = LSTM(latent_dim, return_sequences=True, return_state=True)
decoder_outputs, _, _ = decoder_lstm(decoder_inputs,
                                     initial_state=encoder_states)
decoder_dense = Dense(num_decoder_tokens, activation='softmax')
decoder_outputs = decoder_dense(decoder_outputs)

# Define the model that will turn
# `encoder_input_data` & `decoder_input_data` into `decoder_target_data`
model = Model([encoder_inputs, decoder_inputs], decoder_outputs)

# Run training
model.compile(optimizer='rmsprop', loss='categorical_crossentropy')
model.fit([encoder_input_data, decoder_input_data], decoder_target_data,
          batch_size=batch_size,
          epochs=epochs,
          validation_split=0.2)

In [16]:
get_one_hot_encoded_verse(1, 1).shape

(40, 61)