# Import the Packages We Will Use

In [1]:
import os
import json
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from keras.utils import register_keras_serializable

2024-07-02 11:13:41.630032: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


**Define Some Parameters**

In [2]:
# Constants and Parameters
# STFT settings passed to tf.signal.stft in preprocess_audio
# (window length, hop size and FFT size, all in samples).
# NOTE(review): presumably these must match the values used when the model
# was trained -- confirm against the training code.
frame_length = 256
frame_step = 160
fft_length = 384
# Character set used as the vocabulary of the StringLookup layers: a space
# followed by Arabic letters. The order of this string determines the integer
# id of each character.
# NOTE(review): presumably it has to match the vocabulary used in training -- confirm.
characters = ' ءآأؤإئابتثجحخدذرزسشصضطظعغفقكلمنهةوىي'

# Directory containing test audio files
test_audio_dir = './Data'
# CSV file the decoded transcripts are written to
output_csv_path = './Output/predictions.csv'

**Define Some Functions**

In [3]:
# Forward lookup layer: maps each character of `characters` to an integer id
char_to_num = keras.layers.StringLookup(
    vocabulary=list(characters),
    oov_token="",
)
# Inverse lookup layer: maps integer ids back to characters, built from the
# forward layer's vocabulary so both directions agree
num_to_char = keras.layers.StringLookup(
    vocabulary=char_to_num.get_vocabulary(),
    oov_token="",
    invert=True,
)

def preprocess_audio(wav_file):
    """
    Preprocesses a single audio file for ASR.

    The file is decoded to a mono waveform, zero-padded to at least
    `fft_length` samples, converted to an STFT magnitude spectrogram,
    compressed with a square root, and normalized per frame.

    Parameters:
    - wav_file (str): Path to the audio file (WAV format).

    Returns:
    - spectrogram (Tensor): Preprocessed spectrogram with one row per STFT
      frame and one column per frequency bin.
    """
    file = tf.io.read_file(wav_file)
    # Request exactly one channel: decode_wav returns (samples, channels), and
    # the squeeze below would raise on files with more than one channel.
    audio, _ = tf.audio.decode_wav(file, desired_channels=1)
    audio = tf.squeeze(audio, axis=-1)
    audio = tf.cast(audio, tf.float32)

    # Zero-pad short clips up to fft_length samples so the STFT yields at
    # least one frame. The pad amount is clamped at 0, so longer clips are
    # left unchanged; this form needs no Python `if` on a tensor.
    pad_amount = tf.maximum(fft_length - tf.shape(audio)[0], 0)
    audio = tf.pad(audio, paddings=[[0, pad_amount]])

    spectrogram = tf.signal.stft(
        audio, frame_length=frame_length, frame_step=frame_step, fft_length=fft_length
    )
    # Keep only the magnitude, then apply square-root compression.
    spectrogram = tf.abs(spectrogram)
    spectrogram = tf.math.pow(spectrogram, 0.5)

    # Normalize each frame (axis 1 = frequency bins) to zero mean and unit
    # standard deviation; the epsilon avoids division by zero on silent frames.
    means = tf.math.reduce_mean(spectrogram, 1, keepdims=True)
    stddevs = tf.math.reduce_std(spectrogram, 1, keepdims=True)
    spectrogram = (spectrogram - means) / (stddevs + 1e-10)

    return spectrogram

def decode_batch_predictions(pred):
    """
    Converts a batch of model outputs into text with greedy CTC decoding.

    Parameters:
    - pred (Tensor): Predicted output from the model; its first two
      dimensions are read as batch size and number of time steps.

    Returns:
    - output_text (list): One decoded UTF-8 string per batch item.
    """
    batch_size, time_steps = pred.shape[0], pred.shape[1]
    # Every item in the batch is decoded over the full number of time steps.
    sequence_lengths = np.full(batch_size, time_steps, dtype=np.float64)

    decoded = keras.backend.ctc_decode(
        pred, input_length=sequence_lengths, greedy=True
    )[0]
    best_paths = decoded[0]

    # Map the integer ids back to characters and join them into one string.
    return [
        tf.strings.reduce_join(num_to_char(path)).numpy().decode("utf-8")
        for path in best_paths
    ]

@register_keras_serializable()
def CTCLoss(y_true, y_pred):
    """
    CTC loss for sequence prediction, registered as a Keras serializable.

    Every sample in the batch is given the same input length (the size of
    dimension 1 of `y_pred`) and the same label length (the size of
    dimension 1 of `y_true`).

    Parameters:
    - y_true (Tensor): True labels.
    - y_pred (Tensor): Predicted outputs from the model.

    Returns:
    - loss (Tensor): CTC loss computed by `keras.backend.ctc_batch_cost`.
    """
    true_dims = tf.shape(y_true)
    pred_dims = tf.shape(y_pred)

    n_samples = tf.cast(true_dims[0], dtype="int64")
    # (batch, 1) column of ones used to repeat a scalar length per sample.
    ones_column = tf.ones(shape=(n_samples, 1), dtype="int64")

    input_length = tf.cast(pred_dims[1], dtype="int64") * ones_column
    label_length = tf.cast(true_dims[1], dtype="int64") * ones_column

    return keras.backend.ctc_batch_cost(y_true, y_pred, input_length, label_length)

# Load the Model Configuration and Weights

In [4]:
# Read the saved architecture description from its JSON file
with open('./Models/model_config.json', 'r') as config_file:
    model_config = json.load(config_file)

# Rebuild the network from the architecture; model_from_json takes a JSON
# string, so the parsed config is serialized back before being passed in
model = keras.models.model_from_json(json.dumps(model_config))

# Restore the trained weights into the rebuilt network
model.load_weights('./Models/Model.h5')

# Compile with the Adam optimizer and the custom CTC loss function
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=1e-4),
    loss=CTCLoss,
)

# Generate Predictions with the Model and Save Them to a CSV File

In [None]:
# List of test audio files. The extension check is case-insensitive so that
# '.WAV' files are not skipped, and the list is sorted because os.listdir
# returns entries in arbitrary order (sorting keeps the CSV row order stable).
test_files = sorted(
    os.path.join(test_audio_dir, f)
    for f in os.listdir(test_audio_dir)
    if f.lower().endswith('.wav')
)

# Create the output directory before running inference, so a missing folder
# cannot make the final write fail after all predictions have been computed.
output_dir = os.path.dirname(output_csv_path)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)

# Predict one file at a time: add a batch dimension, run the model, decode.
predictions = []
for audio_file in test_files:
    spectrogram = preprocess_audio(audio_file)
    spectrogram = tf.expand_dims(spectrogram, axis=0)
    # verbose=0 avoids printing a progress bar for every single file.
    prediction = model.predict(spectrogram, verbose=0)
    decoded_text = decode_batch_predictions(prediction)
    predictions.append({'audio': os.path.splitext(os.path.basename(audio_file))[0], 'transcript': decoded_text[0]})

# Save predictions to CSV. Explicit columns keep the header row even when no
# audio files were found.
pred_df = pd.DataFrame(predictions, columns=['audio', 'transcript'])
pred_df.to_csv(output_csv_path, index=False, encoding='utf-8')

print(f"Predictions saved to {output_csv_path}")