In [1]:
import os

# Step up one directory level from wherever the kernel was started.
# NOTE(review): this is relative to the current directory, so re-running the
# cell without restarting the kernel climbs one more level each time.
os.chdir(os.pardir)

In [2]:
# Display the current working directory to confirm where the chdir above landed.
%pwd

'e:\\MyOnlineCourses\\ML_Projects\\arabic-digits-recognition'

In [3]:
from dataclasses import dataclass
from pathlib import Path


@dataclass(frozen=True)
class DataTFInferenceConfig:
    """Immutable settings bundle for the TensorFlow inference stage.

    Instances are frozen, so fields cannot be reassigned after construction.
    """

    # Directory associated with this stage.
    root_dir: Path
    # Location of the file that the inference step reads to find audio clips.
    audio_path: str
    # Location of the saved model that the inference step loads.
    model_path: str
    

In [30]:
from ard.constants import *
from ard.utils.help import read_yaml, create_directories

class ConfigurationManager:
    """Load the YAML configuration and parameter files and build stage configs.

    On construction both files are read with ``read_yaml`` and the directory
    named by ``artifacts_root`` in the configuration is created.
    """

    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH):
        """Read the configuration and parameter files.

        Args:
            config_filepath: Path of the configuration YAML file.
            params_filepath: Path of the parameters YAML file.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        create_directories([self.config.artifacts_root])

    def get_data_tf_inference_config(self) -> "tuple[DataTFInferenceConfig, object]":
        """Build the inference-stage configuration.

        Creates the stage's ``root_dir`` as a side effect.

        Returns:
            A 2-tuple ``(data_tf_inference_config, params)`` where ``params``
            is the object loaded from the parameters file. Callers are
            expected to unpack both values.
        """
        config = self.config.data_tf_inference

        create_directories([config.root_dir])

        data_tf_inference_config = DataTFInferenceConfig(
            # The dataclass declares root_dir as a Path, so convert the raw
            # YAML value instead of passing it through unchanged.
            root_dir=Path(config.root_dir),
            audio_path=config.audio_path,
            model_path=config.model_path,
        )

        return data_tf_inference_config, self.params

In [35]:
import tensorflow as tf
import numpy as np
import pandas as pd
from pathlib import Path
from typing import Dict, Any, Tuple
from dataclasses import dataclass
import os

import tensorflow_io as tfio
from ard import logger

# Set TensorFlow's Python logger level to ERROR.
tf.get_logger().setLevel('ERROR') 
class ModelInference:
    """Classify one audio clip with a saved Keras model.

    A clip is picked at random from the CSV file named by
    ``config.audio_path``, converted to a magnitude spectrogram, and passed to
    the model loaded from ``config.model_path``.
    """

    # Number of samples every clip is cropped or zero-padded to before the STFT.
    INPUT_LENGTH = 10000

    def __init__(self, config: DataTFInferenceConfig, params: Dict[str, Any]):
        """Store the settings and load the model.

        Args:
            config: Inference-stage configuration (model and CSV locations).
            params: Mapping that provides ``TARGET_SAMPLE_RATE``,
                ``FRAME_LENGTH`` and ``FRAME_STEP``.
        """
        self.config = config
        self.params = params
        self.model = tf.keras.models.load_model(self.config.model_path)
        self._target_sample_rate = params['TARGET_SAMPLE_RATE']

    def preprocess_audio(self, random_state=None):
        """Pick one clip and turn it into a model-ready spectrogram.

        Args:
            random_state: Optional seed forwarded to ``Series.sample`` so the
                clip selection can be reproduced. ``None`` (the default) keeps
                the selection unseeded.

        Returns:
            The magnitude STFT of the clip with a trailing channel axis and a
            leading batch axis of size 1.
        """
        # The CSV lists candidate clips in its "path" column.
        manifest = pd.read_csv(self.config.audio_path)
        audio_path = manifest["path"].sample(n=1, random_state=random_state).values[0]
        logger.info(str(audio_path))

        audio_data = tf.io.read_file(audio_path)
        audio, sample_rate = tf.audio.decode_wav(audio_data, desired_channels=1)
        audio = tf.squeeze(audio, axis=-1)
        sample_rate = tf.cast(sample_rate, dtype=tf.int64)
        # Resample to the configured rate instead of a hard-coded 16000 so the
        # TARGET_SAMPLE_RATE parameter is actually honoured.
        audio = tfio.audio.resample(
            audio, rate_in=sample_rate, rate_out=int(self._target_sample_rate))
        logger.info(f"Input length {len(audio)}")

        # Crop first, then pad with however many zeros are still missing.
        # This yields exactly INPUT_LENGTH samples for both short and long
        # clips without branching in Python on a tensor value.
        input_len = self.INPUT_LENGTH
        audio = tf.cast(audio[:input_len], dtype=tf.float32)
        zero_padding = tf.zeros([input_len] - tf.shape(audio), dtype=tf.float32)
        equal_length = tf.concat([audio, zero_padding], 0)

        logger.info(f"Input length {len(equal_length)}")
        # Key access works for a plain dict as well as for attribute-style
        # mappings, matching the Dict[str, Any] annotation on params.
        spectrogram = tf.signal.stft(
            equal_length,
            frame_length=self.params['FRAME_LENGTH'],
            frame_step=self.params['FRAME_STEP'],
            window_fn=tf.signal.hamming_window)

        spectrogram = tf.abs(spectrogram)
        spectrogram = spectrogram[..., tf.newaxis]  # add channel axis
        logger.info(f"Shape of WAV is {spectrogram.shape}")
        spectrogram = tf.expand_dims(spectrogram, axis=0)  # add batch axis

        return spectrogram

    def predict(self, random_state=None):
        """Run the model on one randomly chosen clip.

        Args:
            random_state: Optional seed forwarded to ``preprocess_audio``.

        Returns:
            The array produced by ``np.argmax(predictions, axis=1)``, i.e. the
            index of the highest-scoring output for the single clip.
        """
        audio_data = self.preprocess_audio(random_state=random_state)
        predictions = self.model.predict(audio_data)
        predicted_label = np.argmax(predictions, axis=1)
        # NOTE(review): the logged value is the argmax index, not a score.
        logger.info(f"Prediction score of the givin digit is:{predicted_label}")
        return predicted_label
        



In [36]:
# Build the configuration, load the model, and classify one clip.
# Exceptions propagate unchanged: the previous `except Exception as e: raise e`
# wrapper handled nothing and only added a frame to the traceback.
config = ConfigurationManager()
data_tf_inference_config, data_tf_inference_params = config.get_data_tf_inference_config()
data_tf_inference = ModelInference(config=data_tf_inference_config, params=data_tf_inference_params)
data_tf_inference.predict()

[2024-08-12 11:12:08,400: INFO: help: yaml file: config\config.yaml loaded successfully. Content size: 9]
[2024-08-12 11:12:08,410: INFO: help: yaml file: params.yaml loaded successfully. Content size: 9]
[2024-08-12 11:12:08,413: INFO: help: Total directories created: 1]
[2024-08-12 11:12:08,419: INFO: help: Total directories created: 1]
[2024-08-12 11:12:12,612: INFO: 3137233790: E:\MyOnlineCourses\ML_Projects\ADR\ArabDIGdtCls\one\one-2-t-37.wav]
[2024-08-12 11:12:12,706: INFO: 3137233790: Input length 12890]
[2024-08-12 11:12:12,711: INFO: 3137233790: Input length 10000]
[2024-08-12 11:12:12,732: INFO: 3137233790: Shape of WAV is (18, 513, 1)]
[2024-08-12 11:12:14,641: INFO: 3137233790: Prediction score of the givin digit is:[4]]
