# From PyTorch to ONNX

In [2]:
import torch
import torch.onnx

# Define the ReciterCNN class
class ReciterCNN(torch.nn.Module):
    """Two-stage convolutional classifier over single-channel spectrogram images.

    Each stage is a 3x3 convolution (padding 1, so height and width are kept)
    followed by ReLU and a 2x2 max-pool that halves both spatial dimensions.
    The flattened features feed a 128-unit hidden layer and a final linear
    layer that produces one raw score (logit) per class.

    ``fc1`` is sized for exactly 32 * 16 * 50 flattened features, i.e. a
    32-channel 16 x 50 map after the two pools -- for example a
    (N, 1, 64, 201) input (64 -> 32 -> 16 and 201 -> 100 -> 50). Inputs that
    do not reduce to that size fail at ``fc1``.

    Args:
        num_classes: Number of output logits.
    """

    def __init__(self, num_classes):
        super().__init__()
        nn = torch.nn
        # Attribute names double as state_dict keys, so they must stay stable
        # for previously saved checkpoints to load.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=16, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.fc1 = nn.Linear(in_features=32 * 16 * 50, out_features=128)
        self.fc2 = nn.Linear(in_features=128, out_features=num_classes)

    def forward(self, x):
        """Return logits of shape (N, num_classes) for a batch ``x`` of shape (N, 1, H, W)."""
        relu = torch.nn.functional.relu
        stage_one = self.pool(relu(self.conv1(x)))
        stage_two = self.pool(relu(self.conv2(stage_one)))
        # Keep the batch axis, flatten channels x height x width into one vector
        flattened = stage_two.view(stage_two.size(0), -1)
        hidden = relu(self.fc1(flattened))
        return self.fc2(hidden)

# Initialize model and load weights
num_classes = 12
reciter_model = ReciterCNN(num_classes)
# map_location="cpu" lets the checkpoint load on machines without a GPU;
# weights_only=True restricts unpickling to tensors and plain containers.
state_dict = torch.load("Quran_Reciters_Classification/model.pth", map_location="cpu", weights_only=True)
# strict=False tolerates key mismatches, but a mismatch leaves the affected layers
# at their random initial values. Report it instead of discarding the result.
load_result = reciter_model.load_state_dict(state_dict, strict=False)
if load_result.missing_keys or load_result.unexpected_keys:
    print(
        "Warning: checkpoint and model keys differ - "
        f"missing: {list(load_result.missing_keys)}, "
        f"unexpected: {list(load_result.unexpected_keys)}"
    )
# Switch to inference mode; as the cell's last expression this also displays the model.
reciter_model.eval()

ReciterCNN(
  (conv1): Conv2d(1, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (fc1): Linear(in_features=25600, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=12, bias=True)
)

In [2]:
import torchaudio


def preprocess_audio_fixed_length(file_path, target_sample_rate=16000, n_mels=64, fixed_length=201):
    """Load an audio file and return a fixed-width mel spectrogram in decibels.

    Steps: load the file with torchaudio, average multi-channel audio down to a
    single channel, resample to ``target_sample_rate`` when the file's rate
    differs, compute an ``n_mels``-band mel spectrogram, convert it with
    ``AmplitudeToDB``, then zero-pad on the right or truncate the last axis to
    exactly ``fixed_length`` frames.

    Args:
        file_path: Path of the audio file to read.
        target_sample_rate: Sample rate the waveform is converted to.
        n_mels: Number of mel bands.
        fixed_length: Number of frames kept along the last axis.

    Returns:
        torch.Tensor: Spectrogram whose last axis has length ``fixed_length``
        (expected shape (1, n_mels, fixed_length)).
    """
    audio, source_rate = torchaudio.load(file_path)

    # Collapse stereo / multi-channel recordings into one averaged channel
    if audio.size(0) > 1:
        audio = torch.mean(audio, dim=0, keepdim=True)

    if source_rate != target_sample_rate:
        audio = torchaudio.transforms.Resample(orig_freq=source_rate, new_freq=target_sample_rate)(audio)

    to_mel = torchaudio.transforms.MelSpectrogram(sample_rate=target_sample_rate, n_mels=n_mels)
    to_db = torchaudio.transforms.AmplitudeToDB()
    spec_db = to_db(to_mel(audio))

    # Force a constant frame count: cut long clips, right-pad short ones with zeros
    n_frames = spec_db.size(-1)
    if n_frames >= fixed_length:
        return spec_db[:, :, :fixed_length]
    return torch.nn.functional.pad(spec_db, (0, fixed_length - n_frames))

# Representative input used to trace the model during ONNX export.
# The path is relative to the project root, like the other paths in this notebook,
# so the cell does not depend on one machine's drive layout.
dummy_input = preprocess_audio_fixed_length(
    "Quran_Reciters_Classification/test_audios/001001.wav"
).unsqueeze(0)  # Add batch dimension

In [4]:

# Export the model to ONNX format
onnx_path = "Quran_Reciters_Classification/reciter_model.onnx"
torch.onnx.export(
    reciter_model,
    dummy_input,
    onnx_path,
    export_params=True,  # embed the trained weights in the .onnx file
    opset_version=12,
    input_names=['input'],
    output_names=['output'],
    # Without dynamic_axes the batch size of dummy_input is baked into the graph.
    # Marking axis 0 as dynamic lets the exported model accept any batch size.
    dynamic_axes={'input': {0: 'batch_size'}, 'output': {0: 'batch_size'}},
)
print(f"Model exported to {onnx_path}")



Model exported to Quran_Reciters_Classification/reciter_model.onnx


In [5]:
import onnxruntime
import numpy as np

# Function to predict using ONNX model
def predict_onnx(mel_spec, onnx_model_path):
    """
    Predict the class of the input mel-spectrogram using an ONNX model.

    A new ONNX Runtime session is created from ``onnx_model_path`` on every call.

    Args:
        mel_spec (torch.Tensor): Input mel-spectrogram of shape (N, 1, H, W).
        onnx_model_path (str): Path to the ONNX model file.
    Returns:
        int: Predicted class index (argmax over axis 1 of the first model
            output) for the first sample in the batch; any further samples
            are ignored.
    Raises:
        ValueError: If ``mel_spec`` is not 4-D with a channel dimension of 1.
    """
    if mel_spec.dim() != 4 or mel_spec.size(1) != 1:
        raise ValueError("Input mel_spec must have shape (N, 1, H, W).")

    # detach() first: .numpy() refuses tensors that still require grad
    mel_spec_np = mel_spec.detach().cpu().numpy().astype(np.float32)

    # Load the ONNX model
    ort_session = onnxruntime.InferenceSession(onnx_model_path)

    # Perform inference, feeding the array under the model's own input name
    input_name = ort_session.get_inputs()[0].name
    outputs = ort_session.run(None, {input_name: mel_spec_np})

    # Extract the predicted class as a plain Python int rather than a NumPy scalar
    predicted_class = np.argmax(outputs[0], axis=1)
    return int(predicted_class[0])

# Example usage
onnx_model_path = "Quran_Reciters_Classification/reciter_model.onnx"
# Project-relative audio path (like the model path above) rather than a
# machine-specific absolute one.
mel_spec_example = preprocess_audio_fixed_length(
    "Quran_Reciters_Classification/test_audios/001001.wav"
).unsqueeze(0)  # add the batch dimension predict_onnx requires
predicted_class = predict_onnx(mel_spec_example, onnx_model_path)
print(f"Predicted class: {predicted_class}")


Predicted class: 6


In [9]:
import os
import librosa
import soundfile as sf

def convert_mp3_to_wav(directory):
    """
    Converts all MP3 files in a directory to WAV format.

    Every file directly inside ``directory`` whose name ends in ``.mp3``
    (matched case-insensitively, so ``.MP3`` counts too) is decoded with
    librosa at its native sample rate and written next to the original with
    the extension replaced by ``.wav``. Subdirectories are not searched.
    Other librosa defaults still apply (by default it downmixes to mono).
    One "Converted: ..." line is printed per file.

    Args:
        directory (str): Path to the directory containing audio files.
    """
    # os.listdir returns entries in arbitrary order; sort for a reproducible log
    for filename in sorted(os.listdir(directory)):
        if not filename.lower().endswith(".mp3"):
            continue
        # Full path to the MP3 file
        mp3_path = os.path.join(directory, filename)
        # A directory that merely has an .mp3-like name cannot be decoded
        if not os.path.isfile(mp3_path):
            continue
        # Generate the WAV file path
        wav_path = os.path.splitext(mp3_path)[0] + ".wav"
        # Load the MP3 file (sr=None keeps the file's own sample rate)
        y, sr = librosa.load(mp3_path, sr=None)
        # Save as WAV
        sf.write(wav_path, y, sr, format='WAV')
        print(f"Converted: {mp3_path} -> {wav_path}")

# Example usage: convert every MP3 in the test-audio folder.
# Replace the value below with your directory path.
directory_path = "Quran_Reciters_Classification/test_audios"
convert_mp3_to_wav(directory=directory_path)


Converted: Quran_Reciters_Classification/test_audios\002155.mp3 -> Quran_Reciters_Classification/test_audios\002155.wav
Converted: Quran_Reciters_Classification/test_audios\006155.mp3 -> Quran_Reciters_Classification/test_audios\006155.wav
Converted: Quran_Reciters_Classification/test_audios\008040.mp3 -> Quran_Reciters_Classification/test_audios\008040.wav
Converted: Quran_Reciters_Classification/test_audios\010065.mp3 -> Quran_Reciters_Classification/test_audios\010065.wav
Converted: Quran_Reciters_Classification/test_audios\010066.mp3 -> Quran_Reciters_Classification/test_audios\010066.wav
Converted: Quran_Reciters_Classification/test_audios\012030.mp3 -> Quran_Reciters_Classification/test_audios\012030.wav
Converted: Quran_Reciters_Classification/test_audios\018020.mp3 -> Quran_Reciters_Classification/test_audios\018020.wav
Converted: Quran_Reciters_Classification/test_audios\018050.mp3 -> Quran_Reciters_Classification/test_audios\018050.wav
Converted: Quran_Reciters_Classification