In [1]:
import tensorflow as tf
import librosa
import numpy as np
import os

# ---------------------------------------------------------------
# Step 1 - restore the trained genre classifier from disk
# ---------------------------------------------------------------
# Location of the saved Keras model; point this at your own file.
model_path = "arabic_music_genre_model.keras"
model = tf.keras.models.load_model(filepath=model_path)

2025-08-29 21:23:39.390022: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2025-08-29 21:23:39.396532: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-08-29 21:23:40.034578: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-08-29 21:23:43.084514: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off,

In [None]:
# Audio / feature-extraction settings used by extract_features.
SR = 22050                          # sampling rate (Hz) every clip is loaded at
DURATION = 30                       # seconds of audio per clip
SAMPLES_PER_TRACK = DURATION * SR   # samples in one full-length clip
N_MFCC = 13                         # MFCC coefficients kept per frame
N_FFT = 2048                        # FFT window size passed to librosa
# Hop chosen for 30-second clips: (22050 * 30) / 5169 is roughly 128 frames,
# which extract_features then pads up to 130 time steps.
HOP_LENGTH = 5169

def extract_features(audio_signal, n_frames=130):
    """Turn a raw audio signal into a fixed-size MFCC feature array.

    The signal is first padded or truncated to ``SAMPLES_PER_TRACK`` samples,
    then MFCCs are computed with the module-level ``SR``, ``N_MFCC``,
    ``N_FFT`` and ``HOP_LENGTH`` settings. The time axis is finally padded
    (with zeros, at the end) or trimmed to exactly ``n_frames`` steps.

    Args:
        audio_signal: 1-D array of audio samples (not a file path).
        n_frames: Number of time steps in the returned array. Defaults to
            130, the value this function previously hard-coded.

    Returns:
        Array of shape ``(n_frames, N_MFCC, 1)``, or ``None`` if anything
        goes wrong during extraction (the error is printed, not raised, so
        callers can skip the offending clip).
    """
    try:
        # Ensure correct length
        signal = librosa.util.fix_length(audio_signal, size=SAMPLES_PER_TRACK)

        # Extract MFCC features -> (N_MFCC, frames)
        mfcc = librosa.feature.mfcc(y=signal, sr=SR, n_mfcc=N_MFCC,
                                    n_fft=N_FFT, hop_length=HOP_LENGTH)

        # Pad/trim the time axis to exactly n_frames steps
        missing = n_frames - mfcc.shape[1]
        if missing > 0:
            mfcc = np.pad(mfcc, ((0, 0), (0, missing)))
        else:
            mfcc = mfcc[:, :n_frames]

        # Transpose to time-major and add a trailing channel dimension
        return mfcc.T[..., np.newaxis]  # (n_frames, N_MFCC, 1)

    except Exception as e:
        # Deliberate best-effort: report and signal failure with None.
        print(f"Feature extraction error: {e}")
        return None

In [3]:
# Class labels, looked up by the index of the model's highest-scoring output,
# so the order here must not be changed.
genre_names = [
    "east",
    "loyal",
    "Muwa",
    "poems",
    "rai",
]

In [None]:
def process_any_audio(file_path, chunk_size=30):  # Changed default chunk_size to 30
    """Predict the genre of an audio file by majority vote over its chunks.

    The file is loaded at ``SR``, cut into consecutive ``chunk_size``-second
    pieces, and each piece is passed through ``extract_features`` and the
    global ``model``. A trailing piece shorter than 5 seconds is skipped.
    Progress and a final summary are printed along the way.

    Note that ``extract_features`` always pads or truncates its input to
    ``SAMPLES_PER_TRACK`` samples, so a ``chunk_size`` different from
    ``DURATION`` changes how much real audio each prediction sees.

    Args:
        file_path: Path of the audio file to classify.
        chunk_size: Length of each chunk in seconds.

    Returns:
        On success, a dict with:
            ``predicted_genre``      - most frequently predicted genre name,
            ``prediction_count``     - {genre: number of chunks predicted as it},
            ``average_confidences``  - list of (genre, mean score) pairs,
                                       highest first,
            ``chunk_results``        - one dict per processed chunk.
        Otherwise a string: ``"No valid predictions"`` when no chunk could be
        classified, or ``"Processing error: ..."`` if an exception occurred
        (the traceback is printed).
    """
    try:
        # Load full audio; the returned rate equals SR, so it is not needed.
        print(f"Loading audio file: {file_path}")
        full_signal, _ = librosa.load(file_path, sr=SR)
        total_samples = len(full_signal)
        total_duration = total_samples / SR

        print(f"Audio duration: {total_duration:.2f} seconds")
        print(f"Processing in {chunk_size}-second chunks")

        samples_per_chunk = SR * chunk_size
        predictions = []    # predicted class index per processed chunk
        chunk_results = []  # To store individual chunk predictions

        # Process audio in chunk_size-second chunks
        for start_sample in range(0, total_samples, samples_per_chunk):
            end_sample = min(start_sample + samples_per_chunk, total_samples)
            start_time = start_sample / SR
            end_time = end_sample / SR
            chunk_duration = (end_sample - start_sample) / SR

            # Skip chunks that are too short (less than 5 seconds)
            if chunk_duration < 5:
                print(f"Skipping final chunk ({chunk_duration:.2f}s) - too short")
                continue

            print(f"Processing chunk {len(predictions)+1}: {start_time:.2f}s - {end_time:.2f}s")

            # Process chunk
            features = extract_features(full_signal[start_sample:end_sample])
            if features is None:
                print(f"Skipping invalid chunk {start_time:.2f}s - {end_time:.2f}s")
                continue

            # Predict on a batch of one
            pred = model.predict(features[np.newaxis, ...], verbose=0)
            scores = pred[0]
            predicted_index = int(np.argmax(pred))
            predicted_genre = genre_names[predicted_index]
            confidence = float(scores[predicted_index])

            print(f"  Chunk result: {predicted_genre} (confidence: {confidence:.2f})")

            predictions.append(predicted_index)
            chunk_results.append({
                "start_time": start_time,
                "end_time": end_time,
                "genre": predicted_genre,
                "confidence": confidence,
                "all_confidences": {name: float(scores[i]) for i, name in enumerate(genre_names)}
            })

        if not predictions:
            return "No valid predictions"

        # Count votes once; minlength guarantees one slot per known genre.
        counts = np.bincount(predictions, minlength=len(genre_names))
        most_common_genre = genre_names[int(counts.argmax())]

        # Average confidence for each genre across all processed chunks,
        # stored as plain floats so the result is easy to serialize.
        avg_confidences = {
            name: float(np.mean([entry["all_confidences"][name] for entry in chunk_results]))
            for name in genre_names
        }
        sorted_confidences = sorted(avg_confidences.items(), key=lambda x: x[1], reverse=True)

        # Prepare result
        result = {
            "predicted_genre": most_common_genre,
            "prediction_count": {name: int(counts[i]) for i, name in enumerate(genre_names)},
            "average_confidences": sorted_confidences,
            "chunk_results": chunk_results
        }

        print("\n===== RESULTS =====")
        print(f"Predicted genre: {result['predicted_genre']}")
        print(f"Processed {len(chunk_results)} chunks of {chunk_size} seconds each")

        print("\nGenre distribution across chunks:")
        for genre, count in result['prediction_count'].items():
            if count > 0:
                print(f"  {genre}: {count} chunks ({count/len(chunk_results)*100:.1f}%)")

        print("\nAverage confidence scores:")
        for genre, conf in sorted_confidences:
            print(f"  {genre}: {conf*100:.2f}%")

        return result

    except Exception as e:
        # Best-effort: show the traceback but hand the caller a message string.
        import traceback
        traceback.print_exc()
        return f"Processing error: {e}"

In [5]:
# Classify one recording and echo its headline numbers.
file_path = "niama.wav"  # replace with your file
result = process_any_audio(file_path)
if not isinstance(result, dict):
    # Anything other than a dict is a plain status/error message.
    print(result)
else:
    print(f"\nPredicted genre: {result['predicted_genre']}")
    print("Average confidences:")
    for genre, conf in result["average_confidences"]:
        print(f"  {genre}: {conf*100:.2f}%")

Loading audio file: niama.wav
Audio duration: 1174.34 seconds
Processing in 30-second chunks
Processing chunk 1: 0.00s - 30.00s


2025-08-29 21:23:51.242440: I external/local_xla/xla/service/service.cc:163] XLA service 0x7fea58011fa0 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
2025-08-29 21:23:51.242470: I external/local_xla/xla/service/service.cc:171]   StreamExecutor device (0): Host, Default Version
2025-08-29 21:23:51.261303: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
I0000 00:00:1756499031.485434    6542 device_compiler.h:196] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


  Chunk result: loyal (confidence: 0.95)
Processing chunk 2: 30.00s - 60.00s
  Chunk result: loyal (confidence: 1.00)
Processing chunk 3: 60.00s - 90.00s
  Chunk result: loyal (confidence: 0.93)
Processing chunk 4: 90.00s - 120.00s
  Chunk result: loyal (confidence: 0.99)
Processing chunk 5: 120.00s - 150.00s
  Chunk result: rai (confidence: 0.58)
Processing chunk 6: 150.00s - 180.00s
  Chunk result: loyal (confidence: 1.00)
Processing chunk 7: 180.00s - 210.00s
  Chunk result: poems (confidence: 0.81)
Processing chunk 8: 210.00s - 240.00s
  Chunk result: poems (confidence: 0.95)
Processing chunk 9: 240.00s - 270.00s
  Chunk result: rai (confidence: 1.00)
Processing chunk 10: 270.00s - 300.00s
  Chunk result: rai (confidence: 1.00)
Processing chunk 11: 300.00s - 330.00s
  Chunk result: poems (confidence: 1.00)
Processing chunk 12: 330.00s - 360.00s
  Chunk result: loyal (confidence: 0.98)
Processing chunk 13: 360.00s - 390.00s
  Chunk result: rai (confidence: 0.76)
Processing chunk 14:

In [6]:
# Classify one recording and echo its headline numbers.
file_path = "khaled.wav"  # replace with your file
result = process_any_audio(file_path)
if not isinstance(result, dict):
    # Anything other than a dict is a plain status/error message.
    print(result)
else:
    print(f"\nPredicted genre: {result['predicted_genre']}")
    print("Average confidences:")
    for genre, conf in result["average_confidences"]:
        print(f"  {genre}: {conf*100:.2f}%")

Loading audio file: khaled.wav
Audio duration: 327.56 seconds
Processing in 30-second chunks
Processing chunk 1: 0.00s - 30.00s
  Chunk result: rai (confidence: 1.00)
Processing chunk 2: 30.00s - 60.00s
  Chunk result: rai (confidence: 1.00)
Processing chunk 3: 60.00s - 90.00s
  Chunk result: rai (confidence: 1.00)
Processing chunk 4: 90.00s - 120.00s
  Chunk result: rai (confidence: 0.88)
Processing chunk 5: 120.00s - 150.00s
  Chunk result: rai (confidence: 1.00)
Processing chunk 6: 150.00s - 180.00s
  Chunk result: rai (confidence: 0.69)
Processing chunk 7: 180.00s - 210.00s
  Chunk result: rai (confidence: 1.00)
Processing chunk 8: 210.00s - 240.00s
  Chunk result: rai (confidence: 1.00)
Processing chunk 9: 240.00s - 270.00s
  Chunk result: rai (confidence: 1.00)
Processing chunk 10: 270.00s - 300.00s
  Chunk result: rai (confidence: 0.91)
Processing chunk 11: 300.00s - 327.56s
  Chunk result: rai (confidence: 0.99)

===== RESULTS =====
Predicted genre: rai
Processed 11 chunks of 3