In [None]:
import numpy as np
import librosa
import tensorflow as tf
import joblib

def extract_features(audio_path):
    """Build a flat feature vector describing one audio file.

    The file is loaded through ``librosa.load`` with ``duration=30`` (librosa
    reads at most that many seconds). Five descriptors are then computed and
    concatenated in this order:

    1. MFCCs (13 coefficients), averaged along axis 1
    2. chroma (STFT based), averaged along axis 1
    3. spectral contrast, averaged along axis 1
    4. spectral rolloff, averaged along axis 0
    5. zero-crossing rate, averaged along axis 0

    NOTE(review): librosa documents rolloff and zero-crossing rate as
    ``(1, n_frames)`` arrays. If that holds here, the axis-0 mean keeps one
    value per frame instead of collapsing to a single number, so the length
    of the returned vector would depend on the clip length. Confirm against
    the pipeline that produced the saved scaler/model before changing it.

    Args:
        audio_path: Path of the audio file to analyse.

    Returns:
        numpy.ndarray: 1-D array holding the stacked descriptors.
    """
    signal, sample_rate = librosa.load(audio_path, duration=30)

    # Descriptors reduced along axis 1.
    mfcc_mean = np.mean(
        librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=13), axis=1
    )
    chroma_mean = np.mean(
        librosa.feature.chroma_stft(y=signal, sr=sample_rate), axis=1
    )
    contrast_mean = np.mean(
        librosa.feature.spectral_contrast(y=signal, sr=sample_rate), axis=1
    )

    # Descriptors reduced along axis 0 (see the review note in the docstring).
    rolloff_mean = np.mean(
        librosa.feature.spectral_rolloff(y=signal, sr=sample_rate), axis=0
    )
    zcr_mean = np.mean(librosa.feature.zero_crossing_rate(y=signal), axis=0)

    return np.hstack(
        [mfcc_mean, chroma_mean, contrast_mean, rolloff_mean, zcr_mean]
    )

# Class labels, indexed by the position of the model's highest score.
# NOTE(review): presumably this order mirrors the label encoding used when
# the model was trained - confirm against the training code.
_DEFAULT_GENRE_CLASSES = (
    "Blues", "Classical", "Country", "Disco",
    "Hip-hop", "Jazz", "Metal", "Pop", "Reggae", "Rock",
)


def predict_genre(model, audio_path, scaler, genre_classes=None):
    """Predict the genre label of a single audio file.

    Features are extracted with ``extract_features``, cut down to the number
    of features the scaler was fitted on, scaled, and passed to the model.
    The label at the position of the highest score is returned.

    Args:
        model: Object exposing ``predict(array)`` that returns one row of
            per-class scores for a ``(1, n_features)`` input.
        audio_path: Path of the audio file to classify.
        scaler: Fitted scaler exposing ``mean_`` and ``transform``.
        genre_classes: Optional sequence of labels indexed by class id.
            Defaults to the ten labels in ``_DEFAULT_GENRE_CLASSES``.

    Returns:
        The entry of ``genre_classes`` selected by the model's top score.

    Raises:
        ValueError: If fewer features were extracted than the scaler expects.
        IndexError: If the predicted class id is outside ``genre_classes``.
    """
    if genre_classes is None:
        genre_classes = _DEFAULT_GENRE_CLASSES

    audio_features = extract_features(audio_path)

    # Fail early with both counts in the message; without this check the
    # mismatch only surfaces later, inside scaler.transform.
    expected_count = len(scaler.mean_)
    if len(audio_features) < expected_count:
        raise ValueError(
            f"Extracted {len(audio_features)} features from {audio_path!r}, "
            f"but the scaler expects {expected_count}."
        )

    # Keep only the leading values so the vector matches the scaler's width;
    # any extra trailing values are dropped silently.
    audio_features = audio_features[:expected_count]

    # One sample in, one row out: shape (1, n_features).
    scaled_features = np.asarray(
        scaler.transform([audio_features])
    ).reshape(1, -1)

    predictions = model.predict(scaled_features)
    predicted_index = int(np.argmax(predictions, axis=1)[0])

    return genre_classes[predicted_index]

# Locations of the trained artifacts and of the clip to classify.
MODEL_PATH = 'music_genre_model2.h5'
SCALER_PATH = 'genre_scaler.pkl'
AUDIO_PATH = './genres_original/reggae/reggae.00007.wav'

# Restore the saved Keras model and the scaler stored with joblib.
model = tf.keras.models.load_model(MODEL_PATH)
scaler = joblib.load(SCALER_PATH)

# Classify the clip and report the predicted label.
genre = predict_genre(model, AUDIO_PATH, scaler)
print(f"Predicted music genre: {genre}")