In [6]:
import os
import librosa
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam

# Preprocess audio data
def preprocess_audio(file_path, duration=30, target_shape=(128, 128)):
    """Turn an audio file into a fixed-size, dB-scaled mel spectrogram.

    Args:
        file_path: Path of the audio file handed to ``librosa.load``.
        duration: Maximum number of seconds of audio to load.
        target_shape: ``(n_mels, n_frames)`` of the returned array.

    Returns:
        A 2-D array with ``target_shape[0]`` mel bands whose time axis is
        right-padded with zeros or truncated to ``target_shape[1]`` frames,
        or ``None`` when anything goes wrong (the error is printed).
    """
    try:
        signal, rate = librosa.load(file_path, duration=duration)
        mel_db = librosa.power_to_db(
            librosa.feature.melspectrogram(y=signal, sr=rate, n_mels=target_shape[0]),
            ref=np.max,
        )

        # Force the time axis to exactly `width` frames.
        width = target_shape[1]
        missing = width - mel_db.shape[1]
        if missing > 0:
            # NOTE(review): the pad value is 0, which on a scale referenced to the
            # peak (ref=np.max) is the peak level rather than silence. Left as-is
            # so that training and inference preprocessing stay identical.
            mel_db = np.pad(mel_db, ((0, 0), (0, missing)), mode='constant')
        elif missing < 0:
            mel_db = mel_db[:, :width]
        return mel_db
    except Exception as err:
        print(f"Error processing {file_path}: {err}")
        return None

# Load and preprocess data
data_dir = r"C:\Users\ALFIYA\Downloads\genres"

# Each sub-directory of data_dir is one genre. os.listdir returns entries in
# arbitrary order, so sort them: the class index of a genre must be the same on
# every run (and must match the label list used at inference time). Stray files
# next to the genre folders are ignored instead of being treated as classes.
genres = sorted(
    entry for entry in os.listdir(data_dir)
    if os.path.isdir(os.path.join(data_dir, entry))
)

X, y = [], []
for idx, genre in enumerate(genres):
    genre_path = os.path.join(data_dir, genre)
    # Sorted file order keeps the sample order (and therefore the seeded
    # train/test split) reproducible.
    for file in sorted(os.listdir(genre_path)):
        file_path = os.path.join(genre_path, file)
        spectrogram = preprocess_audio(file_path)
        # preprocess_audio returns None for files it could not decode; skip them.
        if spectrogram is not None:
            X.append(spectrogram)
            y.append(idx)

if not X:
    # Fail here with a clear message rather than later inside Keras.
    raise RuntimeError(f"No audio could be loaded from {data_dir}")

# Add a trailing channel axis for Conv2D and one-hot encode the labels.
X = np.array(X)[..., np.newaxis]
y = to_categorical(np.array(y), num_classes=len(genres))
print(f"Processed data shape: {X.shape}, labels shape: {y.shape} ✅")
print(f"Class index order: {genres}")

# Define a more robust model
def _conv_block(filters):
    """Return one conv stage: 3x3 conv -> batch norm -> 2x2 max-pool -> dropout."""
    return [
        Conv2D(filters, (3, 3), activation='relu'),
        BatchNormalization(),
        MaxPooling2D((2, 2)),
        Dropout(0.25),
    ]


# Four conv stages with a doubling number of filters on a 128x128x1 input.
feature_layers = [Input(shape=(128, 128, 1))]
for n_filters in (32, 64, 128, 256):
    feature_layers.extend(_conv_block(n_filters))

# Dense head with one softmax output per genre.
classifier_layers = [
    Flatten(),
    Dense(512, activation='relu'),
    Dropout(0.5),
    Dense(len(genres), activation='softmax'),
]

model = Sequential(feature_layers + classifier_layers)

# Compile the model
optimizer = Adam(learning_rate=0.0001)
model.compile(
    optimizer=optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
model.summary()

# Split data into training and testing sets.
# Stratify on the class index so each genre keeps the same share in both splits;
# with a small number of clips per genre a plain random split can be skewed.
# Stratification needs at least two samples per class, so fall back to a plain
# split when that does not hold.
class_ids = y.argmax(axis=1)
_, class_counts = np.unique(class_ids, return_counts=True)
stratify_on = class_ids if class_counts.min() >= 2 else None
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=stratify_on
)

# Callbacks
#early_stopping = EarlyStopping(monitor='val_loss', patience=7, restore_best_weights=True)
#reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3)

# Train the model
# NOTE(review): the test split doubles as validation data here, so the numbers
# reported below are not from data the training loop never saw.
history = model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=70,
    batch_size=32,
#    callbacks=[early_stopping, reduce_lr]
)

# Evaluate the model (returns the loss followed by the compiled 'accuracy' metric)
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print(f"Test Loss: {test_loss:.2f} 🔍")
print(f"Test Accuracy: {test_accuracy:.2%}")

# Save the model
model.save("enhanced_music_genre_classifier.h5")
print("Model saved as 'enhanced_music_genre_classifier.h5' 💾")


Processed data shape: (1001, 128, 128, 1), labels shape: (1001, 10) ✅


Epoch 1/70
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 574ms/step - accuracy: 0.1378 - loss: 4.6257 - val_accuracy: 0.0647 - val_loss: 3.2644
Epoch 2/70
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 579ms/step - accuracy: 0.2436 - loss: 2.6802 - val_accuracy: 0.0647 - val_loss: 2.9256
Epoch 3/70
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 539ms/step - accuracy: 0.3168 - loss: 2.1336 - val_accuracy: 0.0945 - val_loss: 2.8410
Epoch 4/70
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 535ms/step - accuracy: 0.3440 - loss: 1.9549 - val_accuracy: 0.1642 - val_loss: 2.8350
Epoch 5/70
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 537ms/step - accuracy: 0.3944 - loss: 1.7208 - val_accuracy: 0.2388 - val_loss: 2.7638
Epoch 6/70
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 508ms/step - accuracy: 0.4072 - loss: 1.7042 - val_accuracy: 0.2935 - val_loss: 2.4353
Epoch 7/70
[1m25/25[



Model saved as 'enhanced_music_genre_classifier.h5' 💾


In [1]:
import gradio as gr
import librosa
import numpy as np
from tensorflow.keras.models import load_model

def load_trained_model(model_path: str):
    """Load a saved Keras model from ``model_path`` and compile it.

    Returns:
        The compiled model, or ``None`` if loading or compiling raised
        (the error is printed).
    """
    compile_options = {
        "optimizer": "adam",
        "loss": "categorical_crossentropy",
        "metrics": ["accuracy"],
    }
    try:
        # Load without the saved training configuration, then attach a fresh one.
        restored = load_model(model_path, compile=False)
        restored.compile(**compile_options)
        print(f"Model loaded and compiled successfully from {model_path}")
        return restored
    except Exception as exc:
        print(f"Error loading the model: {exc}")
        return None


# Load the trained classifier; `model` is None if loading failed.
MODEL_PATH = "enhanced_music_genre_classifier.h5"
model = load_trained_model(MODEL_PATH)

# Display names for the model's output classes, indexed by class id
# (ensure the order matches the label order used for your dataset genres)
genres = [
    "Blues",
    "Classical",
    "Country",
    "Disco",
    "Hip Hop",
    "Jazz",
    "Metal",
    "Pop",
    "Reggae",
    "Rock",
]

# Function to preprocess the audio
def preprocess_audio(audio: str, duration: int = 30, target_shape: tuple = (128, 128)) -> np.ndarray | None:
    try:
        y, sr = librosa.load(audio, duration=duration)  # Load audio as a numpy array
        spect = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=target_shape[0])  # Mel spectrogram
        spect_db = librosa.power_to_db(spect, ref=np.max)  # Convert to decibels
        
        # Adjust the spectrogram shape to match the target shape
        if spect_db.shape[1] < target_shape[1]:  # Pad if necessary
            padding = target_shape[1] - spect_db.shape[1]
            spect_db = np.pad(spect_db, ((0, 0), (0, padding)), mode='constant')
        elif spect_db.shape[1] > target_shape[1]:  # Crop if necessary
            spect_db = spect_db[:, :target_shape[1]]
        
        return spect_db
    except Exception as e:
        print(f"Error processing audio: {e}")
        return None

# Function to predict the genre of the uploaded audio
def predict_genre(audio: str) -> str:
    """Classify an audio file and describe the result as text.

    Args:
        audio: Path of the audio file to classify.

    Returns:
        A two-line message with the predicted genre name and the highest
        model output as a percentage, or an error message when the model is
        not loaded, the audio cannot be preprocessed, or prediction fails.
    """
    if model is None:
        return "Error: Model is not loaded. Please check the model path."

    mel_db = preprocess_audio(audio)
    if mel_db is None:
        return "Error processing the audio file. Please ensure it is a valid audio file."

    # The network takes (batch, mel, frames, channel): add the two singleton axes.
    batch = np.expand_dims(mel_db, axis=(0, -1))

    try:
        scores = model.predict(batch)
        best_class = np.argmax(scores)
        label = genres[best_class]
        percent = np.max(scores) * 100
        return f"Predicted Genre: {label} 🎵\nConfidence: {percent:.2f}%"
    except Exception as err:
        print(f"Error during prediction: {err}")
        return "Error during prediction. Please try again."

# Create the Gradio interface
genre_list = ", ".join(genres)

# Markdown shown under the title; paragraphs are separated by blank lines.
description = "\n\n".join([
    "🎶 **Welcome to the Music Genre Oracle!** 🎶",
    "Upload a music file, and I will analyze it to predict its genre with high confidence.",
    f"**Possible Genres**: {genre_list}",
    "💡 *Make sure the audio is clear and contains at least 30 seconds of music for better accuracy.*",
])

# The audio widget hands predict_genre a file path; the result is shown as text.
audio_input = gr.Audio(type="filepath", label="Upload Your Music File 🎵")
result_output = gr.Text(label="Prediction Results 🧠")

interface = gr.Interface(
    fn=predict_genre,
    inputs=audio_input,
    outputs=result_output,
    title="🎵 MUSIC GENRE ORACLE 🎵",
    description=description,
    theme="default",
    live=False,
)

# Launch the Gradio app
if __name__ == "__main__":
    interface.launch()


Model loaded and compiled successfully from enhanced_music_genre_classifier.h5
* Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 475ms/step
