Import all the libraries

In [None]:
import os
import pandas as pd
import numpy as np
import librosa
import tensorflow_hub as hub
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.svm import SVC
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Conv1D, MaxPooling1D, Flatten, Dropout, BatchNormalization, Bidirectional
import tensorflow.keras.layers as layers

Load the YAMNet Model

In [56]:
# Load the YAMNet model from TensorFlow Hub; the feature-extraction helper below calls it on each waveform.
yamnet_model = hub.load('https://tfhub.dev/google/yamnet/1')

In [57]:
def extract_yamnet_features(audio_path):
    """Return the averaged YAMNet embedding of a single audio file.

    The file is read with librosa as 16 kHz mono audio, cast to float32 and
    passed to the module-level ``yamnet_model``. The embeddings it returns are
    averaged along axis 0.

    Args:
        audio_path: Path of the audio file to read.

    Returns:
        The averaged embedding as a NumPy array, or ``None`` if loading or
        inference raised. The error is printed, not propagated.
    """
    try:
        samples, _rate = librosa.load(audio_path, sr=16000, mono=True)
        _scores, frame_embeddings, _unused = yamnet_model(samples.astype(np.float32))
        return frame_embeddings.numpy().mean(axis=0)
    except Exception as err:
        print(f"Error processing {audio_path}: {err}")
        return None

def extract_all_yamnet_features(data_path):
    """Extract YAMNet features for every ``.wav`` file under ``data_path``.

    ``data_path`` is scanned for sub-directories; each sub-directory name is
    used as the label of the ``.wav`` files directly inside it. Entries are
    visited in sorted order because ``os.listdir`` returns them in arbitrary
    order, which would otherwise make the row order of the result (and any
    CSV or seeded train/test split derived from it) differ between runs and
    machines.

    Files for which ``extract_yamnet_features`` returns ``None`` are skipped.

    Args:
        data_path: Root directory of the dataset.

    Returns:
        Tuple ``(features, labels)`` of NumPy arrays with one entry per
        successfully processed file.
    """
    data = []
    labels = []
    for genre in sorted(os.listdir(data_path)):
        genre_path = os.path.join(data_path, genre)
        if not os.path.isdir(genre_path):
            continue
        for file in sorted(os.listdir(genre_path)):
            if not file.endswith(".wav"):
                continue
            features = extract_yamnet_features(os.path.join(genre_path, file))
            if features is not None:
                data.append(features)
                labels.append(genre)
    return np.array(data), np.array(labels)

Support Vector Machine

In [58]:
def train_svm(X_train, y_train):
    """Fit and return an RBF-kernel support vector classifier.

    Args:
        X_train: Training feature matrix.
        y_train: Training labels, one per row of ``X_train``.

    Returns:
        The fitted ``SVC`` (``C=10``, ``gamma='scale'``).
    """
    svm_params = {"kernel": "rbf", "C": 10, "gamma": "scale"}
    classifier = SVC(**svm_params)
    classifier.fit(X_train, y_train)
    return classifier

Convolutional Neural Network

In [59]:
def create_cnn(input_shape, num_classes):
    """Build and compile the 1-D convolutional classifier.

    Two Conv1D -> BatchNormalization -> MaxPooling1D -> Dropout stages
    (128 then 256 filters, kernel size 5) feed a flattened dense head that
    ends in a softmax over ``num_classes``.

    Args:
        input_shape: Shape of one sample, excluding the batch dimension.
        num_classes: Number of output classes.

    Returns:
        A ``Sequential`` model compiled with the Adam optimizer and sparse
        categorical cross-entropy loss, tracking accuracy.
    """
    network = Sequential()

    # First convolutional stage; it also declares the input shape.
    network.add(Conv1D(128, kernel_size=5, activation='relu', input_shape=input_shape))
    network.add(BatchNormalization())
    network.add(MaxPooling1D(pool_size=2))
    network.add(Dropout(0.3))

    # Second convolutional stage.
    network.add(Conv1D(256, kernel_size=5, activation='relu'))
    network.add(BatchNormalization())
    network.add(MaxPooling1D(pool_size=2))
    network.add(Dropout(0.3))

    # Dense classification head.
    network.add(Flatten())
    network.add(Dense(256, activation='relu'))
    network.add(Dropout(0.5))
    network.add(Dense(num_classes, activation='softmax'))

    network.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return network


Long Short-Term Memory

In [60]:
def create_lstm(input_shape, num_classes):
    """Build and compile the stacked bidirectional LSTM classifier.

    The input shape is declared with an explicit ``Input`` as the first
    element of the ``Sequential`` model. Previously ``input_shape`` was handed
    to the ``LSTM`` wrapped inside ``Bidirectional``; ``Sequential`` only
    inspects its first layer (the wrapper), so the shape was not picked up and
    the model stayed unbuilt until the first ``fit`` call.

    Args:
        input_shape: Shape of one sample, excluding the batch dimension.
        num_classes: Number of output classes.

    Returns:
        A ``Sequential`` model compiled with the Adam optimizer and sparse
        categorical cross-entropy loss, tracking accuracy.
    """
    model = Sequential([
        layers.Input(shape=input_shape),
        # return_sequences=True so the second recurrent layer receives the full sequence.
        Bidirectional(LSTM(128, return_sequences=True)),
        Dropout(0.3),
        Bidirectional(LSTM(64)),
        Dense(128, activation='relu'),
        Dropout(0.5),
        Dense(num_classes, activation='softmax')
    ])
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

Transformer

In [61]:
def create_transformer(input_shape, num_classes):
    """Build and compile a single-block self-attention classifier.

    The input is projected to 128 units and layer-normalised, passed through
    one multi-head self-attention layer (8 heads, key dimension 64) with a
    residual connection and a second layer normalisation, then average-pooled
    and classified by a dense head with dropout.

    Args:
        input_shape: Shape of one sample, excluding the batch dimension.
        num_classes: Number of output classes.

    Returns:
        A ``tf.keras.Model`` compiled with the Adam optimizer and sparse
        categorical cross-entropy loss, tracking accuracy.
    """
    inputs = layers.Input(shape=input_shape)

    # Projection + normalisation.
    hidden = layers.Dense(128, activation="relu")(inputs)
    hidden = layers.LayerNormalization()(hidden)

    # Self-attention (query and value are the same tensor) with a residual connection.
    attended = layers.MultiHeadAttention(num_heads=8, key_dim=64)(hidden, hidden)
    hidden = layers.Add()([hidden, attended])
    hidden = layers.LayerNormalization()(hidden)

    # Pool and classify.
    pooled = layers.GlobalAveragePooling1D()(hidden)
    head = layers.Dense(128, activation='relu')(pooled)
    head = Dropout(0.5)(head)
    outputs = layers.Dense(num_classes, activation='softmax')(head)

    classifier = tf.keras.Model(inputs, outputs)
    classifier.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return classifier

In [62]:
def classify_audio(audio_path, model, scaler, label_encoder):
    """Predict the genre label of one audio file.

    Works with classifiers whose ``predict`` returns class indices (such as
    the SVM) and with models whose ``predict`` returns one row of per-class
    scores (such as the softmax networks in this notebook); in the latter
    case the highest-scoring class is taken. Previously the score matrix was
    passed straight to ``label_encoder.inverse_transform``.

    Args:
        audio_path: Path of the audio file to classify.
        model: Fitted classifier exposing ``predict``.
        scaler: Fitted scaler exposing ``transform``.
        label_encoder: Fitted encoder exposing ``inverse_transform``.

    Returns:
        The decoded label, or the string ``"Error processing audio"`` when
        feature extraction failed.
    """
    features = extract_yamnet_features(audio_path)
    if features is None:
        return "Error processing audio"

    scaled = np.asarray(scaler.transform([features]))

    # Models that report a 3-D input shape (batch, features, 1) were trained on
    # inputs with a trailing channel axis, so add it here as well.
    input_shape = getattr(model, "input_shape", None)
    if isinstance(input_shape, tuple) and len(input_shape) == 3:
        scaled = scaled.reshape(scaled.shape[0], scaled.shape[1], 1)

    prediction = np.asarray(model.predict(scaled))
    if prediction.ndim > 1:
        # Per-class scores -> index of the best class.
        prediction = prediction.argmax(axis=-1)
    return label_encoder.inverse_transform(prediction)[0]

In [63]:
# Load extracted features
df = pd.read_csv("/Users/js/Desktop/Music Genre Classification/Data/yamnet_features.csv")

# Encode the labels into their own array instead of overwriting df["Genre"]:
# once the column holds integers, any later re-encoding of df["Genre"] would
# refit the encoder on numbers and lose the genre names in classes_.
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(df["Genre"])

# Select the features by column name so the result does not depend on where
# the "Genre" column sits in the CSV.
X_raw = df.drop(columns=["Genre"]).values

# Split first, then fit the scaler on the training rows only, so that
# test-set statistics do not leak into the preprocessing.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(X_raw, y, test_size=0.2, random_state=42)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Scaled copy of the full matrix, kept under its original name.
X = scaler.transform(X_raw)

In [64]:
# Fit the SVM now; the three neural networks are only constructed here and are
# trained in later cells. Each network takes (n_features, 1) inputs and has one
# output unit per encoded class.
svm_model = train_svm(X_train, y_train)
cnn_model, lstm_model, transformer_model = (
    build((X_train.shape[1], 1), len(label_encoder.classes_))
    for build in (create_cnn, create_lstm, create_transformer)
)

In [65]:
# Append a trailing axis of size 1 so each sample becomes (n_features, 1),
# the input shape the neural networks were constructed with.
X_train_r = np.expand_dims(X_train, axis=-1)
X_test_r = np.expand_dims(X_test, axis=-1)

In [66]:
# Train the CNN for 20 epochs with batch size 32.
# NOTE(review): the test split is passed as validation data, so the validation metrics are not from a separate hold-out set.
cnn_model.fit(X_train_r, y_train, epochs=20, batch_size=32, validation_data=(X_test_r, y_test))

Epoch 1/20


2025-02-13 22:32:28.059812: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




2025-02-13 22:32:30.078146: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.src.callbacks.History at 0x414c33e20>

In [67]:
# Train the bidirectional LSTM for 20 epochs with batch size 32.
# NOTE(review): the test split is passed as validation data, so the validation metrics are not from a separate hold-out set.
lstm_model.fit(X_train_r, y_train, epochs=20, batch_size=32, validation_data=(X_test_r, y_test))


Epoch 1/20


2025-02-13 22:33:35.236536: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2025-02-13 22:33:35.657348: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2025-02-13 22:33:35.676757: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2025-02-13 22:33:35.890722: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2025-02-13 22:33:35.910128: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2025-02-13 22:33:36.094725: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2025-02-13 22:33:36.121579: I tensorflow/core/grappler/optimizers/cust



2025-02-13 22:33:45.182215: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2025-02-13 22:33:45.345101: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2025-02-13 22:33:45.358779: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2025-02-13 22:33:45.517505: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2025-02-13 22:33:45.535642: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.src.callbacks.History at 0x3e57a9df0>

In [68]:
# Train the attention model for 20 epochs with batch size 32.
# NOTE(review): the test split is passed as validation data, so the validation metrics are not from a separate hold-out set.
transformer_model.fit(X_train_r, y_train, epochs=20, batch_size=32, validation_data=(X_test_r, y_test))

Epoch 1/20


2025-02-13 22:36:46.871583: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2025-02-13 22:36:48.983 python[97787:6171080] Error creating directory 
 The volume “Macintosh HD” is out of space. You can’t save the file “mpsgraph-97787-2025-02-13_22_36_48-4089147050” because the volume “Macintosh HD” is out of space.
2025-02-13 22:36:49.495 python[97787:6171077] Error creating directory 
 The volume “Macintosh HD” is out of space. You can’t save the file “mpsgraph-97787-2025-02-13_22_36_49-2862996541” because the volume “Macintosh HD” is out of space.
2025-02-13 22:36:49.502 python[97787:6171077] Error creating directory 
 The volume “Macintosh HD” is out of space. You can’t save the file “mpsgraph-97787-2025-02-13_22_36_49-991255949” because the volume “Macintosh HD” is out of space.
2025-02-13 22:36:49.513 python[97787:6171077] Error creating directory 
 The volume “



2025-02-13 22:37:45.884380: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.src.callbacks.History at 0x3e55b8850>

In [None]:
# Save models
import joblib
svm_model_path = "/Users/js/Desktop/Music Genre Classification/Models/svm_model.pkl"
cnn_model_path = "/Users/js/Desktop/Music Genre Classification/Models/cnn_model.h5"
lstm_model_path = "/Users/js/Desktop/Music Genre Classification/Models/lstm_model.h5"
transformer_model_path = "/Users/js/Desktop/Music Genre Classification/Models/transformer_model.h5"

# Create the output folder up front so the dump/save calls below do not fail
# on a missing directory.
models_dir = os.path.dirname(svm_model_path)
os.makedirs(models_dir, exist_ok=True)

# The fitted scaler and label encoder are required to turn a new clip into a
# prediction (see classify_audio), so they are stored next to the models.
scaler_path = os.path.join(models_dir, "scaler.pkl")
label_encoder_path = os.path.join(models_dir, "label_encoder.pkl")

joblib.dump(svm_model, svm_model_path)
joblib.dump(scaler, scaler_path)
joblib.dump(label_encoder, label_encoder_path)
cnn_model.save(cnn_model_path)
lstm_model.save(lstm_model_path)
transformer_model.save(transformer_model_path)

print("Music Genre Classification Models Trained and Saved Successfully!")


In [43]:
# Load YAMNet model
yamnet_model = hub.load('https://tfhub.dev/google/yamnet/1')

# Encode genres into their own array; df is left untouched so re-running this
# cell does not re-encode an already encoded column.
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(df["Genre"])

# Split dataset.
# Features are selected by column name: positional slicing (features = columns
# 1.., label = column 0) is only right when "Genre" is the first column, and
# the features CSV written by this notebook stores it last.
X_raw = df.drop(columns=["Genre"]).values
X_train_raw, X_test_raw, y_train, y_test = train_test_split(X_raw, y, test_size=0.2, random_state=42)

# Fit the scaler on the training rows only so test-set statistics do not leak
# into the preprocessing.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Scaled copy of the full matrix, kept under its original name.
X = scaler.transform(X_raw)

def extract_yamnet_features(audio_path):
    """Compute one averaged YAMNet embedding for the file at ``audio_path``.

    Args:
        audio_path: Path of the audio file to read.

    Returns:
        The mean over axis 0 of the embeddings returned by ``yamnet_model``,
        or ``None`` if reading the file or running the model raised (the
        error is printed).
    """
    try:
        # Decode as 16 kHz mono and hand the model a float32 waveform.
        signal, _sr = librosa.load(audio_path, sr=16000, mono=True)
        _scores, embeddings, _rest = yamnet_model(signal.astype(np.float32))

        # Collapse the leading axis into a single vector.
        pooled = np.mean(embeddings.numpy(), axis=0)
    except Exception as exc:
        print(f"Error processing {audio_path}: {exc}")
        return None
    return pooled



# SVM Model
def train_svm(X_train, y_train):
    """Train an ``SVC`` with an RBF kernel, ``C=10`` and ``gamma='scale'``.

    Args:
        X_train: Training feature matrix.
        y_train: Training labels aligned with ``X_train``.

    Returns:
        The fitted classifier.
    """
    svm = SVC(C=10, gamma='scale', kernel='rbf')
    svm.fit(X_train, y_train)
    return svm

# Train SVM on YAMNet embeddings, using the X_train / y_train split built earlier in this cell
svm_model = train_svm(X_train, y_train)

# Function to classify a new audio file
def classify_audio(audio_path, model, label_encoder):
    """Predict the genre label of one audio file.

    Features are normalised with the module-level ``scaler``. Classifiers
    whose ``predict`` returns class indices and models whose ``predict``
    returns one row of per-class scores are both supported; for the latter
    the highest-scoring class is taken before decoding.

    Args:
        audio_path: Path of the audio file to classify.
        model: Fitted classifier exposing ``predict``.
        label_encoder: Fitted encoder exposing ``inverse_transform``.

    Returns:
        The decoded label, or the string ``"Error processing audio"`` when
        feature extraction failed.
    """
    features = extract_yamnet_features(audio_path)
    if features is None:
        return "Error processing audio"

    scaled = np.asarray(scaler.transform([features]))

    # Models that report a 3-D input shape (batch, features, 1) expect a
    # trailing channel axis on their inputs.
    input_shape = getattr(model, "input_shape", None)
    if isinstance(input_shape, tuple) and len(input_shape) == 3:
        scaled = scaled.reshape(scaled.shape[0], scaled.shape[1], 1)

    prediction = np.asarray(model.predict(scaled))
    if prediction.ndim > 1:
        # Per-class scores -> index of the best class.
        prediction = prediction.argmax(axis=-1)
    return label_encoder.inverse_transform(prediction)[0]

# Status message emitted once the SVM above is trained and the helper functions in this cell are defined.
print("YAMNet-based classification system is ready!")

YAMNet-based classification system is ready!


In [45]:
import os

# Path to genres dataset: the extractor below treats each sub-directory as one
# genre and reads the .wav files directly inside it.
data_path = "/Users/js/Desktop/Music Genre Classification/Data/genres_original"

# Function to extract YAMNet features for all files
def extract_all_yamnet_features(data_path):
    """Extract YAMNet features for every ``.wav`` file under ``data_path``.

    Each sub-directory of ``data_path`` is treated as one genre and its name
    becomes the label of the ``.wav`` files directly inside it. Directory
    listings are sorted because ``os.listdir`` returns entries in arbitrary
    order; without sorting, the row order of the result (and of the CSV and
    seeded train/test split built from it) could change between runs.

    Files for which ``extract_yamnet_features`` returns ``None`` are skipped.

    Args:
        data_path: Root directory of the dataset.

    Returns:
        Tuple ``(features, labels)`` of NumPy arrays with one entry per
        successfully processed file.
    """
    data = []
    labels = []

    for genre in sorted(os.listdir(data_path)):
        genre_path = os.path.join(data_path, genre)
        if not os.path.isdir(genre_path):
            continue
        for file in sorted(os.listdir(genre_path)):
            if not file.endswith(".wav"):
                continue
            file_path = os.path.join(genre_path, file)
            features = extract_yamnet_features(file_path)
            if features is not None:
                data.append(features)
                labels.append(genre)

    return np.array(data), np.array(labels)

# Run the extractor over the whole dataset folder.
X_yamnet, y_yamnet = extract_all_yamnet_features(data_path)

# Write the features to CSV with the label appended as a final "Genre" column,
# so later cells can load them without repeating the extraction.
df_yamnet = pd.DataFrame(X_yamnet).assign(Genre=y_yamnet)
df_yamnet.to_csv(
    "/Users/js/Desktop/Music Genre Classification/Data/yamnet_features.csv",
    index=False,
)

print("YAMNet feature extraction complete!")


YAMNet feature extraction complete!


In [46]:
# Load YAMNet features dataset
df = pd.read_csv("/Users/js/Desktop/Music Genre Classification/Data/yamnet_features.csv")

# Encode genre labels into their own array; df["Genre"] keeps the genre names,
# so re-encoding it later cannot silently fit the encoder on integers.
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(df["Genre"])

# Split features and labels by column name rather than by position.
X_raw = df.drop(columns=["Genre"]).values  # every column except the label

# Train-test split, done before scaling so the scaler never sees test rows.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(X_raw, y, test_size=0.2, random_state=42)

# Normalize features: statistics come from the training rows only.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Scaled copy of the full matrix, kept under its original name.
X = scaler.transform(X_raw)

# Train new SVM model with the notebook's shared helper (RBF kernel, C=10, gamma='scale').
svm_model = train_svm(X_train, y_train)

print("SVM trained on YAMNet features!")


SVM trained on YAMNet features!


In [47]:
def classify_audio(audio_path, model, scaler, label_encoder):
    """Predict the genre label of one audio file.

    Works with classifiers whose ``predict`` returns class indices (such as
    the SVM) and with models whose ``predict`` returns one row of per-class
    scores (such as softmax networks); in the latter case the highest-scoring
    class is taken before decoding, instead of handing the score matrix to
    ``label_encoder.inverse_transform``.

    Args:
        audio_path: Path of the audio file to classify.
        model: Fitted classifier exposing ``predict``.
        scaler: Fitted scaler exposing ``transform``.
        label_encoder: Fitted encoder exposing ``inverse_transform``.

    Returns:
        The decoded label, or the string ``"Error processing audio"`` when
        feature extraction failed.
    """
    features = extract_yamnet_features(audio_path)
    if features is None:
        return "Error processing audio"

    # Normalize using the supplied scaler.
    scaled = np.asarray(scaler.transform([features]))

    # Models that report a 3-D input shape (batch, features, 1) expect a
    # trailing channel axis on their inputs.
    input_shape = getattr(model, "input_shape", None)
    if isinstance(input_shape, tuple) and len(input_shape) == 3:
        scaled = scaled.reshape(scaled.shape[0], scaled.shape[1], 1)

    prediction = np.asarray(model.predict(scaled))
    if prediction.ndim > 1:
        # Per-class scores -> index of the best class.
        prediction = prediction.argmax(axis=-1)
    return label_encoder.inverse_transform(prediction)[0]


In [55]:
# Classify one local audio file with the SVM, passing the scaler and label encoder fitted in the cell above.
classify_audio('/Users/js/Downloads/action-urban-trap-141691.wav', svm_model, scaler, label_encoder)


'hiphop'