In [None]:
import librosa as lb
import os
import numpy as np
import pandas as pd
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from tqdm import tqdm

dataset_path = "genres_original"

def peak_normalization(y):
    return lb.util.normalize(y)

def extract_song_feature(song_path):
    try:
        y, sr = lb.load(song_path, sr=None, duration=30)

        y = peak_normalization(y)

        mfcc = lb.feature.mfcc(y=y, sr=sr, n_mfcc=13)
        mfcc_mean = np.mean(mfcc, axis=1)
        mfcc_std = np.std(mfcc, axis=1)

        chroma_stft = np.mean(lb.feature.chroma_stft(y=y, sr=sr), axis=1)

        spectral_bandwidth = np.mean(lb.feature.spectral_bandwidth(y=y, sr=sr))
        spectral_centroid = np.mean(lb.feature.spectral_centroid(y=y, sr=sr))
        spectral_rollof = np.mean(lb.feature.spectral_rolloff(y=y, sr=sr))
        
        spectral_contrast = lb.feature.spectral_contrast(y=y, sr=sr)
        spectral_contrast_mean = np.mean(spectral_contrast, axis=1)
        spectral_contrast_std = np.std(spectral_contrast, axis=1)

        rms = np.mean(lb.feature.rms(y=y))

        tempo, _ = lb.beat.beat_track(y=y, sr=sr)

        return np.hstack([mfcc_mean, 
                          mfcc_std,
                          chroma_stft, 
                          spectral_bandwidth, 
                          spectral_centroid, 
                          spectral_rollof, 
                          spectral_contrast_mean,
                          spectral_contrast_std,
                          rms,
                          tempo]) 

    except Exception as e:
        print(f"Error has occured in song file {song_path}: {e}")
        return None


In [124]:
X = []
y = []

if not os.path.exists("song_dataset.csv"):
    for genre in os.listdir(dataset_path):
        genre_path = os.path.join(dataset_path, genre)
        
        for song in tqdm(os.listdir(genre_path), desc=f"Extracting song features in folder {genre}"):
            song_path = os.path.join(genre_path, song)
            song_features = extract_song_feature(song_path)

            if song_features is not None:
                X.append(song_features)
                y.append(genre)
            

Extracting song features in folder blues: 100%|██████████| 100/100 [01:28<00:00,  1.13it/s]
Extracting song features in folder classical: 100%|██████████| 100/100 [00:42<00:00,  2.37it/s]
Extracting song features in folder country: 100%|██████████| 100/100 [00:40<00:00,  2.49it/s]
Extracting song features in folder disco: 100%|██████████| 100/100 [00:43<00:00,  2.28it/s]
Extracting song features in folder hiphop: 100%|██████████| 100/100 [00:49<00:00,  2.02it/s]
  y, sr = lb.load(song_path, sr=None)
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)


Error has occured in song file genres_original\jazz\jazz.00054.wav: 


Extracting song features in folder jazz: 100%|██████████| 100/100 [01:21<00:00,  1.23it/s]
Extracting song features in folder metal: 100%|██████████| 100/100 [01:32<00:00,  1.09it/s]
Extracting song features in folder pop: 100%|██████████| 100/100 [00:40<00:00,  2.48it/s]
Extracting song features in folder reggae: 100%|██████████| 100/100 [00:41<00:00,  2.44it/s]
Extracting song features in folder rock: 100%|██████████| 100/100 [00:44<00:00,  2.23it/s]


In [125]:
if(not os.path.exists("song_dataset.csv")):
    X = np.array(X)
    y = np.array(y)

    df = pd.DataFrame(X)
    df["genre"] = y
    df.to_csv("song_dataset.csv", index=False)

In [152]:
df = pd.read_csv("song_dataset.csv")

labels = np.array(["blues", 
          "classical", 
          "country",
          "disco",
          "hiphop",
          "jazz",
          "metal",
          "pop",
          "reggae",
          "rock"])

X = df.drop(columns=["genre"])
y = df['genre'].map({"blues": 0, 
                     "classical": 1, 
                     "country": 2,
                     "disco": 3,
                     "hiphop": 4,
                     "jazz": 5,
                     "metal": 6,
                     "pop": 7,
                     "reggae": 8,
                     "rock": 9})

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, train_size=0.25)

model = XGBClassifier(n_estimators=500, max_depth=15, learning_rate=0.04)
model.fit(X_train, y_train)

preds = model.predict(X_test)

acc = accuracy_score(y_test, preds)

print("Reuslt: ", acc)
print("Preds:", labels[preds])

Reuslt:  0.632
Preds: ['reggae' 'pop' 'pop' 'disco' 'country' 'country' 'hiphop' 'jazz' 'hiphop'
 'metal' 'rock' 'metal' 'pop' 'metal' 'pop' 'country' 'reggae' 'hiphop'
 'hiphop' 'country' 'jazz' 'disco' 'country' 'pop' 'hiphop' 'blues'
 'classical' 'country' 'reggae' 'disco' 'blues' 'pop' 'metal' 'jazz'
 'metal' 'metal' 'classical' 'classical' 'metal' 'rock' 'pop' 'blues'
 'pop' 'hiphop' 'classical' 'jazz' 'country' 'pop' 'blues' 'reggae'
 'metal' 'jazz' 'country' 'jazz' 'blues' 'hiphop' 'jazz' 'jazz' 'jazz'
 'pop' 'metal' 'metal' 'rock' 'jazz' 'pop' 'disco' 'jazz' 'disco'
 'country' 'jazz' 'blues' 'classical' 'metal' 'classical' 'metal' 'reggae'
 'blues' 'pop' 'jazz' 'metal' 'country' 'jazz' 'country' 'reggae' 'disco'
 'hiphop' 'metal' 'blues' 'hiphop' 'hiphop' 'hiphop' 'pop' 'reggae' 'jazz'
 'pop' 'blues' 'jazz' 'reggae' 'metal' 'reggae' 'metal' 'hiphop' 'country'
 'rock' 'jazz' 'rock' 'reggae' 'jazz' 'reggae' 'metal' 'country'
 'classical' 'blues' 'country' 'disco' 'disco' 'jazz' '

In [None]:
target_song = extract_song_feature("testing_song.mp3")
target_song = target_song.reshape(1, -1)

normalized_target_song = peak_normalization(target_song)

pred = model.predict(normalized_target_song)

print(labels[pred])

['pop']
