In [7]:
import tensorflow as tf
import tensorflow_hub as tfhub
import librosa 
import numpy as np
import pandas as pd
import os
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from tqdm import tqdm

# YAMNet model from TensorFlow Hub. Loaded once here and reused by the
# embedding-extraction cell and the single-file inference cell.
YAMNET_HANDLE = "https://tfhub.dev/google/yamnet/1"
yamnet_model = tfhub.load(YAMNET_HANDLE)

In [None]:
# Root folder expected to contain one sub-folder of audio files per vibe.
dataset_path = "../librosa/vibes"
# Cache file for the extracted embeddings; extraction is skipped when it exists.
dataset_csv_path = "songs_embedding.csv"

# Built in parallel: X[i] is one song's embedding vector, y[i] its integer label.
X = []
y = []

# Folder name -> integer class id. The training cell indexes `vibe_label` with
# these ids, so both must list the vibes in the same order.
vibe_map = {"calm": 0,
            "chaotic": 1,
            "chill": 2,
            "energetic": 3}

if not os.path.exists(dataset_csv_path):
    # os.listdir returns entries in arbitrary order; sorting keeps the row order
    # of the cached CSV (and so the seeded train/test split) stable across runs.
    for vibe in sorted(os.listdir(dataset_path)):
        vibe_path = os.path.join(dataset_path, vibe)

        # Skip stray files and unknown folders up front. Previously a stray file
        # crashed os.listdir(vibe_path), and an unknown folder appended to X but
        # not to y, leaving the two lists misaligned.
        if vibe not in vibe_map or not os.path.isdir(vibe_path):
            print(f"Skipping {vibe_path}: not a known vibe folder")
            continue

        label = vibe_map[vibe]

        for file in tqdm(sorted(os.listdir(vibe_path)), desc=f"Extracting the songs embbeding in folder {vibe}"):
            file_path = os.path.join(vibe_path, file)
            try:
                # Audio is resampled to 16 kHz mono and cast to float32 before
                # being passed to YAMNet.
                waveform, _ = librosa.load(file_path, sr=16000, mono=True)
                waveform = waveform.astype(np.float32)

                # Only the second output (embeddings) is used; averaging over
                # axis 0 collapses it into a single vector per song.
                _, embeddings, _ = yamnet_model(waveform)
                song_embeddings = tf.reduce_mean(embeddings, axis=0).numpy()
            except Exception as error:
                # Best-effort extraction: report the file and the cause, then
                # move on. `Exception` (not a bare except) lets Ctrl-C /
                # kernel interrupts stop the loop.
                print(f"Error has occured in file path {file_path}: {error!r}")
                continue

            # Append both together so X and y can never get out of step.
            X.append(song_embeddings)
            y.append(label)

In [None]:
# Persist the freshly extracted embeddings so later runs can skip extraction.
if not os.path.exists(dataset_csv_path):
    # Never cache an unusable dataset: once the CSV exists the extraction cell
    # is skipped, so an empty file would silently stick around forever.
    if len(X) == 0:
        raise ValueError(
            f"No embeddings were extracted from {dataset_path!r}; "
            f"refusing to write an empty {dataset_csv_path!r}"
        )
    if len(X) != len(y):
        raise ValueError(
            f"Embeddings and labels are misaligned: {len(X)} vectors vs {len(y)} labels"
        )

    # One row per song, one column per embedding dimension, plus the label.
    df = pd.DataFrame(X)
    df["label"] = y

    # The DataFrame index is written as the first CSV column; the loading cell
    # reads it back with index_col=0.
    df.to_csv(dataset_csv_path)

In [None]:
# Load the cached embeddings; the first CSV column is the saved DataFrame index.
df = pd.read_csv(dataset_csv_path, index_col=0)

# Integer targets, and the feature matrix made of every column except "label".
y = np.array(df["label"])
X = np.array(df.drop(columns=["label"]))

# Display names, ordered so that vibe_label[class_id] is that class's name.
vibe_label = np.array(["calm", "chaotic", "chill", "energetic"])

# Stratified 80/20 split with a fixed seed.
split_options = dict(test_size=0.2, random_state=42, stratify=y)
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

# Gradient-boosted tree classifier trained on the song embeddings.
xgb_params = {"n_estimators": 300, "max_depth": 15, "learning_rate": 0.1}
xgb_model = XGBClassifier(**xgb_params)
xgb_model.fit(X_train, y_train)

# Predict class ids for the held-out songs and show them as vibe names.
y_pred = xgb_model.predict(X_test)
print("Predicted:", vibe_label[y_pred])


Predicted: ['country' 'metal' 'pop' 'country' 'metal' 'hiphop' 'classical' 'hiphop'
 'country' 'blues' 'disco' 'blues' 'classical' 'blues' 'metal' 'reggae'
 'classical' 'country' 'rock' 'pop' 'hiphop' 'disco' 'classical'
 'classical' 'disco' 'reggae' 'jazz' 'pop' 'classical' 'rock' 'hiphop'
 'country' 'blues' 'rock' 'rock' 'blues' 'jazz' 'jazz' 'blues' 'hiphop'
 'metal' 'pop' 'disco' 'country' 'pop' 'hiphop' 'rock' 'metal' 'reggae'
 'classical' 'blues' 'hiphop' 'disco' 'reggae' 'reggae' 'rock' 'reggae'
 'hiphop' 'hiphop' 'metal' 'reggae' 'disco' 'classical' 'hiphop' 'metal'
 'rock' 'pop' 'metal' 'pop' 'classical' 'reggae' 'jazz' 'blues' 'jazz'
 'hiphop' 'disco' 'blues' 'metal' 'hiphop' 'disco' 'reggae' 'hiphop'
 'reggae' 'classical' 'country' 'country' 'blues' 'hiphop' 'country'
 'country' 'rock' 'country' 'classical' 'hiphop' 'metal' 'reggae' 'disco'
 'jazz' 'blues' 'classical' 'blues' 'country' 'pop' 'classical' 'hiphop'
 'disco' 'metal' 'hiphop' 'disco' 'blues' 'disco' 'jazz' 'regga

In [None]:
# Text summary of the classifier's metrics on the held-out split, with rows
# labelled by the names in `vibe_label`.
report = classification_report(y_true=y_test, y_pred=y_pred, target_names=vibe_label)
print("report:", report)

report:               precision    recall  f1-score   support

       blues       0.83      0.83      0.83        18
   classical       0.95      0.95      0.95        22
     country       0.67      0.93      0.78        15
       disco       0.78      0.90      0.84        20
      hiphop       0.93      0.93      0.93        27
        jazz       0.85      1.00      0.92        11
       metal       1.00      0.95      0.97        19
         pop       0.90      0.79      0.84        24
      reggae       1.00      0.90      0.95        21
        rock       0.78      0.61      0.68        23

    accuracy                           0.87       200
   macro avg       0.87      0.88      0.87       200
weighted avg       0.88      0.87      0.87       200



In [None]:
# Classify a single song with the trained model.
# The clip must be preprocessed exactly like the training data (16 kHz, mono).
# Without an explicit `sr`, librosa resamples to its own default rate, so the
# embeddings would come from differently preprocessed audio than the classifier
# was trained on.
waveform, _ = librosa.load("../librosa/reggae.992.mp3", sr=16000, mono=True)
waveform = waveform.astype(np.float32)

# Same pooling as in training: average the embeddings over axis 0 to get one
# vector for the whole song.
_, embeddings, _ = yamnet_model(waveform)
song_embeddings = tf.reduce_mean(embeddings, axis=0)

# The classifier expects a 2-D (n_samples, n_features) array, hence the reshape
# of the single vector to one row.
pred = xgb_model.predict(song_embeddings.numpy().reshape(1, -1))
print("Predicted:", vibe_label[pred[0]])

Predicted: rock
