In [1]:
import os
import tqdm
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt 
import librosa.display
import seaborn as sns 
import catboost as cb 

from feature_extraction import *

from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.neighbors import KNeighborsClassifier 
from sklearn.tree import DecisionTreeClassifier 
from sklearn.ensemble import RandomForestClassifier 
from sklearn.linear_model import LogisticRegression 

from xgboost import XGBClassifier 
from sklearn import preprocessing 
from sklearn.model_selection import train_test_split 

## ADQUIRE DATASET

In [2]:
genre_directory = './data/genres_original'
genre_label = {}
label_value = 0

In [3]:
'''
@description: Assigning the folder string as label value
'''
for folder in os.listdir(genre_directory):
	genre_label[folder] = label_value
	label_value += 1

genre_label = {'blues': 0, 'classical': 1, 'country': 2, 'disco': 3, 'hiphop': 4, 'jazz': 5, 'metal': 6, 'pop': 7, 'reggae': 8, 'rock': 9}

import pandas as pd

# Inicializa o DataFrame
df = pd.DataFrame(columns=['audio_data', 'label', 'filename'])

for folder in os.listdir(genre_directory):
    print('Processing folder {0}'.format(folder))
    for files in tqdm.tqdm(os.listdir(os.path.join(genre_directory, folder))):
        try:
            features_df = extract_feature_means(os.path.join(genre_directory, folder, files))
            # Include the audio file name in the DataFrame
            features_df['filename'] = files
            features_df['label'] = folder
            df = pd.concat([df, features_df], ignore_index=True)
        except Exception as e:
            continue
            #print(f"Error processing {files}: {e}")
print('Done!')

Processing folder blues


100%|██████████| 100/100 [03:28<00:00,  2.09s/it]


Processing folder classical


100%|██████████| 100/100 [03:30<00:00,  2.11s/it]


Processing folder country


100%|██████████| 100/100 [03:30<00:00,  2.11s/it]


Processing folder disco


100%|██████████| 100/100 [03:30<00:00,  2.11s/it]


Processing folder hiphop


100%|██████████| 100/100 [03:30<00:00,  2.11s/it]


Processing folder jazz


  return f(*args, **kwargs)
100%|██████████| 100/100 [03:30<00:00,  2.11s/it]


Processing folder metal


100%|██████████| 100/100 [03:32<00:00,  2.13s/it]


Processing folder pop


100%|██████████| 100/100 [03:35<00:00,  2.16s/it]


Processing folder reggae


100%|██████████| 100/100 [03:37<00:00,  2.17s/it]


Processing folder rock


100%|██████████| 100/100 [03:37<00:00,  2.18s/it]

Done!





In [4]:
df = df.drop(columns=['audio_data', 'file_name'])

In [5]:
df.to_csv('audio_data_genre.csv', index=False)

## PREPROCESSING

In [21]:
label_encoder = preprocessing.LabelEncoder() 
df['label'] = label_encoder.fit_transform(df['label'])

In [24]:
X = df.drop(['label','filename'],axis=1) 
y = df['label']

In [26]:
cols = X.columns 
minmax = preprocessing.MinMaxScaler() 
np_scaled = minmax.fit_transform(X) 

# new data frame with the new scaled data. 
X = pd.DataFrame(np_scaled, columns = cols)

In [27]:
X_train, X_test, y_train, y_test = train_test_split(X, y, 
													test_size=0.3, 
													random_state=111) 
X_train.shape, X_test.shape, y_train.shape, y_test.shape


((699, 66), (300, 66), (699,), (300,))

In [28]:
def plot_confusion_matrix(y_test, preds, clf):
    cm = confusion_matrix(y_test, preds)
    plt.figure(figsize=(10,7))
    sns.heatmap(cm, annot=True, fmt='d')
    plt.title('Confusion Matrix for ' + clf.__class__.__name__)
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.show()

## CLASSIFICATION

In [29]:
classifier_list = []

classifier_list.append(RandomForestClassifier(n_estimators=1000, max_depth=10, random_state=0))
classifier_list.append(cb.CatBoostClassifier(verbose=0, eval_metric='Accuracy', loss_function='MultiClass'))
classifier_list.append(XGBClassifier(n_estimators=1000, learning_rate=0.05))

for clf in classifier_list: 
	clf.fit(X_train, y_train) 
	preds = clf.predict(X_test) 
	print(clf.__class__.__name__,accuracy_score(y_test, preds))

RandomForestClassifier 0.6666666666666666
CatBoostClassifier 0.71
XGBClassifier 0.7166666666666667


In [40]:
t1 = extract_feature_means('./predict/test.wav').drop(columns=['file_name'])

In [38]:
t1.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1 entries, 0 to 0
Data columns (total 66 columns):
 #   Column                         Non-Null Count  Dtype  
---  ------                         --------------  -----  
 0   zero_crossing_rate             1 non-null      float64
 1   zero_crossings                 1 non-null      int32  
 2   spectrogram                    1 non-null      float32
 3   mel_spectrogram                1 non-null      float32
 4   harmonics                      1 non-null      float32
 5   perceptual_shock_wave          1 non-null      float32
 6   spectral_centroids             1 non-null      float64
 7   spectral_centroids_delta       1 non-null      float64
 8   spectral_centroids_accelerate  1 non-null      float64
 9   chroma1                        1 non-null      float32
 10  chroma2                        1 non-null      float32
 11  chroma3                        1 non-null      float32
 12  chroma4                        1 non-null      float32

In [39]:
classifier_list[2].predict(t1)

array([8], dtype=int64)