In [19]:
import os, torch, librosa, sklearn, librosa.display, glob
import numpy as np, pandas as pd, matplotlib.pyplot as plt
from sklearn import preprocessing
from IPython.display import Audio
from tqdm.auto import tqdm
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, accuracy_score

# Shared NumPy random generator, seeded with 0.
random_state = np.random.RandomState(0)

# Root folder of the dataset; the audio clips sit under genres_original/.
data_path = '/kaggle/input/gtzan-dataset-music-genre-classification/Data'

# Every entry of genres_original/ is treated as a genre name.
genres = os.listdir(f'{data_path}/genres_original/')

# Alphabetical order makes the label indices independent of listing order.
sorted_genres = list(genres)
sorted_genres.sort()

# Map each genre name to its position (0 .. n-1) in the sorted list.
genre_dict = dict(zip(sorted_genres, range(len(sorted_genres))))
print(genre_dict)

{'blues': 0, 'classical': 1, 'country': 2, 'disco': 3, 'hiphop': 4, 'jazz': 5, 'metal': 6, 'pop': 7, 'reggae': 8, 'rock': 9}


In [20]:
# Read the 30-second feature table and discard its first column.
data = pd.read_csv(f'{data_path}/features_30_sec.csv').iloc[:, 1:]

# Target: the 'label' column (genre of each row).
y = data['label']

# Predictors: every column except the label.
X = data.drop(columns='label')
cols = X.columns

#### NORMALIZE X ####

# Min-max scale every feature so that all of them share one scale.
# NOTE(review): the scaler is fitted on all rows, i.e. before any train/test
# split, so the held-out rows contribute to the min/max used for scaling.
# Consider fitting on the training rows only — confirm whether this matters here.
min_max_scaler = preprocessing.MinMaxScaler()
np_scaled = min_max_scaler.fit_transform(X)

# Wrap the scaled array in a DataFrame that keeps the original column names.
X = pd.DataFrame(np_scaled, columns=cols)

In [21]:
# Testing with a KNN #
# Baseline: k-nearest-neighbours on the scaled tabular features, evaluated on
# a 30% hold-out split.

# Step 1: Prepare the data
# An integer seed is used instead of the shared RandomState instance: an
# instance advances each time it is consumed, so re-running this cell would
# silently produce a different split (and therefore different scores).
split_seed = 0
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=split_seed
)

# Step 2: Select the model (KNN)
knn = KNeighborsClassifier(n_neighbors=4)

# Step 3: Train the model
knn.fit(X_train, y_train)

# Step 4: Evaluate the model
y_pred = knn.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:")
print(classification_report(y_test, y_pred))


Accuracy: 0.69
Classification Report:
              precision    recall  f1-score   support

       blues       0.75      0.78      0.76        27
   classical       0.66      1.00      0.79        21
     country       0.58      0.64      0.61        33
       disco       0.43      0.62      0.51        29
      hiphop       0.77      0.68      0.72        34
        jazz       0.89      0.61      0.72        28
       metal       0.92      0.74      0.82        31
         pop       0.92      0.71      0.80        34
      reggae       0.60      0.58      0.59        26
        rock       0.65      0.65      0.65        37

    accuracy                           0.69       300
   macro avg       0.72      0.70      0.70       300
weighted avg       0.72      0.69      0.70       300



In [22]:
# Transforming audio files into Mel spectrograms and saving
#
# Phase 1 loads every clip under genres_original/<genre>/ and stores its
# dB-scaled mel spectrogram in `mel_spectogram_data[genre]`.
# Phase 2 renders each stored spectrogram to ./spectrogram/<genre>/<genre>_<i>.png.

print("Transforming the Audio Files into Mel Spectrograms:")
batch_size = 32

hop_length = 512
# This one file is excluded from processing.
# NOTE(review): presumably it cannot be decoded — confirm.
forbidden = f'{data_path}/genres_original/jazz/jazz.00054.wav'
mel_spectogram_data = {}
for genre in tqdm(genre_dict.keys(), desc="Mel spectrograms"):

    mel_spectogram_data[genre] = []
    # glob returns files in arbitrary order; sorting keeps index i (and hence
    # the saved file name <genre>_<i>.png) tied to the same clip on every run.
    for name in sorted(glob.glob(data_path + "/genres_original/" + genre + "/*")):
        if name == forbidden:
            continue

        # NOTE: `data` rebinds the name that earlier held the feature DataFrame.
        data, sampling_rate = librosa.load(name)

        mel_spec = librosa.feature.melspectrogram(y=data, sr=sampling_rate, hop_length=hop_length)
        # melspectrogram is called without `power`, so it yields a power
        # spectrogram by library default; power_to_db is the matching
        # conversion (amplitude_to_db would double every dB value).
        mel_spec_db = librosa.power_to_db(mel_spec, ref=np.max)

        mel_spectogram_data[genre].append(mel_spec_db)


print("Saving the Mel Spectrogram Images:")
path = "./spectrogram/"
os.makedirs(path, exist_ok=True)

for genre in tqdm(genre_dict.keys(), desc="Saving images"):

    genre_dir = os.path.join(path, genre)
    os.makedirs(genre_dir, exist_ok=True)

    for i, spec_db in enumerate(mel_spectogram_data[genre]):
        fig, ax = plt.subplots(1, figsize=(12,8))

        # `sampling_rate` is the value returned by the last librosa.load call above.
        img = librosa.display.specshow(spec_db, sr=sampling_rate, hop_length=hop_length, cmap='cool', ax=ax)

        fig.savefig(os.path.join(genre_dir, f"{genre}_{i}.png"))
        # Close this exact figure so long runs do not accumulate open figures.
        plt.close(fig)

Transforming the Audio Files into Mel Spectrograms:
Saving the Mel Spectrogram Images:
