In [9]:
import os, torch, librosa, sklearn, librosa.display, glob
import numpy as np, pandas as pd, matplotlib.pyplot as plt
from sklearn import preprocessing
from IPython.display import Audio
from tqdm.auto import tqdm
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, accuracy_score
from PIL import Image
from tensorflow.keras.preprocessing.image import img_to_array

random_state = np.random.RandomState(0)

data_path = '/kaggle/input/gtzan-dataset-music-genre-classification/Data'
genres=(list(os.listdir(f'{data_path}/genres_original/')))

# Alphabetize the list of genres
sorted_genres = sorted(genres)

# Create a dictionary with genres as keys and values from 0 to n-1
genre_dict = {genre: index for index, genre in enumerate(sorted_genres)}
print(genre_dict)

{'blues': 0, 'classical': 1, 'country': 2, 'disco': 3, 'hiphop': 4, 'jazz': 5, 'metal': 6, 'pop': 7, 'reggae': 8, 'rock': 9}


In [10]:
# clear outputs in kaggle
def remove_folder_contents(folder):
    for the_file in os.listdir(folder):
        file_path = os.path.join(folder, the_file)
        try:
            if os.path.isfile(file_path):
                os.unlink(file_path)
            elif os.path.isdir(file_path):
                remove_folder_contents(file_path)
                os.rmdir(file_path)
        except Exception as e:
            print(e)

folder_path = '/kaggle/working'
remove_folder_contents(folder_path)

In [21]:
# Transforming audio files into Mel spectrograms and saving

print("Transforming the Audio Files into Mel Spectrograms:")
batch_size = 32

hop_length = 512
forbidden = '/kaggle/input/gtzan-dataset-music-genre-classification/Data/genres_original/jazz/jazz.00054.wav'
mel_spectogram_data = {}
for genre in genre_dict.keys():
    
    mel_spectogram_data[genre] = []
    for name in glob.glob(data_path + "/genres_original/" + genre + "/*"):
        if name != forbidden:
            data,sampling_rate = librosa.load(name)

            mel_spec = librosa.feature.melspectrogram(y = data.ravel(), sr=sampling_rate,hop_length = hop_length)
            mel_spec_db = librosa.amplitude_to_db(mel_spec, ref=np.max)

            mel_spectogram_data[genre].append(mel_spec_db)



Transforming the Audio Files into Mel Spectrograms:


In [4]:
print("Saving the Mel Spectrogram Images:")
plt.ioff()           
x = []
y = []
for genre in genre_dict.keys():

   
    for i in range(len(mel_spectogram_data[genre])):
        plt.figure(figsize=(16, 6))

        img = librosa.display.specshow(mel_spectogram_data[genre][i], sr = sampling_rate, hop_length = hop_length,cmap = 'cool', y_axis='mel', fmax=8000, x_axis='time')
        image_path = genre + "_" + str(i) + ".png"
        x.append(image_path)
        y.append(genre)
        if not os.path.exists(image_path):
            plt.savefig(image_path)
        plt.close()
df = pd.DataFrame({'image_path': x, 'genre': y})

Saving the Mel Spectrogram Images:


In [5]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Split the data
x_train, x_test, y_train, y_test = train_test_split(df['image_path'], df['genre'], stratify=df['genre'], test_size=0.3, random_state=0)

# Create a new DataFrame for training data
train_df = pd.DataFrame({'image_path': x_train, 'genre': y_train})

# Create ImageDataGenerators
train_datagen = ImageDataGenerator(rescale=1./255, validation_split=0.2)
test_datagen = ImageDataGenerator(rescale=1./255)

# Create the training set
training_set = train_datagen.flow_from_dataframe(
    dataframe=train_df,
    x_col='image_path',
    y_col='genre',
    target_size=(224, 224),  # Adjusted size
    batch_size=32,
    class_mode='categorical',
    subset='training',
)

test_set = train_datagen.flow_from_dataframe(
    dataframe=train_df,
    x_col='image_path',
    y_col='genre',
    target_size=(224, 224),  # Adjusted size
    batch_size=32,
    class_mode='categorical',
    subset='training',
)

Found 560 validated image filenames belonging to 10 classes.
Found 560 validated image filenames belonging to 10 classes.


In [27]:
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, BatchNormalization, Dropout
from keras.layers import Flatten, Dense
input_shape=(224, 224, 3)

# build network topology

model=Sequential([
    Flatten(input_shape=input_shape),
    Dropout(0.2),
    Dense(512,activation='relu'),
    Dropout(0.2),
    
    Dense(256,activation='relu'),
    Dropout(0.2),
    
    Dense(128, activation='relu'),
    Dropout(0.2),

    Dense(64, activation='relu'),
    Dropout(0.2),

    Dense(32, activation='relu'),
    Dropout(0.2),

    Dense(10, activation='softmax')
])

model.compile(optimizer='adam',
             loss="sparse_categorical_crossentropy",
              metrics=["accuracy"])


In [7]:
hist = model.fit(training_set,steps_per_epoch=len(training_set), validation_data=test_set, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [42]:
# Display results. Doesn't work yet

acc = hist.history['accuracy']
val_acc = hist.history['val_accuracy']
epochs = range(1, len(acc) + 1)

plt.plot(epochs, acc, '-', label='Training Accuracy')
plt.plot(epochs, val_acc, ':', label='Validation Accuracy')
plt.title('Training and Validation Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend(loc='lower right')
plt.plot()

[]