## CNN for music genre classification

In [2]:
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
import os
from glob import glob
from PIL import Image
import random
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPooling2D, BatchNormalization, Dropout

In [3]:
import tensorflow as tf
print("Num GPUs Available:", len(tf.config.list_physical_devices('GPU')))
print(tf.config.list_physical_devices('GPU'))

Num GPUs Available: 1
[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


## Drive setup

In [4]:
SPEC_TRAIN_FOLDER = "./SegmentedAudio/train"
SPEC_TEST_FOLDER = "./SegmentedAudio/test"

## Constants

In [5]:
IMAGE_SIZE = (130, 128)

In [6]:
train_ds = tf.keras.preprocessing.image_dataset_from_directory(
    directory=SPEC_TRAIN_FOLDER,
    labels='inferred',
    label_mode='int',
    image_size=IMAGE_SIZE,
    batch_size=32,
    color_mode='rgb'
)

Found 7992 files belonging to 10 classes.


I0000 00:00:1747268740.907379   12044 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 5520 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 4070 Laptop GPU, pci bus id: 0000:01:00.0, compute capability: 8.9


In [7]:
test_ds = tf.keras.preprocessing.image_dataset_from_directory(
    directory=SPEC_TEST_FOLDER,
    labels='inferred',
    label_mode='int',
    image_size=IMAGE_SIZE,
    batch_size=32,
    color_mode='rgb'
)

Found 1999 files belonging to 10 classes.


In [8]:
def get_model(input_shape):

    input_shape = (input_shape[0], input_shape[1], 3)
    model  =  Sequential([
                    Conv2D(32, kernel_size=3, padding='same', activation="relu", input_shape=input_shape),
                    MaxPooling2D((2,2)),
                    BatchNormalization(),
                     Conv2D(32, kernel_size=3, padding='same', activation="relu", input_shape=input_shape),
                    MaxPooling2D((2,2)),
                    BatchNormalization(),
                     Conv2D(32, kernel_size=3, padding='same', activation="relu", input_shape=input_shape),
                    MaxPooling2D((2,2)),
                    BatchNormalization(),
                    Dropout(0.2),
                    Flatten(),
                    Dense(64, activation='relu'),
                    Dense(10, activation='softmax')
    ])
    return model

In [9]:
def compile_model(model):
   model.compile(optimizer= "adam", loss = "sparse_categorical_crossentropy", metrics=['accuracy'])

In [10]:
def train_model(model, train_ds, epochs):
    model.fit(train_ds, epochs=epochs)

In [11]:
model = get_model(IMAGE_SIZE)
model.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [12]:
compile_model(model)

In [17]:
train_model(model, train_ds, 100)

Epoch 1/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 12ms/step - accuracy: 0.9937 - loss: 0.0177
Epoch 2/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 11ms/step - accuracy: 0.9819 - loss: 0.0580
Epoch 3/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 11ms/step - accuracy: 0.9903 - loss: 0.0272
Epoch 4/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.9910 - loss: 0.0249
Epoch 5/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 11ms/step - accuracy: 0.9952 - loss: 0.0124
Epoch 6/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 11ms/step - accuracy: 0.9930 - loss: 0.0169
Epoch 7/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 11ms/step - accuracy: 0.9951 - loss: 0.0146
Epoch 8/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 11ms/step - accuracy: 0.9900 - loss: 0.0309
Epoch 9/100
[1m250/250[

In [18]:
test_loss, test_acc = model.evaluate(test_ds)

print('\nTest accuracy:', test_acc)
print('\n Test lost:', test_loss)

[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m-1s[0m -12982us/step - accuracy: 0.7046 - loss: 4.3067

Test accuracy: 0.7088544368743896

 Test lost: 4.0523552894592285
