In [None]:
import numpy as np
import matplotlib.pyplot as plt
import librosa
import os #to access files efficiently
from sklearn.model_selection import train_test_split

import tensorflow as tf
from tensorflow.keras.optimizers import Adam
from tensorflow.image import resize
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Input, Dense, Dropout, Flatten, Conv2D, MaxPooling2D

In [None]:
# Colab-only step: attach Google Drive so files on it are reachable
# through the local mount point below.
from google.colab import drive

drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)

Mounted at /content/drive


In [None]:
# Root folder of the guitar-note dataset on the mounted Drive.
data_dir = '/content/drive/My Drive/Audionet end_eval/Guitar Dataset'

# (note name, octaves) pairs. The order of this table fixes the order of
# `classes`, so it must not be rearranged.
_note_octaves = [
    ('A', (2, 3, 4)),
    ('Asharp', (2, 3, 4)),
    ('B', (2, 3, 4)),
    ('C', (3, 4, 5)),
    ('Csharp', (3, 4, 5)),
    ('D', (2, 3, 4, 5)),
    ('Dsharp', (2, 3, 4, 5)),
    ('E', (2, 3, 4, 5)),
    ('F', (2, 3, 4, 5)),
    ('Fsharp', (2, 3, 4, 5)),
    ('G', (2, 3, 4, 5)),
    ('Gsharp', (2, 3, 4, 5)),
]

# Flat list of class names such as 'A2', 'Asharp3', ... (43 entries).
classes = [
    f'{note}{octave}'
    for note, octaves in _note_octaves
    for octave in octaves
]

In [None]:
# Build the dataset: one fixed-size mel-spectrogram per .wav file, labelled
# with the index of its class folder in `classes`.
data = []
labels = []
target_shape = (128, 128)  # size every spectrogram is resized to

for i, class_name in enumerate(classes):
    class_dir = os.path.join(data_dir, class_name)
    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order (and therefore any seeded split of it) identical on every run.
    for filename in sorted(os.listdir(class_dir)):
        # Compare the extension case-insensitively so '.WAV' files are kept.
        if not filename.lower().endswith('.wav'):
            continue

        file_path = os.path.join(class_dir, filename)
        audio_data, sample_rate = librosa.load(file_path, sr=44100)

        # Normalize data
        audio_data = librosa.util.normalize(audio_data)

        # Convert to spectrograms for applying cnn
        mel_spectrogram = librosa.feature.melspectrogram(y=audio_data, sr=sample_rate)
        # Add a trailing channel axis, then resize to `target_shape`.
        mel_spectrogram = tf.image.resize(np.expand_dims(mel_spectrogram, axis=-1), target_shape)
        # Store a NumPy array rather than an EagerTensor so the final
        # np.array() call does not have to convert tensors one by one.
        data.append(mel_spectrogram.numpy())
        labels.append(i)

# Fail here with a clear message instead of later inside the train/test split.
if not data:
    raise ValueError(f"No .wav files found under {data_dir!r}")

data = np.array(data)
labels = np.array(labels)

In [None]:
# One-hot encode the integer class ids. The ndim guard makes this cell safe to
# re-run: without it, a second execution would feed the already encoded 2-D
# matrix back into to_categorical and produce a wrongly shaped 3-D array.
if labels.ndim == 1:
    labels = to_categorical(labels, num_classes=len(classes))

# Hold out 20% of the samples; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    data, labels, test_size=0.2, random_state=42
)

In [None]:
# Shape of a single training sample, taken from the data itself.
input_shape = X_train[0].shape
input_layer = Input(shape=input_shape)

# The input is declared through the Input object as the first element of the
# Sequential model. Passing `input_shape=` to the first Conv2D instead makes
# Keras emit a "Do not pass an `input_shape`..." warning.
model = tf.keras.Sequential([
    input_layer,
    # Three stacked 3x3 convolutions followed by a single 2x2 max-pool.
    Conv2D(32, kernel_size=(3, 3), activation='relu'),
    Conv2D(64, (3, 3), activation='relu'),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),
    # Classifier head: flatten, one hidden dense layer, softmax over classes.
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(len(classes), activation='softmax'),
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [None]:
# Configure training: Adam with a 1e-3 learning rate, categorical
# cross-entropy as the loss, and accuracy reported as the metric.
model.compile(
    loss='categorical_crossentropy',
    optimizer=Adam(learning_rate=1e-3),
    metrics=['accuracy'],
)

In [None]:
# Fit the model on the spectrograms. The History object returned by fit() is
# kept so the per-epoch loss/accuracy can be inspected or plotted afterwards,
# instead of being discarded with only its repr echoed by the cell.
# NOTE(review): X_test/y_test serve as validation data here, so the "val_*"
# numbers are measured on the test split itself; consider a separate
# validation split if the test score must stay untouched by tuning.
history = model.fit(
    X_train,
    y_train,
    epochs=15,
    batch_size=16,
    validation_data=(X_test, y_test),
)

Epoch 1/15
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m224s[0m 3s/step - accuracy: 0.2540 - loss: 14.6172 - val_accuracy: 0.7526 - val_loss: 1.2000
Epoch 2/15
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m254s[0m 3s/step - accuracy: 0.6467 - loss: 1.5982 - val_accuracy: 0.8454 - val_loss: 0.6242
Epoch 3/15
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m210s[0m 3s/step - accuracy: 0.8209 - loss: 0.7569 - val_accuracy: 0.9072 - val_loss: 0.4553
Epoch 4/15
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m252s[0m 3s/step - accuracy: 0.8327 - loss: 0.7391 - val_accuracy: 0.9107 - val_loss: 0.3843
Epoch 5/15
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m203s[0m 3s/step - accuracy: 0.8701 - loss: 0.4308 - val_accuracy: 0.9725 - val_loss: 0.1309
Epoch 6/15
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m223s[0m 3s/step - accuracy: 0.9081 - loss: 0.3554 - val_accuracy: 0.9725 - val_loss: 0.1475
Epoch 7/15
[1m73/73[0m [32m━━━

<keras.src.callbacks.history.History at 0x7972a369f390>

In [None]:
# Score the trained model on the held-out split without a progress bar.
# Index 1 of the returned value is printed as the accuracy.
test_accuracy = model.evaluate(x=X_test, y=y_test, verbose=0)
accuracy_value = test_accuracy[1]
print(accuracy_value)

0.969072163105011
