In [1]:
import numpy as np 
import pandas as pd
import json
import matplotlib.pyplot as plt
import os
# from tqdm import tqdm
import librosa
from librosa.feature import mfcc
from IPython.display import display, clear_output
from sklearn.metrics import confusion_matrix
from sklearn.metrics import f1_score , accuracy_score

In [2]:
# Load the run configuration. Later cells read test_size, shuffle, random_state,
# batch_size and epochs from data_para['config'][0].
# NOTE(review): absolute, machine-specific path - consider a configurable location.
# The context manager closes the file handle once the JSON has been parsed.
with open("/Users/tharindudamruwan/Desktop/ML and AI for DS/Intellihack/Notebooks/config.json") as f:
    data_para = json.load(f)

In [3]:
# NOTE(review): absolute, machine-specific path - consider a configurable location.
genre_path = '/Users/tharindudamruwan/Desktop/ML and AI for DS/Intellihack/Songs/WavFile'
# Each sub-directory of genre_path is one class. Filtering on directories drops
# stray files such as macOS '.DS_Store' without failing when no such file exists.
genres = [x for x in os.listdir(genre_path) if os.path.isdir(os.path.join(genre_path, x))]
file_path = [os.path.join(genre_path,x) for x in genres]
file_path

['/Users/tharindudamruwan/Desktop/ML and AI for DS/Intellihack/Songs/WavFile/Power',
 '/Users/tharindudamruwan/Desktop/ML and AI for DS/Intellihack/Songs/WavFile/Sorrow',
 '/Users/tharindudamruwan/Desktop/ML and AI for DS/Intellihack/Songs/WavFile/Romance',
 '/Users/tharindudamruwan/Desktop/ML and AI for DS/Intellihack/Songs/WavFile/Calm',
 '/Users/tharindudamruwan/Desktop/ML and AI for DS/Intellihack/Songs/WavFile/Joy']

In [None]:
# Flatten the genre folders into one list of audio file paths.
# Iterates directly (the tqdm import at the top of the notebook is commented
# out, so wrapping the loop in tqdm would raise NameError) and skips hidden
# files such as '.DS_Store', which are not audio.
audio_paths = [
    os.path.join(genre_dir, name)
    for genre_dir in file_path
    for name in os.listdir(genre_dir)
    if not name.startswith('.')
]

In [None]:
def get_frame_mfccs(path):
    """Cut an audio file into back-to-back 3-second frames and compute MFCCs per frame.

    Parameters
    ----------
    path : str
        Audio file handed to ``librosa.load`` with its default arguments; the
        sample rate used everywhere below is the one that call returns.

    Returns
    -------
    list
        One ``mfcc(...)`` result per frame (13 coefficients, hop length 512,
        FFT size 2048), in file order. An empty list is returned when the clip
        is shorter than a single 3-second frame.
    """
    audio, sr = librosa.load(path)
    frame_len = sr*3  # hop == frame length, so frames do not overlap
    if len(audio) < frame_len:
        # librosa.util.frame rejects input shorter than one frame; a clip that
        # short simply contributes no frames.
        return []
    frames = librosa.util.frame(audio, frame_length=frame_len, hop_length=frame_len)
    # Each column of `frames` is one 3-second frame.
    return [
        mfcc(y=frames[:,i],sr=sr,n_mfcc=13,hop_length=512,n_fft=2048)
        for i in range(frames.shape[1])
    ]

In [None]:
# Build the dataset: one entry in `data` per 3-second MFCC frame and the
# matching genre name in `labels`.
data = []
labels = []
failed_paths = []  # (path, error) for every file that could not be processed
for path in audio_paths:
    display(path)
    # The genre is the name of the folder that contains the file; deriving it
    # from the parent directory does not depend on how deep the dataset lives.
    genre = os.path.basename(os.path.dirname(path))
    try:
        frame_mfccs = get_frame_mfccs(path)
    except Exception as err:
        # Best effort: keep going, but remember what was skipped and why.
        failed_paths.append((path, err))
        frame_mfccs = []
    clear_output(wait=True)
    data.extend(frame_mfccs)
    labels.extend([genre] * len(frame_mfccs))

# Short summary instead of dumping every path.
print(f"Extracted {len(data)} frames from {len(audio_paths) - len(failed_paths)} of {len(audio_paths)} files")
for bad_path, err in failed_paths:
    print(f"skipped {bad_path}: {err!r}")

In [None]:
# Every MFCC frame must have exactly one label; report both counts on mismatch.
assert len(data) == len(labels), f"Got {len(data)} MFCC frames but {len(labels)} labels"


In [None]:
def reshape(data,shape=(26,65)):
    """Reshape one (13,130) MFCC array to `shape` and append a channel axis.

    Parameters
    ----------
    data : np.ndarray
        Array whose shape must be exactly (13,130).
    shape : tuple, optional
        Target 2-D shape, (26,65) by default.

    Returns
    -------
    np.ndarray
        The reshaped array with a trailing axis of length 1, e.g. (26,65,1).

    Raises
    ------
    AssertionError
        If `data` does not have shape (13,130).
    """
    expected_shape = (13,130)
    assert data.shape == expected_shape , f"The Data shape should be (13,130) but got {data.shape}"
    # Indexing with np.newaxis adds the single-channel dimension at the end.
    return data.reshape(shape)[..., np.newaxis]

In [None]:
# Apply `reshape` to every MFCC frame and pack the results into one array.
processed_data = np.array(list(map(reshape, data)))

In [None]:
from sklearn.preprocessing import LabelEncoder
# `labels` is a Python list of genre names straight after feature extraction;
# once this cell has run it holds the integer-encoded array. Keep the string
# labels the first time through so that re-running the cell does not fit the
# encoder on already-encoded integers (which would turn the class names into
# numbers).
if isinstance(labels, list):
    labels_raw = list(labels)
lbenc = LabelEncoder()
labels = lbenc.fit_transform(labels_raw)
len(labels)

In [None]:
# Class names learned by the fitted encoder; reused further down as the tick
# labels of the confusion matrix.
classes = lbenc.classes_
print(classes)

In [None]:
from sklearn.model_selection import train_test_split
# Split settings all come from the first entry of the loaded config.
# NOTE(review): the split is made over individual 3-second frames, so frames of
# the same audio file can land in both the training and the validation set.
split_cfg = data_para['config'][0]
x_train,x_val,y_train,y_val = train_test_split(
    processed_data,
    labels,
    test_size=split_cfg['test_size'],
    shuffle=split_cfg['shuffle'],
    random_state=split_cfg['random_state'],
)

In [None]:
# Keras layers used by the CNN defined in the next cell.
from tensorflow.keras.layers import InputLayer , Conv2D , AveragePooling2D , GlobalAvgPool2D , Dense
import tensorflow as tf 
# Show the installed TensorFlow version as the cell output.
tf.__version__

In [None]:
# One softmax unit per encoded genre, so the network always matches the number
# of classes found in the dataset instead of assuming exactly five.
num_classes = len(classes)

# CNN over single-channel (26,65,1) inputs - the shape produced by `reshape`.
model = tf.keras.models.Sequential([
    InputLayer(input_shape=(26,65,1)),
    Conv2D(512,(3),padding='valid',activation='relu'),
    Conv2D(256,(3),padding='valid',activation='relu'),
    AveragePooling2D(pool_size=(3),strides=(2),padding='same'),
    Conv2D(256,(3),padding='valid',activation='relu'),
    AveragePooling2D(pool_size=(3),strides=(2),padding='same'),
    Conv2D(256,(4),padding='valid',activation='relu'),
    GlobalAvgPool2D(),
    Dense(128,activation='relu'),
    Dense(64,activation='relu'),
    Dense(num_classes,activation='softmax')
])
# Sparse categorical cross-entropy because the targets are integer class ids
# (output of the label encoder), not one-hot vectors.
model.compile(optimizer=tf.keras.optimizers.Adam(),
             loss=tf.keras.losses.sparse_categorical_crossentropy,
             metrics=['accuracy'])
model.summary()

In [None]:
# Batch size and number of epochs are taken from the first config entry.
train_cfg = data_para['config'][0]
history = model.fit(
    x_train,
    y_train,
    batch_size=train_cfg['batch_size'],
    epochs=train_cfg['epochs'],
)

In [None]:
# Display the 'loss' series recorded in the History object returned by model.fit.
history.history['loss']

In [None]:
# Plot the training loss and accuracy curves side by side.
train_loss = history.history['loss']
train_acc = history.history['accuracy']
# The x-axis length follows the recorded history, so the plot works for any
# number of epochs set in the config rather than only for exactly 10.
epochs = np.arange(len(train_loss))

fig, (loss_ax, acc_ax) = plt.subplots(1, 2)

loss_ax.plot(epochs, train_loss, color='r')
loss_ax.set_title('Model loss (sparse categorical crossentropy)')
loss_ax.set_xlabel('Epochs')
loss_ax.set_ylabel('Loss')

acc_ax.plot(epochs, train_acc, color='g')
acc_ax.set_title('Model Accuracy')
acc_ax.set_xlabel('Epochs')
acc_ax.set_ylabel('Accuracy')

fig.tight_layout()
plt.show()

In [None]:
# Class probabilities for the held-out split; the predicted class of each
# sample is the index of its largest probability.
pred = model.predict(x_val)
preds = [np.argmax(row) for row in pred]
accuracy = accuracy_score(y_val,preds)
# NOTE(review): for single-label multiclass targets a micro-averaged F1 is
# expected to equal the accuracy - consider average='macro' if a second,
# independent metric is wanted. Confirm against the scikit-learn docs.
f1score = f1_score(y_val,preds,average='micro')

In [None]:
# Report the accuracy and F1 score computed from the predictions on x_val.
print(f"The Test Accuracy is {accuracy} \n The F1 Score is {f1score}")

In [None]:
# Persist the trained model under 'final_model', relative to the working directory.
# NOTE(review): recent Keras releases reportedly require a '.keras' or '.h5'
# suffix in the file name - confirm against the installed TensorFlow version.
model.save('final_model')

In [None]:
# Confusion matrix of true vs. predicted classes on the held-out split.
tick_marks = np.arange(len(classes))
# Passing the full list of class ids keeps the matrix len(classes) x len(classes)
# even when a class appears in neither y_val nor preds, so the tick labels
# below always line up with the rows and columns.
cm = confusion_matrix(y_val,preds,labels=tick_marks)

fig, ax = plt.subplots()
im = ax.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)
ax.set_title('Confusion matrix')
fig.colorbar(im, ax=ax)
ax.set_xticks(tick_marks)
ax.set_xticklabels(classes, rotation=45)
ax.set_yticks(tick_marks)
ax.set_yticklabels(classes)

fmt = 'd'
# Cells darker than half of the maximum count get white text for contrast.
thresh = cm.max() / 2.
for i, j in np.ndindex(cm.shape):
    ax.text(j, i, format(cm[i, j], fmt),
            horizontalalignment="center",
            color="white" if cm[i, j] > thresh else "black")

ax.set_ylabel('True label')
ax.set_xlabel('Predicted label')
fig.tight_layout()
plt.show()