In [1]:
import IPython.display as ipd
import matplotlib.pyplot as plt
import librosa.display
import time
import warnings; warnings.simplefilter('ignore')
import pandas as pd
import numpy as np
import youtube_dl
import librosa
import os
import re
import tensorflow as tf

from tensorflow.keras import optimizers
from tensorflow.keras.layers import Flatten, Dense
from tensorflow.keras.models import Sequential
from __future__ import unicode_literals
from os import path, listdir 
from os.path import isfile, join
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder

In [16]:
def music_path(path):
    directories = [i for i in listdir(path) if not i.startswith(".")]
    for genre in directories:
        for song in listdir(f"{path}/{genre}"):
            if not song.startswith("."):
                yield f"{path}/{genre}/{song}", genre
                
def add_features(song, sr):
    res = []
    for part in song:
        union = part
        
        mfcc = librosa.feature.mfcc(part, sr)
        for element in mfcc:
            union = np.concatenate((union, element), axis=None)
        
        chroma_stft = librosa.feature.chroma_stft(part, sr)
        for element in chroma_stft:
            union = np.concatenate((union, element), axis=None)
        
        spectral_centroid = librosa.feature.spectral_centroid(part, sr)
        union = np.concatenate((union, spectral_centroid), axis=None)
        
        zero_crossing_rate = librosa.feature.zero_crossing_rate(part, sr)
        union = np.concatenate((union, zero_crossing_rate), axis=None)
        res.append(union)
    return np.array(res)
        
def split_song(song, sr, seconds = 5):
    res = []
    for i in range(1, len(song)//(sr*seconds)):
        res.append(song[(i-1)*sr*seconds : i*sr*seconds])
    return np.array(res)
                
def load_song(path):
    x , sr = librosa.load(path, mono=True, sr=44100)
    splited_song = split_song(x,sr,5)
    for song in add_features(splited_song, sr):
        yield song

def songs_array(default = 'music'):
    data = []
    y = []
    for song_path, genre in music_path(default): 
        for part_song in load_song(song_path):
            data.append(part_song)
            y.append(genre)
    return np.array(data), np.array(y)

In [17]:
x,y = songs_array()

In [6]:
x.shape, y.shape

((297, 235154), (297,))

In [7]:
genre = pd.DataFrame(data=y, columns=["Genero"])
ohe = OneHotEncoder()
ohe.fit(genre[["Genero"]])
genre_ohe = ohe.transform(genre[["Genero"]]).todense()
genre_ohe

matrix([[0., 1., 0.],
        [0., 1., 0.],
        [0., 1., 0.],
        [0., 1., 0.],
        [0., 1., 0.],
        [0., 1., 0.],
        [0., 1., 0.],
        [0., 1., 0.],
        [0., 1., 0.],
        [0., 1., 0.],
        [0., 1., 0.],
        [0., 1., 0.],
        [0., 1., 0.],
        [0., 1., 0.],
        [0., 1., 0.],
        [0., 1., 0.],
        [0., 1., 0.],
        [0., 1., 0.],
        [0., 1., 0.],
        [0., 1., 0.],
        [0., 1., 0.],
        [0., 1., 0.],
        [0., 1., 0.],
        [0., 1., 0.],
        [0., 1., 0.],
        [0., 1., 0.],
        [0., 1., 0.],
        [0., 1., 0.],
        [0., 1., 0.],
        [0., 1., 0.],
        [0., 1., 0.],
        [0., 1., 0.],
        [0., 1., 0.],
        [0., 1., 0.],
        [0., 1., 0.],
        [0., 1., 0.],
        [0., 1., 0.],
        [0., 1., 0.],
        [0., 1., 0.],
        [0., 1., 0.],
        [0., 1., 0.],
        [0., 1., 0.],
        [0., 1., 0.],
        [0., 1., 0.],
        [0., 1., 0.],
        [0

In [8]:
df_ohe = pd.DataFrame(data=genre_ohe, columns=['Classical', 'Blues', 'Country'])
df_ohe

Unnamed: 0,Classical,Blues,Country
0,0.0,1.0,0.0
1,0.0,1.0,0.0
2,0.0,1.0,0.0
3,0.0,1.0,0.0
4,0.0,1.0,0.0
...,...,...,...
292,0.0,0.0,1.0
293,0.0,0.0,1.0
294,0.0,0.0,1.0
295,0.0,0.0,1.0


In [9]:
X_train, X_val, y_train, y_val = train_test_split(x, genre_ohe, random_state=42)

In [10]:
model = Sequential([
    Dense(512, activation="relu", input_shape = X_train.shape[1:]),
    Dense(1024, activation="relu"),
    Dense(1024, activation="relu"),
    Dense(1024, activation="relu"),
    Dense(1024, activation="relu"),
    Dense(1024, activation="relu"),
    Dense(3, activation="softmax")
])

2021-10-20 23:11:23.990914: I tensorflow/compiler/jit/xla_cpu_device.cc:41] Not creating XLA devices, tf_xla_enable_xla_devices not set
2021-10-20 23:11:23.991084: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2021-10-20 23:11:23.992730: I tensorflow/core/common_runtime/process_util.cc:146] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.


In [11]:
optimizer = optimizers.Adam(learning_rate=0.0001)
model.compile(optimizer=optimizer,
              loss="categorical_crossentropy",
              metrics=["accuracy"])
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 512)               120399360 
_________________________________________________________________
dense_1 (Dense)              (None, 1024)              525312    
_________________________________________________________________
dense_2 (Dense)              (None, 1024)              1049600   
_________________________________________________________________
dense_3 (Dense)              (None, 1024)              1049600   
_________________________________________________________________
dense_4 (Dense)              (None, 1024)              1049600   
_________________________________________________________________
dense_5 (Dense)              (None, 1024)              1049600   
_________________________________________________________________
dense_6 (Dense)              (None, 3)                 3

In [12]:
history = model.fit(X_train, 
                    y_train,
         validation_data=(X_val, 
                          y_val),
         epochs=100,
         verbose=1,
         batch_size=32)

Epoch 1/100


2021-10-20 23:11:24.390109: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:116] None of the MLIR optimization passes are enabled (registered 2)
2021-10-20 23:11:24.390413: I tensorflow/core/platform/profile_utils/cpu_utils.cc:112] CPU Frequency: 3800020000 Hz


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100


Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


In [59]:
test_x ,test_y = songs_array('one')

In [60]:
test_x.shape

(5, 235154)

In [61]:
y_pred = model.predict(test_x)

In [62]:
classical = 0
blues = 0
country = 0
for i in y_pred:
    blues += i[0]
    classical += i[1]
    country += i[2]
print(f"Classical: {classical/len(y_pred)*100}")
print(f"Blues: {blues/len(y_pred)*100}")
print(f"Country: {country/len(y_pred)*100}")

Classical: 0.002268746029638905
Blues: 11.673844183757828
Country: 88.32388818264008
