In [2]:
# feature extractoring and preprocessing data
import librosa
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import os
from PIL import Image
import pathlib
import csv

# Preprocessing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

#Keras
import keras

import warnings
warnings.filterwarnings('ignore')

Using TensorFlow backend.


In [5]:
cmap = plt.get_cmap('inferno')

plt.figure(figsize=(10,10))
genres = 'blues classical country disco hiphop jazz metal pop reggae rock'.split()
genres = 'classical jazz metal pop rock'.split()
for g in genres:
    pathlib.Path(f'dataset/img_data/{g}').mkdir(parents=True, exist_ok=True)     
    for filename in os.listdir(f'./dataset/genres/{g}'):
        songname = f'./dataset/genres/{g}/{filename}'
        y, sr = librosa.load(songname, mono=True, duration=5)
        plt.specgram(y, NFFT=2048, Fs=2, Fc=0, noverlap=128, cmap=cmap, sides='default', mode='default', scale='dB');
        plt.axis('off');
        plt.savefig(f'dataset/img_data/{g}/{filename[:-3].replace(".", "")}.png')
        plt.clf()
 

<Figure size 720x720 with 0 Axes>

Extracting features

In [21]:
header = 'filename chroma_stft rmse spectral_centroid spectral_bandwidth rolloff zero_crossing_rate'
#for i in range(1, 21):
for i in range(1, 20):
    header += f' mfcc{i}'
header += ' label'
header = header.split()

In [22]:
file = open('./dataset/data.csv', 'w', newline='')
with file:
    writer = csv.writer(file)
    writer.writerow(header)
for g in genres:
    for filename in os.listdir(f'./dataset/genres/{g}'):
        songname = f'./dataset/genres/{g}/{filename}'
        y, sr = librosa.load(songname, mono=True, duration=30)
        chroma_stft = librosa.feature.chroma_stft(y=y, sr=sr)
        spec_cent = librosa.feature.spectral_centroid(y=y, sr=sr)
        spec_bw = librosa.feature.spectral_bandwidth(y=y, sr=sr)
        rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)
        zcr = librosa.feature.zero_crossing_rate(y)
        mfcc = librosa.feature.mfcc(y=y, sr=sr)
        to_append = f'{filename} {np.mean(chroma_stft)} {np.mean(spec_cent)} {np.mean(spec_bw)} {np.mean(rolloff)} {np.mean(zcr)}'    
        for e in mfcc:
            to_append += f' {np.mean(e)}'
        to_append += f' {g}'
        file = open('./dataset/data.csv', 'a', newline='')
        with file:
            writer = csv.writer(file)
            writer.writerow(to_append.split())

Analyzing in pandas

In [23]:
data = pd.read_csv('./dataset/data.csv')
data.head()

Unnamed: 0,filename,chroma_stft,rmse,spectral_centroid,spectral_bandwidth,rolloff,zero_crossing_rate,mfcc1,mfcc2,mfcc3,...,mfcc11,mfcc12,mfcc13,mfcc14,mfcc15,mfcc16,mfcc17,mfcc18,mfcc19,label
0,classical.00004.au,0.223171,1705.704611,1646.292971,3175.73913,0.112867,-259.909976,118.939289,-35.611851,31.977636,...,-1.196833,-6.347265,2.761452,-2.592574,4.384807,-4.136895,-4.779474,-4.77236,-4.608322,classical
1,classical.00001.au,0.215064,1360.408354,1441.143831,2387.218898,0.087772,-324.058769,133.610044,-25.0311,24.567555,...,3.5224,5.041735,1.361834,3.003346,-3.124863,-4.291534,0.591218,2.871973,0.366271,classical
2,classical.00002.au,0.277865,1490.03481,1599.959102,2784.916493,0.09005,-235.255816,141.128195,-34.666853,30.979235,...,8.909783,-1.433871,4.281025,1.482141,3.187973,-1.309518,2.929818,-1.661003,-2.896666,classical
3,classical.00003.au,0.234236,1526.647067,1499.532617,2915.974034,0.108952,-326.864836,127.39131,-30.596096,37.468801,...,2.807639,7.252176,3.980639,-2.389035,1.490968,0.583444,-0.610587,2.334469,2.649742,classical
4,classical.00008.au,0.254774,1516.626219,1629.500705,2973.782018,0.078788,-292.1283,127.769046,-29.901041,37.430188,...,4.709107,0.377206,1.464333,3.393674,6.076538,0.963429,-0.75366,-2.105312,0.755132,classical


In [24]:
# Dropping unneccesary columns
data = data.drop(['filename'],axis=1)

In [28]:
genre_list = data.iloc[:, -1]
encoder = LabelEncoder()
y = encoder.fit_transform(genre_list)

Scaling feature columns

In [29]:
scaler = StandardScaler()
X = scaler.fit_transform(np.array(data.iloc[:, :-2], dtype = float))

# Dividing into train/test

In [37]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [38]:
len(y_train), len(y_test)

(40, 10)

In [39]:
len(genres)

5

# Building the network

In [None]:
from keras import models
from keras import layers

model = models.Sequential()
model.add(layers.Dense(256, activation='relu', input_shape=(X_train.shape[1],)))

model.add(layers.Dense(128, activation='relu'))

model.add(layers.Dense(64, activation='relu'))

# 5 genres
model.add(layers.Dense(5, activation='softmax'))