In [1]:
#IMPORTING ALL THE BASIC NECESSARY LIBRARIES
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

#SPECIAL LIBRARIES REQUIRED FOR THIS PROJECT
import librosa              #LIBRARY FOR READING THE AUDIO FILES(BOTH MONO & STEREO)
import IPython.display as ipd

In [2]:
#LOADING AN AUDIO FILE INTO A VARIABLE
filename = 'D:\\Clg_Projects\\Music Dataset\\[org][jaz_blu]1213__1.wav'

In [3]:
filename

'D:\\Clg_Projects\\Music Dataset\\[org][jaz_blu]1213__1.wav'

In [4]:
#READING THE AUDIO FILE
plt.figure(figsize=(18,5))
ipd.Audio(filename)

<Figure size 1296x360 with 0 Axes>

In [5]:
from librosa.display import *

audio_data, sample_rate = librosa.load(filename)

In [6]:
sample_rate

22050

In [7]:
audio_data

array([-0.18224473, -0.26882672, -0.2162941 , ..., -0.07922503,
       -0.10429961, -0.05132502], dtype=float32)

In [8]:
#APPLYING MFCC(MEL FREQUENCY CEPSTRAL COEFFICIENTS) FOR SINGLE AUDIO FILE
mfcc = librosa.feature.mfcc(y = audio_data, sr = sample_rate,n_mfcc=40)
mfcc.shape

(40, 130)

In [9]:
mfcc

array([[-137.21405  , -129.58437  , -102.872444 , ..., -145.49635  ,
        -120.51451  , -139.05743  ],
       [ 137.70598  ,  122.754295 ,   72.525406 , ...,   44.815613 ,
          19.428162 ,   31.389816 ],
       [  14.577824 ,   36.31743  ,   64.13518  , ...,   33.732452 ,
          42.750095 ,   37.935257 ],
       ...,
       [  -1.3172076,   -0.6218138,    2.0813878, ...,    6.2686634,
           6.3897896,    2.871343 ],
       [  -2.0229   ,   -1.3217193,   -2.0904846, ...,  -13.70071  ,
          -9.0021515,   -7.4901986],
       [  11.450668 ,    9.84563  ,    4.6452866, ...,  -20.63741  ,
         -18.95164  ,  -16.682823 ]], dtype=float32)

In [10]:
#DEFINING USER DEFINED FUNCTIONS FOR FEATURE EXTRACTION
import os
from librosa import *

def feature_extraction(file):
    
    audio_signal, sample_rate = librosa.load(filename)
    mfcc_features = librosa.feature.mfcc(y = audio_signal, sr = sample_rate,n_mfcc = 40)
    #print(mfcc_features)
    mfcc_scaled_features = np.mean(mfcc_features.T, axis = 0)
    
    return mfcc_scaled_features

def class_name(file):
    
    file_name = os.path.splitext(file)
    class_name = file_name[0][1:4]
    
    return class_name


In [11]:
#class_name(file = '[cel][cla]0001__2.wav') 

In [12]:
!pip install tqdm 



In [13]:
#EXTRACTING MFCC VALUES FOR EACH AND EVERY AUDIO FILE IN THE WHOLE DATASET

from tqdm import tqdm
path = 'D:\\Clg_Projects\\Music Dataset\\'
audio_files = os.listdir(path)
extracted_features = []
for item in tqdm(audio_files):
    if item.endswith('.wav'):
        data = feature_extraction(item)
        class_labels = class_name(item)
        extracted_features.append([data,class_labels])

100%|██████████████████████████████████████████████████████████████████████████████| 6706/6706 [01:23<00:00, 79.89it/s]


In [14]:
#CONVERTING THE EXTRACTED MFCC FEATURES INTO A DATAFRAME WITH THEIR CORRESPONDING CLASS LABELS
extracted_features_dataframe = pd.DataFrame(extracted_features,columns=['feature','class'])
extracted_features_dataframe.head()

Unnamed: 0,feature,class
0,"[-87.64163, 82.2469, 24.188623, 16.11406, 1.97...",cel
1,"[-87.64163, 82.2469, 24.188623, 16.11406, 1.97...",cel
2,"[-87.64163, 82.2469, 24.188623, 16.11406, 1.97...",cel
3,"[-87.64163, 82.2469, 24.188623, 16.11406, 1.97...",cel
4,"[-87.64163, 82.2469, 24.188623, 16.11406, 1.97...",cel


In [15]:
extracted_features_dataframe.tail()

Unnamed: 0,feature,class
6700,"[-87.64163, 82.2469, 24.188623, 16.11406, 1.97...",voi
6701,"[-87.64163, 82.2469, 24.188623, 16.11406, 1.97...",voi
6702,"[-87.64163, 82.2469, 24.188623, 16.11406, 1.97...",voi
6703,"[-87.64163, 82.2469, 24.188623, 16.11406, 1.97...",voi
6704,"[-87.64163, 82.2469, 24.188623, 16.11406, 1.97...",voi


In [16]:
#SPLITTING THE DATAFRAME INTO DEPENDENT AND INDEPENDENT VARIBALES AND CONVERTING THEM INTO AN ARRAY
x = np.array(extracted_features_dataframe['feature'].tolist())
y = np.array(extracted_features_dataframe['class'].tolist())

In [17]:
x.shape

(6705, 40)

In [18]:
#CONVERTING THE ARRAY 'y'(CATEGORICAL VARIABLE) INTO DUMMY VARIABLES
y = np.array(pd.get_dummies(y))
y

array([[1, 0, 0, ..., 0, 0, 0],
       [1, 0, 0, ..., 0, 0, 0],
       [1, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 1],
       [0, 0, 0, ..., 0, 0, 1],
       [0, 0, 0, ..., 0, 0, 1]], dtype=uint8)

In [19]:
y.shape

(6705, 11)

In [20]:
#SPLITTING THE DATASET INTO TRAINING AND TESTING SET
from sklearn.model_selection import train_test_split

x_train,x_test,y_train,y_test = train_test_split(x,y,test_size = 0.2,random_state = 42)

In [21]:
x_train.shape

(5364, 40)

In [22]:
x_test.shape

(1341, 40)