In [2]:
import pandas as pd 
import numpy as np 

from tensorflow import keras
import os

import soundfile as sf

import matplotlib.pyplot as plt
%matplotlib inline

import seaborn as sns 

import librosa
import librosa.display

import warnings
warnings.filterwarnings('ignore')

### MFCC Features

In [3]:
def mfcc_features(file_path):
    # Loading audio file to librosa
    y, sr = librosa.load(file_path, offset=0, duration=30)
    
    # obtain mfcc features
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=40, hop_length=512, n_fft=2048)
    mfcc_mean = mfcc.mean(axis=1)
    mfcc_var = mfcc.std(axis=1)
    mfcc_min = mfcc.min(axis=1)
    mfcc_max = mfcc.max(axis=1)
    
    mfcc_1 = librosa.feature.mfcc(y=y, sr=sr, hop_length=512, n_fft=2048, n_mfcc=40)
    mfcc1_mean = mfcc_1.mean(axis=1)
    mfcc1_var = mfcc_1.std(axis=1)
    mfcc1_min = mfcc_1.min(axis=1)
    mfcc1_max = mfcc_1.max(axis=1)
    
    mfcc_2 = librosa.feature.mfcc(y=y, sr=sr, hop_length=256, n_fft=1024, n_mfcc=40)
    mfcc2_mean = mfcc_2.mean(axis=1)
    mfcc2_var = mfcc_2.std(axis=1)
    mfcc2_min = mfcc_2.min(axis=1)
    mfcc2_max = mfcc_2.max(axis=1)
    
    delta_mfcc = librosa.feature.delta(mfcc)
    delta_mean = delta_mfcc.mean(axis=1)
    delta_var = delta_mfcc.std(axis=1)
    
    delta2_mfcc = librosa.feature.delta(mfcc, order=2)
    delta2_mean = delta2_mfcc.mean(axis=1)
    delta2_var = delta2_mfcc.std(axis=1)
    
    y_harmonic, y_percussive = librosa.effects.hpss(y)

    mfcc_harmonic = librosa.feature.mfcc(y=y_harmonic, sr=sr, n_mfcc=40)
    harmonic_mean = mfcc_harmonic.mean(axis=1)
    harmonic_var = mfcc_harmonic.std(axis=1)
    
    mfcc_percussive = librosa.feature.mfcc(y=y_percussive, sr=sr, n_mfcc=40)
    percussive_mean = mfcc_percussive.mean(axis=1)
    percussive_var = mfcc_percussive.std(axis=1)

    
    mfcc_feature = np.concatenate((mfcc_mean, mfcc_var, mfcc_min, mfcc_max, 
                                  mfcc1_mean, mfcc1_var, mfcc1_min, mfcc1_max,
                                  mfcc2_mean, mfcc2_var, mfcc2_min, mfcc2_max,
                                  delta_mean, delta_var, delta2_mean, delta2_var,
                                  harmonic_mean, harmonic_var, 
                                  percussive_mean, percussive_var))

    return mfcc_feature


In [4]:
def process_wav_files(directory):
    data = {
        'file_name': [],
        'mfcc_mean': [], 'mfcc_var': [], 'mfcc_min': [],'mfcc_max': [],
        'mfcc1_mean': [], 'mfcc1_var': [], 'mfcc1_min': [],'mfcc1_max': [],
        'mfcc2_mean': [], 'mfcc2_var': [], 'mfcc2_min': [],'mfcc2_max': [],
        'delta_mean': [], 'delta_var':[], 'delta2_mean': [], 'delta2_var':[], 
        'harmonic_mean': [], 'harmonic_var':[], 'percussive_mean': [], 'percussive_var':[]
        
    }
    
    for filename in os.listdir(directory):
        if filename.endswith(".wav"):
            file_path = os.path.join(directory, filename)
            feat = mfcc_features(file_path)
            data['file_name'].append(filename)
            data['mfcc_mean'].append(feat[0])
            data['mfcc_var'].append(feat[1])
            data['mfcc_min'].append(feat[2])
            data['mfcc_max'].append(feat[3])
            data['mfcc1_mean'].append(feat[4])
            data['mfcc1_var'].append(feat[5])
            data['mfcc1_min'].append(feat[6])
            data['mfcc1_max'].append(feat[7])
            data['mfcc2_mean'].append(feat[8])
            data['mfcc2_var'].append(feat[9])
            data['mfcc2_min'].append(feat[10])
            data['mfcc2_max'].append(feat[11])
            data['delta_mean'].append(feat[12])
            data['delta_var'].append(feat[13])
            data['delta2_mean'].append(feat[14])
            data['delta2_var'].append(feat[15])
            data['harmonic_mean'].append(feat[16])
            data['harmonic_var'].append(feat[17])
            data['percussive_mean'].append(feat[18])
            data['percussive_var'].append(feat[19])

    return data

In [5]:
directory = '/Users/kellyjara/Desktop/Test Data/genres_trimmed/blues'
data = process_wav_files(directory)
blues_df = pd.DataFrame(data)

In [7]:
blues_df['genre'] = 'blues'

In [8]:
class_dir = '/Users/kellyjara/Desktop/Test Data/genres_trimmed/classical'
class_data = process_wav_files(class_dir)
classical_df = pd.DataFrame(class_data)

In [9]:
classical_df['genre'] = 'classical'

In [10]:
country_dir = '/Users/kellyjara/Desktop/Test Data/genres_trimmed/country'
country_data = process_wav_files(country_dir)
country_df = pd.DataFrame(country_data)

In [11]:
country_df['genre'] = 'country'

In [12]:
disco_dir = '/Users/kellyjara/Desktop/Test Data/genres_trimmed/disco'
disco_data = process_wav_files(disco_dir)
disco_df = pd.DataFrame(disco_data)

In [13]:
disco_df['genre'] = 'disco'

In [14]:
hiphop_dir = '/Users/kellyjara/Desktop/Test Data/genres_trimmed/hiphop'
hiphop_data = process_wav_files(hiphop_dir)
hiphop_df = pd.DataFrame(hiphop_data)

In [15]:
hiphop_df['genre'] = 'hiphop'

In [16]:
jazz_dir = '/Users/kellyjara/Desktop/Test Data/genres_trimmed/jazz'
jazz_data = process_wav_files(jazz_dir)
jazz_df = pd.DataFrame(jazz_data)

In [17]:
jazz_df['genre'] = 'jazz'

In [18]:
metal_dir = '/Users/kellyjara/Desktop/Test Data/genres_trimmed/metal'
metal_data = process_wav_files(metal_dir)
metal_df = pd.DataFrame(metal_data)

In [19]:
metal_df['genre'] = 'metal'

In [20]:
pop_dir = '/Users/kellyjara/Desktop/Test Data/genres_trimmed/pop'
pop_data = process_wav_files(pop_dir)
pop_df = pd.DataFrame(pop_data)

In [21]:
pop_df['genre'] = 'pop'

In [22]:
reggae_dir = '/Users/kellyjara/Desktop/Test Data/genres_trimmed/reggae'
reggae_data = process_wav_files(reggae_dir)
reggae_df = pd.DataFrame(reggae_data)

In [23]:
reggae_df['genre'] = 'reggae'

In [24]:
rock_dir = '/Users/kellyjara/Desktop/Test Data/genres_trimmed/rock'
rock_data = process_wav_files(rock_dir)
rock_df = pd.DataFrame(rock_data)

In [25]:
rock_df['genre'] = 'rock'

In [26]:
df = pd.concat([blues_df, classical_df, country_df, disco_df, hiphop_df, 
                jazz_df, metal_df, pop_df, reggae_df, rock_df], axis = 0)
df

Unnamed: 0,file_name,mfcc_mean,mfcc_var,mfcc_min,mfcc_max,mfcc1_mean,mfcc1_var,mfcc1_min,mfcc1_max,mfcc2_mean,...,mfcc2_max,delta_mean,delta_var,delta2_mean,delta2_var,harmonic_mean,harmonic_var,percussive_mean,percussive_var,genre
0,blues_69_segment_3.wav,-135.282867,108.550240,13.028976,43.992382,19.349297,20.206875,-1.577484,7.924092,1.519116,...,4.648852,4.786031,0.404394,-2.173331,2.286452,-5.007302,-0.264153,-2.354364,-2.502975,blues
1,blues_27_segment_4.wav,-91.254097,129.466553,-4.100496,45.220657,7.252443,16.338186,0.856577,0.009967,1.849428,...,6.253500,-3.555044,-2.782827,3.370219,6.914120,-1.414697,-0.071103,-4.808448,-11.935136,blues
2,blues_54_segment_2.wav,-191.496078,139.016464,11.991453,28.216597,-0.720076,7.438097,6.724214,6.649870,-0.442536,...,7.271035,2.949739,2.887134,-4.933522,-3.288434,-10.092828,-8.105810,-11.131342,-2.723835,blues
3,blues_33_segment_1.wav,-121.252815,123.478157,13.837003,28.114388,7.535354,15.141087,-3.699708,6.035746,0.958965,...,-1.073197,0.798008,-2.576288,-5.156488,3.080513,-6.622262,-0.429654,-0.840749,-2.610587,blues
4,blues.00009_segment_2.wav,-179.017548,138.225616,-30.912750,49.097755,-24.154978,7.536681,-9.466703,10.459454,-20.845018,...,0.095619,-7.698235,-6.822463,-9.076863,-3.736471,-0.479018,2.954151,0.616772,-0.804311,blues
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
738,rock.00063_segment_2.wav,-329.291077,139.876709,53.949989,17.256727,17.011780,9.052016,8.448787,2.826561,0.794123,...,0.516435,3.066019,0.435119,3.739078,-0.854575,-1.531405,-3.096432,-1.511828,-2.540613,rock
739,rock.00004_segment_1.wav,-167.203339,143.874512,12.607652,50.331360,-0.389795,35.844570,-3.877623,16.113270,-9.246787,...,13.209968,-6.202027,2.846851,-5.243724,3.043988,-4.327083,-3.267553,-3.992585,-2.327364,rock
740,rock_36_segment_5.wav,-269.519012,182.936691,9.128018,7.246886,6.279178,-1.525924,-8.689502,7.118561,-5.731806,...,-1.625070,-10.613927,-0.030460,0.623478,-1.333453,-3.286373,0.691123,-6.654511,-1.414237,rock
741,rock_45_segment_3.wav,39.978085,83.575760,-13.916237,31.080481,9.406852,10.378225,-1.250743,9.424727,-1.540306,...,7.950949,0.446705,5.107883,0.307805,3.498081,-2.379462,3.510975,-0.273814,-0.689963,rock


### Adding More Features

In [27]:
def extract_features(file_path):
    # Loading audio file to librosa
    y, sr = librosa.load(file_path, offset=0, duration=30) 
    
    #obtain melspectogram features
    melspec = librosa.feature.melspectrogram(y=y, sr=sr)
    melspec_mean = melspec.mean(axis=1)
    melspec_var = melspec.std(axis=1)
    melspec_min = melspec.min(axis=1)
    melspec_max = melspec.max(axis=1)

    # obtain spectral centroid features
    centroid = librosa.feature.spectral_centroid(y=y, sr=sr)
    centroid_mean = centroid.mean(axis=1)
    centroid_var = centroid.std(axis=1)
    centroid_min = centroid.min(axis=1)
    centroid_max = centroid.max(axis=1)

    # obtain chroma vector features
    chroma = librosa.feature.chroma_stft(y=y, sr=sr)
    chroma_mean = chroma.mean(axis=1)
    chroma_var = chroma.std(axis =1)
    chroma_min = chroma.min(axis=1)
    chroma_max = chroma.max(axis=1)

    #getting tonnetz
    tonnetz = librosa.feature.tonnetz(y=y, sr=sr)
    tntz_mean = tonnetz.mean(axis=1)
    tntz_var = tonnetz.std(axis=1)
    tntz_min = tonnetz.min(axis=1)
    tntz_max = tonnetz.max(axis=1)

    #root-mean-squared
    rms = librosa.feature.rms(y=y)
    rms_mean = rms.mean(axis = 1)
    rms_var = rms.std(axis = 1)

    #getting tempo
    onset_env = librosa.onset.onset_strength(y=y, sr=sr)
    tempo = np.array(librosa.feature.tempo(onset_envelope=onset_env, sr=sr))


    features = np.concatenate((melspec_mean, melspec_var, melspec_min, melspec_max,
                               centroid_mean, centroid_var, centroid_min, centroid_max,
                               chroma_mean, chroma_var, chroma_min, chroma_max,
                               tntz_mean, tntz_var, tntz_min, tntz_max,
                               rms_mean, rms_var, tempo))
    return features


In [28]:
def process_wav(directory):
    data2 = {
        'file_name': [],
        'centroid_mean': [], 'centroid_var': [], 'centroid_min': [], 'centroid_max': [], 
        'chroma_mean': [], 'chroma_var': [], 'chroma_min': [], 'chroma_max': [],
        'tntz_mean': [], 'tntz_var': [], 'tntz_min': [], 'tntz_max': [],
        'melspec_mean': [], 'melspec_var': [], 'melspec_min': [], 'melspec_max': [],
        'rms_mean':[], 'rms_var':[],
        'tempo':[]
        
    }
    
    for filename in os.listdir(directory):
        if filename.endswith(".wav"):
            file_path = os.path.join(directory, filename)
            feat = extract_features(file_path)
            data2['file_name'].append(filename)
            data2['centroid_mean'].append(feat[0])
            data2['centroid_var'].append(feat[1])
            data2['centroid_min'].append(feat[2])
            data2['centroid_max'].append(feat[3])
            data2['chroma_mean'].append(feat[4])
            data2['chroma_var'].append(feat[5])
            data2['chroma_min'].append(feat[6])
            data2['chroma_max'].append(feat[7])
            data2['tntz_mean'].append(feat[8])
            data2['tntz_var'].append(feat[9])
            data2['tntz_min'].append(feat[10])
            data2['tntz_max'].append(feat[11])
            data2['melspec_mean'].append(feat[12])
            data2['melspec_var'].append(feat[13])
            data2['melspec_min'].append(feat[14])
            data2['melspec_max'].append(feat[15])
            data2['rms_mean'].append(feat[16])
            data2['rms_var'].append(feat[17])
            data2['tempo'].append(feat[18])
    return data2

In [29]:
data2 = process_wav(pop_dir)
pop = pd.DataFrame(data2)

In [30]:
pop['genre'] = 'pop'

In [31]:
class_data2 = process_wav(class_dir)
classical = pd.DataFrame(class_data2)

In [32]:
classical['genre'] = 'classical'

In [33]:
country_data2 = process_wav(country_dir)
country = pd.DataFrame(country_data2)

In [34]:
country['genre'] = 'country'

In [35]:
metal_data2 = process_wav(metal_dir)
metal = pd.DataFrame(metal_data2)

In [36]:
metal['genre'] = 'metal'

In [37]:
hiphop_data2 = process_wav(hiphop_dir)
hiphop = pd.DataFrame(hiphop_data2)

In [38]:
hiphop['genre'] = 'hiphop'

In [39]:
blues_data2 = process_wav(directory)
blues = pd.DataFrame(blues_data2)

In [40]:
blues['genre'] = 'blues'

In [41]:
jazz_data2 = process_wav(jazz_dir)
jazz = pd.DataFrame(jazz_data2)

In [42]:
jazz['genre'] = 'jazz'

In [43]:
reggae_data2 = process_wav(reggae_dir)
reggae = pd.DataFrame(reggae_data2)

In [44]:
reggae['genre'] = 'reggae'

In [45]:
rock_data2 = process_wav(rock_dir)
rock = pd.DataFrame(rock_data2)

In [46]:
rock['genre'] = 'rock'

In [47]:
disco_data2 = process_wav(disco_dir)
disco = pd.DataFrame(disco_data2)

In [48]:
disco['genre'] = 'disco'

In [49]:
df2 = pd.concat([pop, classical, country, metal, hiphop, blues,
                jazz, reggae, rock, disco], axis = 0)
df2

Unnamed: 0,file_name,centroid_mean,centroid_var,centroid_min,centroid_max,chroma_mean,chroma_var,chroma_min,chroma_max,tntz_mean,...,tntz_min,tntz_max,melspec_mean,melspec_var,melspec_min,melspec_max,rms_mean,rms_var,tempo,genre
0,pop.00043_segment_2.wav,82.793015,596.820190,467.516693,258.133148,208.543640,87.951103,46.422832,22.718653,14.411210,...,9.441512,7.465686,5.300585,9.471663,3.713373,2.897366,2.201550,2.950417,1.918172,pop
1,pop.00024_segment_1.wav,24.541426,196.091431,223.728470,157.544067,78.254951,87.790657,41.718082,39.314674,28.200871,...,11.000272,6.496752,3.944693,2.040767,1.727742,3.659822,8.542729,10.284432,11.067837,pop
2,pop_84_segment_4.wav,11.052395,226.961273,344.616241,179.121689,80.509331,39.219299,36.326817,60.923229,66.588425,...,70.155067,48.747761,32.478123,24.804539,10.288643,7.018808,13.777174,7.533862,8.185196,pop
3,pop.00099_segment_1.wav,13.169909,120.634506,77.438553,24.580126,39.892361,22.157749,13.241145,31.302496,25.418041,...,13.425123,10.485983,15.299289,12.685281,14.964963,15.344956,13.740293,5.640285,7.799134,pop
4,pop.00030_segment_4.wav,7.483120,153.786819,162.466782,62.280880,37.295795,17.394249,10.789756,31.936459,16.220070,...,26.550482,33.872925,14.482818,9.942794,5.841614,28.704268,10.480478,10.087404,6.537398,pop
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
673,disco_26_segment_4.wav,4.315606,49.210949,38.880474,26.366489,13.416600,9.585223,11.263802,12.715696,12.595950,...,32.542149,38.058819,62.938992,9.606253,3.066533,2.048466,6.184352,2.101072,1.404086,disco
674,disco.00096_segment_1.wav,4.475010,28.985567,16.983541,7.337393,11.193970,10.793864,2.614853,1.577539,2.108343,...,6.038455,1.356472,1.469308,1.415725,1.684130,0.776304,3.510303,1.644192,0.263944,disco
675,disco.00071_segment_3.wav,1.572156,33.452465,52.495701,15.322603,26.357399,20.187828,15.768407,5.164927,3.692800,...,1.841224,4.537781,5.687685,3.222504,1.866971,1.413203,0.951695,2.246306,1.791022,disco
676,disco.00082_segment_4.wav,8.121984,130.395889,176.070114,105.075607,75.766113,86.074669,57.349949,25.483873,15.053349,...,7.986121,11.477981,11.873212,5.708128,6.470843,10.696095,4.656980,2.386635,4.035692,disco


In [50]:
music = pd.merge(df,df2, on ='file_name')
music

Unnamed: 0,file_name,mfcc_mean,mfcc_var,mfcc_min,mfcc_max,mfcc1_mean,mfcc1_var,mfcc1_min,mfcc1_max,mfcc2_mean,...,tntz_min,tntz_max,melspec_mean,melspec_var,melspec_min,melspec_max,rms_mean,rms_var,tempo,genre_y
0,blues_69_segment_3.wav,-135.282867,108.550240,13.028976,43.992382,19.349297,20.206875,-1.577484,7.924092,1.519116,...,26.657469,12.485189,9.272328,4.058815,12.436750,7.658901,3.056901,10.527410,7.565155,blues
1,blues_27_segment_4.wav,-91.254097,129.466553,-4.100496,45.220657,7.252443,16.338186,0.856577,0.009967,1.849428,...,14.032310,25.550907,32.544811,9.085206,45.008915,46.512520,332.813812,129.471909,6.220565,blues
2,blues_54_segment_2.wav,-191.496078,139.016464,11.991453,28.216597,-0.720076,7.438097,6.724214,6.649870,-0.442536,...,4.716076,10.569683,13.037652,0.732540,0.283661,0.217625,0.489818,1.817209,13.725842,blues
3,blues_33_segment_1.wav,-121.252815,123.478157,13.837003,28.114388,7.535354,15.141087,-3.699708,6.035746,0.958965,...,13.126806,63.396244,21.241156,20.493299,11.539183,11.035036,10.340260,9.673729,3.113847,blues
4,blues.00009_segment_2.wav,-179.017548,138.225616,-30.912750,49.097755,-24.154978,7.536681,-9.466703,10.459454,-20.845018,...,28.867865,17.512299,12.562430,19.704935,44.741116,11.275842,0.719824,0.769180,3.037273,blues
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6982,rock.00063_segment_2.wav,-329.291077,139.876709,53.949989,17.256727,17.011780,9.052016,8.448787,2.826561,0.794123,...,2.449042,3.578562,1.214767,1.100771,1.418536,1.387431,0.550771,0.254566,0.513779,rock
6983,rock.00004_segment_1.wav,-167.203339,143.874512,12.607652,50.331360,-0.389795,35.844570,-3.877623,16.113270,-9.246787,...,15.285677,18.517099,7.640218,10.939061,13.667733,5.664557,9.104764,4.784025,4.133267,rock
6984,rock_36_segment_5.wav,-269.519012,182.936691,9.128018,7.246886,6.279178,-1.525924,-8.689502,7.118561,-5.731806,...,0.654456,7.530454,1.434070,0.361768,8.512635,1.714976,0.366517,7.676395,1.953892,rock
6985,rock_45_segment_3.wav,39.978085,83.575760,-13.916237,31.080481,9.406852,10.378225,-1.250743,9.424727,-1.540306,...,42.720882,33.466774,29.012289,35.188614,17.988277,31.572403,30.429539,89.772820,35.103870,rock


In [54]:
music.drop(columns = 'genre_x', inplace = True)