In [1]:
import pandas as pd 
import numpy as np 

from tensorflow import keras
import os

import soundfile as sf

import matplotlib.pyplot as plt
%matplotlib inline

import seaborn as sns 

import librosa
import librosa.display

import warnings
warnings.filterwarnings('ignore')

2023-07-23 21:28:49.480227: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


### MFCC Features

In [2]:
def mfcc_features(file_path):
    # Loading audio file to librosa
    y, sr = librosa.load(file_path, offset=0, duration=30)
    
    # obtain mfcc features
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=40, hop_length=512, n_fft=2048)
    mfcc_mean = mfcc.mean(axis=1)
    mfcc_var = mfcc.std(axis=1)
    mfcc_min = mfcc.min(axis=1)
    mfcc_max = mfcc.max(axis=1)
    
    mfcc_1 = librosa.feature.mfcc(y=y, sr=sr, hop_length=512, n_fft=2048, n_mfcc=40)
    mfcc1_mean = mfcc_1.mean(axis=1)
    mfcc1_var = mfcc_1.std(axis=1)
    mfcc1_min = mfcc_1.min(axis=1)
    mfcc1_max = mfcc_1.max(axis=1)
    
    mfcc_2 = librosa.feature.mfcc(y=y, sr=sr, hop_length=256, n_fft=1024, n_mfcc=40)
    mfcc2_mean = mfcc_2.mean(axis=1)
    mfcc2_var = mfcc_2.std(axis=1)
    mfcc2_min = mfcc_2.min(axis=1)
    mfcc2_max = mfcc_2.max(axis=1)
    
    delta_mfcc = librosa.feature.delta(mfcc)
    delta_mean = delta_mfcc.mean(axis=1)
    delta_var = delta_mfcc.std(axis=1)
    
    delta2_mfcc = librosa.feature.delta(mfcc, order=2)
    delta2_mean = delta2_mfcc.mean(axis=1)
    delta2_var = delta2_mfcc.std(axis=1)
    
    y_harmonic, y_percussive = librosa.effects.hpss(y)

    mfcc_harmonic = librosa.feature.mfcc(y=y_harmonic, sr=sr, n_mfcc=40)
    harmonic_mean = mfcc_harmonic.mean(axis=1)
    harmonic_var = mfcc_harmonic.std(axis=1)
    
    mfcc_percussive = librosa.feature.mfcc(y=y_percussive, sr=sr, n_mfcc=40)
    percussive_mean = mfcc_percussive.mean(axis=1)
    percussive_var = mfcc_percussive.std(axis=1)

    
    mfcc_feature = np.concatenate((mfcc_mean, mfcc_var, mfcc_min, mfcc_max, 
                                  mfcc1_mean, mfcc1_var, mfcc1_min, mfcc1_max,
                                  mfcc2_mean, mfcc2_var, mfcc2_min, mfcc2_max,
                                  delta_mean, delta_var, delta2_mean, delta2_var,
                                  harmonic_mean, harmonic_var, 
                                  percussive_mean, percussive_var))

    return mfcc_feature


In [3]:
def process_wav_files(directory):
    data = {
        'file_name': [],
        'mfcc_mean': [], 'mfcc_var': [], 'mfcc_min': [],'mfcc_max': [],
        'mfcc1_mean': [], 'mfcc1_var': [], 'mfcc1_min': [],'mfcc1_max': [],
        'mfcc2_mean': [], 'mfcc2_var': [], 'mfcc2_min': [],'mfcc2_max': [],
        'delta_mean': [], 'delta_var':[], 'delta2_mean': [], 'delta2_var':[], 
        'harmonic_mean': [], 'harmonic_var':[], 'percussive_mean': [], 'percussive_var':[]
        
    }
    
    for filename in os.listdir(directory):
        if filename.endswith(".wav"):
            file_path = os.path.join(directory, filename)
            feat = mfcc_features(file_path)
            data['file_name'].append(filename)
            data['mfcc_mean'].append(feat[0])
            data['mfcc_var'].append(feat[1])
            data['mfcc_min'].append(feat[2])
            data['mfcc_max'].append(feat[3])
            data['mfcc1_mean'].append(feat[4])
            data['mfcc1_var'].append(feat[5])
            data['mfcc1_min'].append(feat[6])
            data['mfcc1_max'].append(feat[7])
            data['mfcc2_mean'].append(feat[8])
            data['mfcc2_var'].append(feat[9])
            data['mfcc2_min'].append(feat[10])
            data['mfcc2_max'].append(feat[11])
            data['delta_mean'].append(feat[12])
            data['delta_var'].append(feat[13])
            data['delta2_mean'].append(feat[14])
            data['delta2_var'].append(feat[15])
            data['harmonic_mean'].append(feat[16])
            data['harmonic_var'].append(feat[17])
            data['percussive_mean'].append(feat[18])
            data['percussive_var'].append(feat[19])

    return data

In [4]:
directory = '/Users/kellyjara/Desktop/Project 5 (copy)/Data/genres_original/pop'
data = process_wav_files(directory)
pop_df = pd.DataFrame(data)

In [7]:
pop_df['genre'] = 'pop'

In [8]:
class_dir = '/Users/kellyjara/Desktop/Project 5 (copy)/Data/genres_original/classical'
class_data = process_wav_files(class_dir)
classical_df = pd.DataFrame(class_data)

In [9]:
classical_df['genre'] = 'classical'

In [10]:
country_dir = '/Users/kellyjara/Desktop/Project 5 (copy)/Data/genres_original/country'
country_data = process_wav_files(country_dir)
country_df = pd.DataFrame(country_data)

In [11]:
country_df['genre'] = 'country'

In [12]:
metal_dir = '/Users/kellyjara/Desktop/Project 5 (copy)/Data/genres_original/metal'
metal_data = process_wav_files(metal_dir)
metal_df = pd.DataFrame(metal_data)

In [13]:
metal_df['genre'] = 'metal'

In [14]:
hiphop_dir = '/Users/kellyjara/Desktop/Project 5 (copy)/Data/genres_original/hiphop'
hiphop_data = process_wav_files(hiphop_dir)
hiphop_df = pd.DataFrame(hiphop_data)

In [15]:
hiphop_df['genre'] = 'hiphop'

In [16]:
blues_dir = '/Users/kellyjara/Desktop/Project 5 (copy)/Data/genres_original/blues'
blues_data = process_wav_files(blues_dir)
blues_df = pd.DataFrame(blues_data)

In [17]:
blues_df['genre'] = 'blues'

In [18]:
jazz_dir = '/Users/kellyjara/Desktop/Project 5 (copy)/Data/genres_original/jazz'
jazz_data = process_wav_files(jazz_dir)
jazz_df = pd.DataFrame(jazz_data)

In [19]:
jazz_df['genre'] = 'jazz'

In [20]:
reggae_dir = '/Users/kellyjara/Desktop/Project 5 (copy)/Data/genres_original/reggae'
reggae_data = process_wav_files(reggae_dir)
reggae_df = pd.DataFrame(reggae_data)

In [21]:
reggae_df['genre'] = 'reggae'

In [22]:
rock_dir = '/Users/kellyjara/Desktop/Project 5 (copy)/Data/genres_original/rock'
rock_data = process_wav_files(rock_dir)
rock_df = pd.DataFrame(rock_data)

In [23]:
rock_df['genre'] = 'rock'

In [24]:
disco_dir = '/Users/kellyjara/Desktop/Project 5 (copy)/Data/genres_original/disco'
disco_data = process_wav_files(disco_dir)
disco_df = pd.DataFrame(disco_data)

In [25]:
disco_df['genre'] = 'disco'

In [26]:
df = pd.concat([pop_df, classical_df, country_df, metal_df, hiphop_df, 
                blues_df, jazz_df, reggae_df, rock_df, disco_df], axis = 0)
df

Unnamed: 0,file_name,mfcc_mean,mfcc_var,mfcc_min,mfcc_max,mfcc1_mean,mfcc1_var,mfcc1_min,mfcc1_max,mfcc2_mean,...,mfcc2_max,delta_mean,delta_var,delta2_mean,delta2_var,harmonic_mean,harmonic_var,percussive_mean,percussive_var,genre
0,pop_19.wav,-8.942821,64.183517,-1.467964,23.011240,4.736657,6.307176,8.763370,8.697861,0.020061,...,5.837245,2.375216,6.871263,2.162678,3.867641,-0.503229,6.113183,1.010961,4.799995,pop
1,pop_25.wav,-45.097984,88.633492,8.985597,31.276129,6.828823,15.752106,0.584817,10.050541,0.788925,...,4.653553,5.386140,4.727638,3.375497,2.840486,-8.272810,3.533610,-3.623963,1.001146,pop
2,pop_31.wav,-106.645210,65.831772,12.572899,30.452551,16.271381,3.800628,-1.045976,4.102991,-2.589358,...,-0.259178,-3.413261,-0.045900,-5.633716,-1.811600,-6.078490,1.124250,-6.002543,-4.504916,pop
3,pop.00027.wav,-75.366127,83.334877,10.703896,14.455102,14.244521,7.410257,5.279713,-0.400126,5.717757,...,-4.227081,-1.845522,-5.907954,-1.272481,-3.783554,-2.864618,-0.935335,-1.111585,-2.721470,pop
4,pop.00033.wav,-69.001930,73.769997,-12.468982,9.536624,-0.045610,12.718271,3.165059,-0.084658,2.522358,...,-3.025172,1.614928,1.089264,0.934386,-0.416273,-1.238210,-0.757658,-1.704667,-1.043756,pop
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,disco.00062.wav,-107.867599,81.936584,-30.634069,41.331936,-8.509501,31.333731,-8.869069,26.030899,-8.280142,...,7.425512,-4.352716,4.491106,-4.098702,3.072220,-0.286423,4.993085,0.406348,3.839272,disco
96,disco.00089.wav,-114.932045,113.463867,-29.804312,42.612103,-10.326010,26.973412,-14.493735,21.240532,-17.650539,...,8.591482,-11.427423,6.205875,-10.899924,3.658359,-10.317460,0.562174,-4.165685,3.474561,disco
97,disco.00088.wav,-97.885620,101.003601,-40.547611,51.050190,-11.709620,25.064747,-14.048207,17.197395,-9.623669,...,11.723433,-5.716229,6.249413,-6.060791,4.687675,-6.252405,-0.984315,-2.744111,0.627368,disco
98,disco.00063.wav,-51.776085,70.331352,-3.911701,49.742279,1.587971,35.703487,-4.088695,24.787909,-9.707024,...,17.181259,-2.766424,11.962386,-1.631016,6.924509,-2.909797,4.914970,-0.487813,6.734445,disco


### Adding More Features

In [30]:
def extract_features(file_path):
    # Loading audio file to librosa
    y, sr = librosa.load(file_path, offset=0, duration=30) 
    
    #obtain melspectogram features
    melspec = librosa.feature.melspectrogram(y=y, sr=sr)
    melspec_mean = melspec.mean(axis=1)
    melspec_var = melspec.std(axis=1)
    melspec_min = melspec.min(axis=1)
    melspec_max = melspec.max(axis=1)

    # obtain spectral centroid features
    centroid = librosa.feature.spectral_centroid(y=y, sr=sr)
    centroid_mean = centroid.mean(axis=1)
    centroid_var = centroid.std(axis=1)
    centroid_min = centroid.min(axis=1)
    centroid_max = centroid.max(axis=1)

    # obtain chroma vector features
    chroma = librosa.feature.chroma_stft(y=y, sr=sr)
    chroma_mean = chroma.mean(axis=1)
    chroma_var = chroma.std(axis =1)
    chroma_min = chroma.min(axis=1)
    chroma_max = chroma.max(axis=1)

    #getting tonnetz
    tonnetz = librosa.feature.tonnetz(y=y, sr=sr)
    tntz_mean = tonnetz.mean(axis=1)
    tntz_var = tonnetz.std(axis=1)
    tntz_min = tonnetz.min(axis=1)
    tntz_max = tonnetz.max(axis=1)

    #root-mean-squared
    rms = librosa.feature.rms(y=y)
    rms_mean = rms.mean(axis = 1)
    rms_var = rms.std(axis = 1)

    #getting tempo
    onset_env = librosa.onset.onset_strength(y=y, sr=sr)
    tempo = np.array(librosa.feature.tempo(onset_envelope=onset_env, sr=sr))


    features = np.concatenate((melspec_mean, melspec_var, melspec_min, melspec_max,
                               centroid_mean, centroid_var, centroid_min, centroid_max,
                               chroma_mean, chroma_var, chroma_min, chroma_max,
                               tntz_mean, tntz_var, tntz_min, tntz_max,
                               rms_mean, rms_var, tempo))
    return features


In [31]:
def process_wav(directory):
    data2 = {
        'file_name': [],
        'centroid_mean': [], 'centroid_var': [], 'centroid_min': [], 'centroid_max': [], 
        'chroma_mean': [], 'chroma_var': [], 'chroma_min': [], 'chroma_max': [],
        'tntz_mean': [], 'tntz_var': [], 'tntz_min': [], 'tntz_max': [],
        'melspec_mean': [], 'melspec_var': [], 'melspec_min': [], 'melspec_max': [],
        'rms_mean':[], 'rms_var':[],
        'tempo':[]
        
    }
    
    for filename in os.listdir(directory):
        if filename.endswith(".wav"):
            file_path = os.path.join(directory, filename)
            feat = extract_features(file_path)
            data2['file_name'].append(filename)
            data2['centroid_mean'].append(feat[0])
            data2['centroid_var'].append(feat[1])
            data2['centroid_min'].append(feat[2])
            data2['centroid_max'].append(feat[3])
            data2['chroma_mean'].append(feat[4])
            data2['chroma_var'].append(feat[5])
            data2['chroma_min'].append(feat[6])
            data2['chroma_max'].append(feat[7])
            data2['tntz_mean'].append(feat[8])
            data2['tntz_var'].append(feat[9])
            data2['tntz_min'].append(feat[10])
            data2['tntz_max'].append(feat[11])
            data2['melspec_mean'].append(feat[12])
            data2['melspec_var'].append(feat[13])
            data2['melspec_min'].append(feat[14])
            data2['melspec_max'].append(feat[15])
            data2['rms_mean'].append(feat[16])
            data2['rms_var'].append(feat[17])
            data2['tempo'].append(feat[18])
    return data2

In [32]:
data2 = process_wav(directory)
pop = pd.DataFrame(data2)

In [34]:
pop['genre'] = 'pop'

In [35]:
class_data2 = process_wav(class_dir)
classical = pd.DataFrame(class_data2)

In [36]:
classical['genre'] = 'classical'

In [37]:
country_data2 = process_wav(country_dir)
country = pd.DataFrame(country_data2)

In [38]:
country['genre'] = 'country'

In [39]:
metal_data2 = process_wav(metal_dir)
metal = pd.DataFrame(metal_data2)

In [40]:
metal['genre'] = 'metal'

In [41]:
hiphop_data2 = process_wav(hiphop_dir)
hiphop = pd.DataFrame(hiphop_data2)

In [42]:
hiphop['genre'] = 'hiphop'

In [43]:
blues_data2 = process_wav(blues_dir)
blues = pd.DataFrame(blues_data2)

In [44]:
blues['genre'] = 'blues'

In [45]:
jazz_data2 = process_wav(jazz_dir)
jazz = pd.DataFrame(jazz_data2)

In [46]:
jazz['genre'] = 'jazz'

In [48]:
reggae_data2 = process_wav(reggae_dir)
reggae = pd.DataFrame(reggae_data2)

In [49]:
reggae['genre'] = 'reggae'

In [50]:
rock_data2 = process_wav(rock_dir)
rock = pd.DataFrame(rock_data2)

In [51]:
rock['genre'] = 'rock'

In [52]:
disco_data2 = process_wav(disco_dir)
disco = pd.DataFrame(disco_data2)

In [55]:
disco['genre'] = 'disco'

In [56]:
df2 = pd.concat([pop, classical, country, metal, hiphop, blues,
                jazz, reggae, rock, disco], axis = 0)
df2

Unnamed: 0,file_name,centroid_mean,centroid_var,centroid_min,centroid_max,chroma_mean,chroma_var,chroma_min,chroma_max,tntz_mean,...,tntz_min,tntz_max,melspec_mean,melspec_var,melspec_min,melspec_max,rms_mean,rms_var,tempo,genre
0,pop_19.wav,82.084167,603.231445,505.518097,157.195404,91.630730,33.797661,26.031683,16.777105,18.285719,...,23.226763,25.148724,22.026917,16.932308,12.370562,8.246605,19.116907,11.814075,30.205139,pop
1,pop_25.wav,18.040155,253.624084,876.471191,352.951660,111.919807,128.307251,57.621445,46.264248,46.435326,...,12.809576,15.228867,17.706797,21.371332,16.580214,12.868278,15.292180,53.268745,16.516775,pop
2,pop_31.wav,12.653563,79.815910,86.097214,72.415031,40.156002,22.366922,41.162632,54.717033,61.719608,...,57.029930,33.422485,22.549700,19.325535,4.672528,16.462545,8.511788,15.057601,5.709808,pop
3,pop.00027.wav,14.005191,90.825630,86.733261,63.581123,38.495579,23.554655,29.044807,24.611513,49.661633,...,36.525730,20.675253,10.802331,14.559845,15.033474,5.253385,9.168291,4.383974,5.567156,pop
4,pop.00033.wav,9.993971,53.506088,63.174603,53.627644,41.017807,23.307215,19.838682,13.002764,9.881503,...,5.814939,4.739766,8.573198,7.718379,8.373836,5.137630,4.571923,2.318799,5.877789,pop
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,disco.00062.wav,12.945897,86.035675,164.240356,128.113083,126.318604,47.431427,27.740719,28.578512,17.882576,...,1.811980,4.863695,3.857496,4.530632,5.103957,2.381736,1.127911,1.859340,0.916801,disco
96,disco.00089.wav,0.608747,15.175939,27.045454,18.919024,20.652573,16.974533,19.426746,16.754148,10.588133,...,4.009619,4.307391,7.870385,8.390940,6.161437,2.004640,2.895298,3.721590,1.561397,disco
97,disco.00088.wav,1.931880,24.287342,68.514572,82.372833,65.294823,38.784081,18.444923,10.076565,4.725421,...,2.948735,2.458494,2.252382,2.389016,3.919963,2.275582,2.969935,1.674802,1.885246,disco
98,disco.00063.wav,105.429420,896.089417,1011.060852,385.247620,152.284042,83.957321,49.250954,19.111797,10.864658,...,5.689360,7.742268,8.657559,7.667148,4.423709,4.082315,3.176005,2.357672,1.589757,disco


In [58]:
music = pd.merge(df,df2, on ='file_name')
music

Unnamed: 0,file_name,mfcc_mean,mfcc_var,mfcc_min,mfcc_max,mfcc1_mean,mfcc1_var,mfcc1_min,mfcc1_max,mfcc2_mean,...,tntz_min,tntz_max,melspec_mean,melspec_var,melspec_min,melspec_max,rms_mean,rms_var,tempo,genre_y
0,pop_19.wav,-8.942821,64.183517,-1.467964,23.011240,4.736657,6.307176,8.763370,8.697861,0.020061,...,23.226763,25.148724,22.026917,16.932308,12.370562,8.246605,19.116907,11.814075,30.205139,pop
1,pop_25.wav,-45.097984,88.633492,8.985597,31.276129,6.828823,15.752106,0.584817,10.050541,0.788925,...,12.809576,15.228867,17.706797,21.371332,16.580214,12.868278,15.292180,53.268745,16.516775,pop
2,pop_31.wav,-106.645210,65.831772,12.572899,30.452551,16.271381,3.800628,-1.045976,4.102991,-2.589358,...,57.029930,33.422485,22.549700,19.325535,4.672528,16.462545,8.511788,15.057601,5.709808,pop
3,pop.00027.wav,-75.366127,83.334877,10.703896,14.455102,14.244521,7.410257,5.279713,-0.400126,5.717757,...,36.525730,20.675253,10.802331,14.559845,15.033474,5.253385,9.168291,4.383974,5.567156,pop
4,pop.00033.wav,-69.001930,73.769997,-12.468982,9.536624,-0.045610,12.718271,3.165059,-0.084658,2.522358,...,5.814939,4.739766,8.573198,7.718379,8.373836,5.137630,4.571923,2.318799,5.877789,pop
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1512,disco.00062.wav,-107.867599,81.936584,-30.634069,41.331936,-8.509501,31.333731,-8.869069,26.030899,-8.280142,...,1.811980,4.863695,3.857496,4.530632,5.103957,2.381736,1.127911,1.859340,0.916801,disco
1513,disco.00089.wav,-114.932045,113.463867,-29.804312,42.612103,-10.326010,26.973412,-14.493735,21.240532,-17.650539,...,4.009619,4.307391,7.870385,8.390940,6.161437,2.004640,2.895298,3.721590,1.561397,disco
1514,disco.00088.wav,-97.885620,101.003601,-40.547611,51.050190,-11.709620,25.064747,-14.048207,17.197395,-9.623669,...,2.948735,2.458494,2.252382,2.389016,3.919963,2.275582,2.969935,1.674802,1.885246,disco
1515,disco.00063.wav,-51.776085,70.331352,-3.911701,49.742279,1.587971,35.703487,-4.088695,24.787909,-9.707024,...,5.689360,7.742268,8.657559,7.667148,4.423709,4.082315,3.176005,2.357672,1.589757,disco
