In [29]:
import os
import sklearn
import librosa
import numpy as np

In [30]:
def list_dir(directory):
    directories = []
    for item in os.listdir(directory):
        if os.path.isdir(os.path.join(directory, item)) and not item.startswith('_'):
            directories.append(item)
    return directories

def list_wav(dir):
    files = os.listdir(dir)
    wav_files = [file for file in files if file.endswith('.wav')]

    return wav_files

### Read Files

In [31]:
rootpath = os.getcwd()+'/genre'
dirs = list_dir(rootpath)

files = {}
for gen in dirs:
    files[gen] = list_wav(rootpath+'/'+gen)

print(files)
print(dirs)

{'blues': ['blues.00000.wav', 'blues.00001.wav', 'blues.00002.wav', 'blues.00003.wav', 'blues.00004.wav', 'blues.00005.wav', 'blues.00006.wav', 'blues.00007.wav', 'blues.00008.wav', 'blues.00009.wav', 'blues.00010.wav', 'blues.00011.wav', 'blues.00012.wav', 'blues.00013.wav', 'blues.00014.wav', 'blues.00015.wav', 'blues.00016.wav', 'blues.00017.wav', 'blues.00018.wav', 'blues.00019.wav', 'blues.00020.wav', 'blues.00021.wav', 'blues.00022.wav', 'blues.00023.wav', 'blues.00024.wav', 'blues.00025.wav', 'blues.00026.wav', 'blues.00027.wav', 'blues.00028.wav', 'blues.00029.wav', 'blues.00030.wav', 'blues.00031.wav', 'blues.00032.wav', 'blues.00033.wav', 'blues.00034.wav', 'blues.00035.wav', 'blues.00036.wav', 'blues.00037.wav', 'blues.00038.wav', 'blues.00039.wav', 'blues.00040.wav', 'blues.00041.wav', 'blues.00042.wav', 'blues.00043.wav', 'blues.00044.wav', 'blues.00045.wav', 'blues.00046.wav', 'blues.00047.wav', 'blues.00048.wav', 'blues.00049.wav'], 'classical': ['classical.00000.wav', '

### Feature Extraction

In [32]:
from scipy.stats import skew
import pandas as pd
genre = ['blues', 'classical', 'country', 'disco', 'hiphop', 'jazz', 'metal', 'pop', 'reggae', 'rock']

def genre_encode(type):
    return genre.index(type)

# Feature Extraction
def feature_extraction(filepath,type):
    y, sr = librosa.load(filepath)
    ## Timbral
    spectral_centroid = librosa.feature.spectral_centroid(y=y, sr=sr)
    spectral_centroid_skewness = skew(spectral_centroid.ravel())
    spectral_rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)
    spectral_flux = librosa.onset.onset_strength(y=y, sr=sr)
    zcr = librosa.feature.zero_crossing_rate(y=y)
    mfccs = librosa.feature.mfcc(y=y, sr=sr)
    energy = np.sum(np.abs(y) ** 2) / len(y)
    ## Rhythmic
    rcf, _ = librosa.beat.beat_track(y=y, sr=sr)
    ## Pitch
    pcf = librosa.feature.chroma_stft(y=y, sr=sr)

    return {
        'sp_ce': np.mean(spectral_centroid),
        'sp_ro': np.mean(spectral_rolloff),
        'sp_fl': np.mean(spectral_flux),
        'zcr': np.max(zcr),
        'mfccs': np.mean(np.mean(mfccs,axis=1)),
        'energy': energy,
        'sp_ce_skew': spectral_centroid_skewness,
        'tempo':rcf[0],
        'chroma':np.max(np.mean(pcf,axis=1)),
        'genre':genre_encode(type)
    }

df = pd.DataFrame(columns=['sp_ce','sp_ro','sp_fl','zcr','mfccs','energy','sp_ce_skew','tempo','chroma','genre'])
for music in dirs:
    for file in files[music]:
        new_audio = feature_extraction(rootpath+'/'+music+'/'+file,music)
        df.loc[len(df)] = new_audio


In [33]:
display(df)

Unnamed: 0,sp_ce,sp_ro,sp_fl,zcr,mfccs,energy,sp_ce_skew,tempo,chroma,genre
0,1784.122641,3805.723030,1.391777,0.242676,1.936639,0.019793,1.539074,123.046875,0.439834,0
1,1530.261767,3550.713616,1.445264,0.323730,-0.057255,0.011582,1.337133,67.999589,0.560355,0
2,1552.832481,3042.410115,1.638602,0.440918,1.032618,0.033572,1.927275,161.499023,0.588508,0
3,1070.153418,2184.879029,1.248168,0.145020,0.068924,0.026254,1.001462,63.024009,0.657075,0
4,1835.128513,3579.957471,1.645293,0.366211,-8.460350,0.010682,1.050924,135.999178,0.555124,0
...,...,...,...,...,...,...,...,...,...,...
495,1561.411107,3243.286756,1.019089,0.173828,-5.361122,0.010225,-0.709059,151.999081,0.714005,9
496,2429.067900,5035.572025,1.268373,0.327637,0.010104,0.015929,0.825959,103.359375,0.588969,9
497,2997.853838,5962.265829,1.190302,0.722656,1.163031,0.021289,1.382721,107.666016,0.593037,9
498,1646.635926,3184.299435,1.197816,0.170898,-1.091420,0.021777,0.067954,129.199219,0.678783,9


In [34]:
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score,confusion_matrix

df = df.sample(frac=1.0,random_state=118)

X = df.drop(columns=['genre'])
y = df['genre']


kf = KFold(n_splits=5, shuffle=True, random_state=42)
svm_acc = []
knn_acc = []
rf_acc = []

## MODEL-SVM

In [None]:
from sklearn.svm import SVC

svm = SVC(kernel='linear')

for i, (train_index, test_index) in enumerate(kf.split(X)):
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    # Train SVM on training data
    svm.fit(X_train, y_train)
    
    # Predict labels on test data
    y_pred = svm.predict(X_test)
    
    # Calculate accuracy
    print(f'{i+1}th validation:')
    accuracy = accuracy_score(y_test, y_pred)
    print('accuracy:',accuracy)
    svm_acc.append(accuracy)

    print('confusion matrix:\n',confusion_matrix(y_test,y_pred))

print('overall acc:',np.mean(svm_acc))

1th validation:
accuracy: 0.44
confusion matrix:
 [[4 0 1 0 0 3 0 2 0 1]
 [0 7 0 0 0 1 0 0 0 0]
 [0 1 1 2 0 1 1 3 0 3]
 [0 0 0 2 1 1 1 2 0 2]
 [0 0 1 3 3 1 1 2 1 0]
 [1 3 0 0 0 7 0 0 0 0]
 [0 0 1 2 0 0 3 0 0 1]
 [0 0 0 1 0 0 0 5 1 0]
 [1 1 1 0 0 0 1 1 9 0]
 [0 0 1 0 0 2 1 1 1 3]]
2th validation:
accuracy: 0.53
confusion matrix:
 [[9 0 1 0 0 2 0 0 0 1]
 [0 7 0 0 0 0 0 0 0 1]
 [1 0 1 0 0 1 0 3 0 0]
 [0 0 0 5 1 0 2 0 1 0]
 [1 0 0 1 4 0 1 1 2 0]
 [2 2 3 0 0 2 0 0 0 0]
 [0 0 0 2 0 0 9 1 0 0]
 [0 0 0 5 0 0 0 6 0 0]
 [4 0 0 0 0 0 0 0 8 0]
 [3 0 3 0 0 0 2 0 0 2]]
3th validation:
accuracy: 0.41
confusion matrix:
 [[8 0 1 0 0 1 0 1 0 0]
 [0 7 0 0 0 0 0 0 0 1]
 [3 0 0 1 0 1 0 4 1 2]
 [0 0 0 2 2 0 0 4 1 1]
 [0 0 0 1 4 0 0 3 3 0]
 [3 1 1 1 0 1 0 0 0 0]
 [0 0 2 1 1 0 7 0 0 1]
 [0 0 1 0 0 0 1 5 0 2]
 [2 0 0 0 1 1 0 0 5 0]
 [1 0 1 3 0 0 2 0 2 2]]
4th validation:
accuracy: 0.51
confusion matrix:
 [[7 0 0 0 0 1 0 0 0 0]
 [1 9 0 0 0 3 0 0 0 0]
 [1 0 1 3 1 2 0 0 0 1]
 [0 0 1 5 4 0 2 3 1 0]
 [0 0 0 0 7 0 0

## MODEL-KNN

In [42]:
from sklearn.neighbors import KNeighborsClassifier

knn = KNeighborsClassifier(n_neighbors=8)

for i, (train_index, test_index) in enumerate(kf.split(X)):
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    # Train SVM on training data
    knn.fit(X_train, y_train)
    
    # Predict labels on test data
    y_pred = knn.predict(X_test)
    
    # Calculate accuracy
    print(f'{i+1}th validation:')
    accuracy = accuracy_score(y_test, y_pred)
    print('accuracy:',accuracy)
    knn_acc.append(accuracy)

    print('confusion matrix:\n',confusion_matrix(y_test,y_pred))

print('overall acc:',np.mean(knn_acc))

1th validation:
accuracy: 0.26
confusion matrix:
 [[4 0 2 0 1 2 0 0 2 0]
 [1 3 0 0 0 4 0 0 0 0]
 [0 1 3 2 1 1 0 1 0 3]
 [0 0 2 2 1 0 0 2 0 2]
 [0 0 0 3 4 0 2 0 1 2]
 [4 4 0 0 0 1 0 0 2 0]
 [0 0 1 3 1 0 1 0 1 0]
 [0 0 2 1 1 0 0 2 1 0]
 [2 0 1 1 2 1 0 2 5 0]
 [2 0 2 0 0 3 1 0 0 1]]
2th validation:
accuracy: 0.35
confusion matrix:
 [[5 3 1 0 0 4 0 0 0 0]
 [1 6 0 0 0 1 0 0 0 0]
 [1 0 1 0 0 1 0 2 1 0]
 [0 0 0 5 1 0 2 0 1 0]
 [2 0 2 2 1 0 2 1 0 0]
 [0 2 2 0 0 2 0 0 2 1]
 [1 0 1 1 0 0 8 1 0 0]
 [0 0 0 9 0 0 0 2 0 0]
 [1 0 0 1 2 3 0 0 5 0]
 [1 0 2 2 1 1 1 0 2 0]]
3th validation:
accuracy: 0.38
confusion matrix:
 [[3 1 0 0 2 4 0 0 1 0]
 [0 7 0 0 0 0 0 0 0 1]
 [3 0 2 0 2 0 0 1 1 3]
 [0 0 0 3 0 0 2 3 1 1]
 [0 0 1 1 2 1 1 4 1 0]
 [1 2 0 0 0 3 0 0 0 1]
 [0 0 1 1 1 1 8 0 0 0]
 [0 0 1 1 1 0 0 4 2 0]
 [0 1 0 1 0 1 0 0 5 1]
 [0 0 2 2 3 0 0 1 2 1]]
4th validation:
accuracy: 0.27
confusion matrix:
 [[3 1 0 0 1 1 0 0 2 0]
 [5 5 0 0 0 3 0 0 0 0]
 [0 1 4 1 1 2 0 0 0 0]
 [0 0 1 2 3 0 4 5 0 1]
 [0 0 3 2 0 0 0

## MODEL-Random Forest

In [14]:
from sklearn.ensemble import RandomForestClassifier 

rf = RandomForestClassifier()

for i, (train_index, test_index) in enumerate(kf.split(X)):
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    # Train SVM on training data
    rf.fit(X_train, y_train)
    
    # Predict labels on test data
    y_pred = rf.predict(X_test)
    
    # Calculate accuracy
    print(f'{i+1}th validation:')
    accuracy = accuracy_score(y_test, y_pred)
    print('accuracy:',accuracy)
    rf_acc.append(accuracy)

    print('confusion matrix:\n',confusion_matrix(y_test,y_pred))

print('overall acc:',np.mean(rf_acc))

1th validation:
accuracy: 0.57
confusion matrix:
 [[8 0 2 0 0 0 0 0 0 1]
 [0 4 0 0 0 4 0 0 0 0]
 [0 0 4 1 0 2 2 0 1 2]
 [0 0 0 6 1 0 0 0 1 1]
 [0 0 1 2 6 1 1 1 0 0]
 [1 3 0 0 0 7 0 0 0 0]
 [0 0 0 0 0 0 6 0 0 1]
 [0 0 0 0 3 0 1 2 0 1]
 [0 0 0 2 1 1 0 1 9 0]
 [0 0 2 1 0 0 0 0 1 5]]
2th validation:
accuracy: 0.74
confusion matrix:
 [[12  0  0  0  0  1  0  0  0  0]
 [ 0  7  0  0  0  0  0  0  0  1]
 [ 0  0  4  0  0  1  0  1  0  0]
 [ 0  0  0  7  1  0  0  0  1  0]
 [ 0  0  0  1  6  0  0  2  1  0]
 [ 0  2  0  0  0  7  0  0  0  0]
 [ 1  0  1  0  0  0 10  0  0  0]
 [ 0  0  0  2  2  0  0  6  1  0]
 [ 0  0  0  1  0  0  0  0 11  0]
 [ 4  0  1  0  0  0  1  0  0  4]]
3th validation:
accuracy: 0.63
confusion matrix:
 [[9 0 0 0 0 1 0 0 0 1]
 [0 7 0 0 0 0 0 0 0 1]
 [3 0 4 0 0 1 3 0 0 1]
 [0 0 0 6 1 0 1 1 0 1]
 [0 0 0 1 5 0 0 3 2 0]
 [0 0 0 0 0 6 1 0 0 0]
 [0 0 2 0 0 0 9 0 0 1]
 [0 0 2 0 0 0 0 5 0 2]
 [1 0 1 0 1 0 0 0 6 0]
 [0 0 0 1 1 0 2 0 1 6]]
4th validation:
accuracy: 0.66
confusion matrix:
 [[ 5  0

## MODEL - XGBoost

In [15]:
import xgboost as xgb

clf = xgb.XGBClassifier()
clf_acc = []

for i, (train_index, test_index) in enumerate(kf.split(X)):
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    # Train SVM on training data
    clf.fit(X_train, y_train)
    
    # Predict labels on test data
    y_pred = clf.predict(X_test)
    
    # Calculate accuracy
    print(f'{i+1}th validation:')
    accuracy = accuracy_score(y_test, y_pred)
    print('accuracy:',accuracy)
    clf_acc.append(accuracy)

    print('confusion matrix:\n',confusion_matrix(y_test,y_pred))

print('overall acc:',np.mean(clf_acc))

1th validation:
accuracy: 0.59
confusion matrix:
 [[9 0 1 0 0 0 0 0 0 1]
 [0 5 1 0 0 2 0 0 0 0]
 [0 0 7 1 0 1 1 0 1 1]
 [0 0 0 3 2 0 0 1 2 1]
 [0 0 1 1 7 0 0 1 0 2]
 [1 2 0 0 0 8 0 0 0 0]
 [0 0 0 0 0 0 5 0 0 2]
 [0 0 1 0 4 0 0 1 0 1]
 [1 0 0 0 2 0 0 2 9 0]
 [0 0 2 0 0 0 0 1 1 5]]
2th validation:
accuracy: 0.7
confusion matrix:
 [[11  0  1  0  0  0  0  0  1  0]
 [ 0  7  0  0  0  0  0  0  0  1]
 [ 2  0  3  0  0  0  0  1  0  0]
 [ 0  0  0  8  0  0  0  0  1  0]
 [ 0  0  0  2  6  0  0  0  2  0]
 [ 0  2  1  0  0  6  0  0  0  0]
 [ 0  0  1  0  0  0 11  0  0  0]
 [ 0  0  0  2  2  0  0  7  0  0]
 [ 1  0  0  0  0  1  0  0 10  0]
 [ 4  0  2  0  1  0  2  0  0  1]]
3th validation:
accuracy: 0.66
confusion matrix:
 [[9 0 0 0 0 1 0 0 0 1]
 [0 7 0 0 0 0 1 0 0 0]
 [0 0 8 0 0 0 1 0 2 1]
 [0 0 0 6 0 0 1 2 0 1]
 [0 0 0 1 6 0 0 3 1 0]
 [0 0 0 0 0 6 1 0 0 0]
 [0 0 3 0 0 0 9 0 0 0]
 [0 0 2 0 0 0 0 5 0 2]
 [1 0 1 0 1 0 0 0 6 0]
 [0 0 0 2 0 0 3 1 1 4]]
4th validation:
accuracy: 0.63
confusion matrix:
 [[ 4  0 

In [23]:
self_test = os.listdir(rootpath+'/_test')
X = pd.DataFrame(columns=['sp_ce','sp_ro','sp_fl','zcr','mfccs','energy','sp_ce_skew','tempo','chroma','genre'])
for fn in self_test:
    print(fn)
    x = feature_extraction(rootpath+'/_test/'+fn,'pop')
    X.loc[len(X)] = x

X = X.drop(columns=['genre'])
display(X)

svm_pred = svm.predict(X)
print('svm prediction\n',svm_pred)

knn_pred = knn.predict(X)
print('knn prediction\n',knn_pred)

clf_pred = clf.predict(X)
print('xgb prediction\n',clf_pred)

rf_pred = rf.predict(X)
print('rf prediction\n',rf_pred)
    

rt_BOO.mp3
rt_Luv Or Love.mp3
rt_渐暖.mp3
rt_百忧戒v1.mp3
rt_达尔文 王源.mp3


Unnamed: 0,sp_ce,sp_ro,sp_fl,zcr,mfccs,energy,sp_ce_skew,tempo,chroma
0,3061.555504,6902.046327,1.737205,0.39209,10.07703,0.210864,0.335393,143.554688,0.607021
1,3172.473892,6512.861232,1.041924,0.425293,7.533351,0.100505,1.316669,172.265625,0.541862
2,2782.837528,6331.041213,1.235071,0.430176,7.930734,0.152144,1.06195,117.453835,0.60985
3,2202.689692,4687.876199,1.249305,0.379395,7.476163,0.180054,1.292315,135.999178,0.583331
4,1372.345885,2665.227768,1.252997,0.344238,-5.022725,0.030996,2.820789,95.703125,0.62175


svm prediction
 [7 6 7 6 2]
knn prediction
 [3 3 2 0 1]
xgb prediction
 [4 7 2 9 0]
rf prediction
 [4 2 2 9 0]
