In [1]:
import os
import librosa
import numpy as np
import pandas as pd

from sklearn.ensemble import ExtraTreesClassifier
from sklearn.preprocessing import StandardScaler

from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import GridSearchCV

**Extract features FFT**

In [2]:
def _extract_features(file_path, n_fft=2048, hop_length=512):
    y, sr = librosa.load(file_path)
    fft_result = np.abs(np.fft.fft(y))
    magnitude_db = librosa.amplitude_to_db(fft_result)

    df = pd.DataFrame(magnitude_db)
    df_mean = df.mean(axis=0).to_frame()
    df_std = df.std(axis=0).to_frame()
    df_all = pd.concat([df_mean,df_std]).T
    
    return df_all

In [3]:
def _read_wav(path):
    df_merge = pd.DataFrame()
    for filename in os.listdir(path):
        if filename.endswith(".wav"):
            file_path = os.path.join(path, filename)
            feature = _extract_features(file_path)
            df_merge = pd.concat([df_merge, feature], ignore_index=True)
    return df_merge

In [4]:
def shuffleArray(array):
    for i in range(len(array) - 1, 0, -1):
        j = np.random.randint(0, i + 1)
        array[i], array[j] = array[j], array[i]

In [5]:
def train():
    scaler = StandardScaler()

    NoQueen_path = "D:\\1 Ép bê tê\\4 Kỳ 4 DE\\AIL303m\\Data\\Audio\\20k_audio_splitted_dataset\\train\\NonQueen_train"
    features_NoQueen = _read_wav(NoQueen_path)
    features_NoQueen = scaler.fit_transform(features_NoQueen)

    Queen_path = "D:\\1 Ép bê tê\\4 Kỳ 4 DE\\AIL303m\\Data\\Audio\\20k_audio_splitted_dataset\\train\\Queen_train"
    features_Queen = _read_wav(Queen_path)
    features_Queen = scaler.fit_transform(features_Queen)

    all_features = np.concatenate((features_NoQueen, features_Queen), axis=0)
    labels_train = np.array([0] * len(features_NoQueen) + [1] * len(features_Queen))

    # Shuffle the data
    all_features = np.column_stack((all_features, labels_train))
    shuffleArray(all_features)

    X_train = all_features[:, :-1]
    y_train = all_features[:, -1]

    return X_train, y_train

In [6]:
def val():
    scaler = StandardScaler()

    test_NoQueen_path = "D:\\1 Ép bê tê\\4 Kỳ 4 DE\\AIL303m\\Data\\Audio\\20k_audio_splitted_dataset\\val\\NonQueen"
    features_NoQueen = _read_wav(test_NoQueen_path)
    features_NoQueen = scaler.fit_transform(features_NoQueen)

    Test_Queen_path = "D:\\1 Ép bê tê\\4 Kỳ 4 DE\\AIL303m\\Data\\Audio\\20k_audio_splitted_dataset\\val\\Queen"
    features_Queen = _read_wav(Test_Queen_path)
    features_Queen = scaler.fit_transform(features_Queen)


    # all_features = features_NoQueen + features_Queen
    all_features = np.concatenate((features_NoQueen, features_Queen), axis=0)
    all_features_array = np.array(all_features)
    
    # Append labels 
    labels_test = np.array([0] * len(features_NoQueen) + [1] * len(features_Queen))

    all_features_array = np.column_stack((all_features_array, labels_test))
    shuffleArray(all_features_array)

    X_val = all_features_array[:, :-1]  
    y_val = all_features_array[:, -1]

    return X_val, y_val

In [7]:
def test():
    scaler = StandardScaler()

    test_NoQueen_path = "D:\\1 Ép bê tê\\4 Kỳ 4 DE\\AIL303m\\Data\\Audio\\20k_audio_splitted_dataset\\test\\NonQueen"
    test_NoQueen = _read_wav(test_NoQueen_path)
    test_NoQueen = scaler.fit_transform(test_NoQueen)

    Test_Queen_path = "D:\\1 Ép bê tê\\4 Kỳ 4 DE\\AIL303m\\Data\\Audio\\20k_audio_splitted_dataset\\test\\Queen"
    Test_Queen = _read_wav(Test_Queen_path)
    Test_Queen = scaler.fit_transform(Test_Queen)

    # all_features = test_NoQueen + Test_Queen
    all_features = np.concatenate((test_NoQueen, Test_Queen), axis=0)
    all_features_array = np.array(all_features)
    
    # Append labels 
    labels_test = np.array([0] * len(test_NoQueen) + [1] * len(Test_Queen))

    all_features_array = np.column_stack((all_features_array, labels_test))
    shuffleArray(all_features_array)

    X_test = all_features_array[:, :-1]  
    y_test = all_features_array[:, -1]

    return X_test, y_test

In [8]:
X_train, labels_train = train()
X_val, labels_val = val()

In [9]:
X_test, labels_test = test()

In [10]:
X_train = pd.DataFrame(X_train)
X_train

Unnamed: 0,0,1
0,3.153603,-0.147051
1,3.153603,-0.147051
2,3.153603,-0.147051
3,-0.305795,0.407632
4,2.947389,0.301775
...,...,...
13995,-0.213247,-0.608141
13996,-0.306249,0.315206
13997,-0.724923,0.780730
13998,1.754642,-3.493872


In [80]:
model = ExtraTreesClassifier(bootstrap=True, 
                             criterion='entropy', 
                             max_depth=20, 
                             max_features='sqrt',
                             min_samples_leaf=1,
                             min_samples_split=2, 
                             n_estimators=40, random_state=42)

model.fit(X_train, labels_train)

In [81]:
y_pred = model.predict(X_test)
accuracy = accuracy_score(labels_test, y_pred)
report = classification_report(labels_test, y_pred)

print("Accuracy:", accuracy)
print("Classification Report:\n", report)

Accuracy: 0.851
Classification Report:
               precision    recall  f1-score   support

         0.0       0.87      0.97      0.92      3433
         1.0       0.43      0.16      0.23       567

    accuracy                           0.85      4000
   macro avg       0.65      0.56      0.57      4000
weighted avg       0.81      0.85      0.82      4000



In [77]:
n_estimators = [10, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50]
max_features = ['sqrt']
max_depth = [2, 5, 10, 15, 16, 17, 18, 19, 20]
min_samples_split = [2]
min_samples_leaf = [1]
criterion = ['gini', 'entropy']
bootstrap = [True]

param_grid = {
    'criterion' : criterion,
    'n_estimators': n_estimators,
    'max_features' : max_features,
    'max_depth' : max_depth,
    'min_samples_split' : min_samples_split,
    'min_samples_leaf' : min_samples_leaf,
    'bootstrap' : bootstrap
}

ET_model = ExtraTreesClassifier()

ET_Grid = GridSearchCV(estimator=ET_model, param_grid=param_grid, cv=3, verbose = 3)
ET_Grid.fit(X_train, labels_train)

Fitting 3 folds for each of 306 candidates, totalling 918 fits
[CV 1/3] END bootstrap=True, criterion=gini, max_depth=2, max_features=sqrt, min_samples_leaf=1, min_samples_split=2, n_estimators=10;, score=0.848 total time=   0.0s
[CV 2/3] END bootstrap=True, criterion=gini, max_depth=2, max_features=sqrt, min_samples_leaf=1, min_samples_split=2, n_estimators=10;, score=0.848 total time=   0.0s
[CV 3/3] END bootstrap=True, criterion=gini, max_depth=2, max_features=sqrt, min_samples_leaf=1, min_samples_split=2, n_estimators=10;, score=0.848 total time=   0.0s
[CV 1/3] END bootstrap=True, criterion=gini, max_depth=2, max_features=sqrt, min_samples_leaf=1, min_samples_split=2, n_estimators=20;, score=0.848 total time=   0.0s
[CV 2/3] END bootstrap=True, criterion=gini, max_depth=2, max_features=sqrt, min_samples_leaf=1, min_samples_split=2, n_estimators=20;, score=0.848 total time=   0.0s
[CV 3/3] END bootstrap=True, criterion=gini, max_depth=2, max_features=sqrt, min_samples_leaf=1, min_s

In [78]:
ET_Grid.best_params_

{'bootstrap': True,
 'criterion': 'gini',
 'max_depth': 20,
 'max_features': 'sqrt',
 'min_samples_leaf': 1,
 'min_samples_split': 2,
 'n_estimators': 20}

In [79]:
print(f'Train Accuracy: {ET_Grid.score(X_train, labels_train)}')
print(f'Test Accuracy: {ET_Grid.score(X_test, labels_test)}')

Train Accuracy: 0.9307142857142857
Test Accuracy: 0.8505
