In [1]:
import os
import librosa
import numpy as np
import pandas as pd

from sklearn.preprocessing import StandardScaler

from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import GridSearchCV

**Extract features FFT**

In [2]:
def _extract_features(file_path):
    df_all = pd.DataFrame()

    y, sr = librosa.load(file_path)
    fft_result = np.abs(np.fft.fft(y))

    df = pd.DataFrame(fft_result)
    
    df_mean = df.mean(axis=0).to_frame()
    df_std = df.std(axis=0).to_frame()
    df_median = df.median(axis=0).to_frame()
    q1 = df.quantile(0.25).to_frame()
    q3 = df.quantile(0.75).to_frame()

    df_all['mean'] = df_mean
    df_all['std'] = df_std
    df_all['median'] = df_median
    df_all['Q1'] = q1
    df_all['Q3'] = q3
    
    return df_all

In [3]:
def _read_wav(path):
    df_merge = pd.DataFrame()
    for filename in os.listdir(path):
        if filename.endswith(".wav"):
            file_path = os.path.join(path, filename)
            feature = _extract_features(file_path)
            df_merge = pd.concat([df_merge, feature], ignore_index=True)
    return df_merge

In [4]:
def extract_data(path_noqueen, path_queen):
    scaler = StandardScaler()

    features_NoQueen = _read_wav(path_noqueen)
    features_NoQueen_scaled = scaler.fit_transform(features_NoQueen)
    features_NoQueen = pd.DataFrame(features_NoQueen_scaled, columns=features_NoQueen.columns)
    features_NoQueen['Label'] = 0

    features_Queen = _read_wav(path_queen)
    features_Queen_scaled = scaler.fit_transform(features_Queen)
    features_Queen = pd.DataFrame(features_Queen_scaled, columns=features_Queen.columns) 
    features_Queen['Label'] = 1

    all_features = pd.concat([features_NoQueen, features_Queen], ignore_index=True)

    # Shuffle the data
    all_features = all_features.sample(frac=1, random_state=42).reset_index(drop=True)

    values = all_features.iloc[:, :-1]
    labels = all_features.iloc[:, -1]

    return values, labels

In [5]:
def load_data(path_data):
    data = np.load(path_data)
    df = pd.DataFrame(data)
    features = df.iloc[:, : -1]
    labels = df.iloc[:, -1]
    return features, labels

In [6]:
Train_NoQueen_path = "D:\\1 Ép bê tê\\4 Kỳ 4 DE\\AIL303m\\Data\\Audio\\20k_audio_splitted_dataset\\train\\NonQueen_train"
Train_Queen_path = "D:\\1 Ép bê tê\\4 Kỳ 4 DE\\AIL303m\\Data\\Audio\\20k_audio_splitted_dataset\\train\\Queen_train"

Val_NoQueen_path = "D:\\1 Ép bê tê\\4 Kỳ 4 DE\\AIL303m\\Data\\Audio\\20k_audio_splitted_dataset\\val\\NonQueen"
Val_Queen_path = "D:\\1 Ép bê tê\\4 Kỳ 4 DE\\AIL303m\\Data\\Audio\\20k_audio_splitted_dataset\\val\\Queen"

Test_NoQueen_path = "D:\\1 Ép bê tê\\4 Kỳ 4 DE\\AIL303m\\Data\\Audio\\20k_audio_splitted_dataset\\test\\NonQueen"
Test_Queen_path = "D:\\1 Ép bê tê\\4 Kỳ 4 DE\\AIL303m\\Data\\Audio\\20k_audio_splitted_dataset\\test\\Queen"

In [7]:
# X_train, labels_train = extract_data(Train_NoQueen_path, Train_Queen_path)
# X_val, labels_val = extract_data(Val_NoQueen_path, Val_Queen_path)
# X_test, labels_test = extract_data(Test_NoQueen_path, Test_Queen_path)

In [9]:
# X_train = pd.concat([X_train, X_val], ignore_index=True)
# labels_train = pd.concat([labels_train, labels_val], ignore_index = True)
# X_train

In [10]:
# Train = pd.concat([X_train, labels_train], axis = 1)
# Test = pd.concat([X_test, labels_test],axis = 1)
# np.save("D:\\1 Ép bê tê\\4 Kỳ 4 DE\\AIL303m\\Data\\Audio\\20k_audio_splitted_dataset\\train.npy", Train)
# np.save("D:\\1 Ép bê tê\\4 Kỳ 4 DE\\AIL303m\\Data\\Audio\\20k_audio_splitted_dataset\\test.npy", Test)

In [11]:
path_train = "D:\\1 Ép bê tê\\4 Kỳ 4 DE\\AIL303m\\Data\\Audio\\20k_audio_splitted_dataset\\train.npy"
path_test = "D:\\1 Ép bê tê\\4 Kỳ 4 DE\\AIL303m\\Data\\Audio\\20k_audio_splitted_dataset\\test.npy"

In [12]:
X_train, labels_train = load_data(path_train)
X_test, labels_test = load_data(path_test)
X_train

Unnamed: 0,0,1,2,3,4
0,2.332162,2.103162,1.854490,-0.039158,2.397783
1,-0.344594,-0.370745,-0.382962,-0.125056,-0.337867
2,-0.395530,-0.413199,-0.349714,-0.069387,-0.416973
3,-0.291352,-0.289454,-0.359136,-0.380032,-0.317604
4,-0.469207,-0.446744,-0.500538,-0.410412,-0.575200
...,...,...,...,...,...
15995,0.044950,-0.061593,0.328206,-0.346857,0.185315
15996,-0.410598,-0.417933,-0.370051,-0.252268,-0.452390
15997,-0.362981,-0.327498,-0.402777,-0.104918,-0.366331
15998,-0.143070,0.156697,-0.363485,-0.480790,-0.403007


In [13]:
from sklearn.ensemble import ExtraTreesClassifier

In [14]:
Extra_tree = ExtraTreesClassifier(bootstrap=True, 
                             criterion='entropy', 
                             max_depth= 40, 
                             max_features='sqrt',
                             min_samples_leaf=1,
                             min_samples_split=2, 
                             n_estimators= 400,
                             random_state=0)

Extra_tree.fit(X_train, labels_train)

In [15]:
y_pred = Extra_tree.predict(X_test)
accuracy = accuracy_score(labels_test, y_pred)
report = classification_report(labels_test, y_pred)

print("Accuracy:", accuracy)
print("Classification Report:\n", report)

Accuracy: 0.92075
Classification Report:
               precision    recall  f1-score   support

         0.0       0.90      0.95      0.92      2000
         1.0       0.95      0.89      0.92      2000

    accuracy                           0.92      4000
   macro avg       0.92      0.92      0.92      4000
weighted avg       0.92      0.92      0.92      4000



In [16]:
from sklearn.ensemble import RandomForestClassifier

In [17]:
Random_forest = RandomForestClassifier(bootstrap=True, 
                                       criterion='entropy', 
                                       max_depth= 32,
                                       max_features='sqrt',
                                       min_samples_leaf=1,
                                       min_samples_split=2,
                                       n_estimators= 300,
                                       random_state=0)

Random_forest.fit(X_train, labels_train)

In [18]:
y_pred = Random_forest.predict(X_test)
accuracy = accuracy_score(labels_test, y_pred)
report = classification_report(labels_test, y_pred)

print("Accuracy:", accuracy)
print("Classification Report:\n", report)

Accuracy: 0.9105
Classification Report:
               precision    recall  f1-score   support

         0.0       0.90      0.92      0.91      2000
         1.0       0.92      0.90      0.91      2000

    accuracy                           0.91      4000
   macro avg       0.91      0.91      0.91      4000
weighted avg       0.91      0.91      0.91      4000



In [19]:
from sklearn.linear_model import LogisticRegression

In [20]:
Logistic_regression = LogisticRegression(
    C = 0.1,
    penalty='l2',
    random_state=0
)

Logistic_regression.fit(X_train, labels_train)


In [21]:
y_pred = Logistic_regression.predict(X_test)
accuracy = accuracy_score(labels_test, y_pred)
report = classification_report(labels_test, y_pred, zero_division=1)


print("Accuracy:", accuracy)
print("Classification Report:\n", report)

Accuracy: 0.5
Classification Report:
               precision    recall  f1-score   support

         0.0       0.50      1.00      0.67      2000
         1.0       1.00      0.00      0.00      2000

    accuracy                           0.50      4000
   macro avg       0.75      0.50      0.33      4000
weighted avg       0.75      0.50      0.33      4000



In [22]:
from sklearn.svm import SVC

In [23]:
SV_classification = SVC(
    C = 7,
    kernel = 'rbf', # rbf = Radial basis function
    gamma = 5,
    random_state=0
)

SV_classification.fit(X_train, labels_train)

In [24]:
y_pred = SV_classification.predict(X_test)
accuracy = accuracy_score(labels_test, y_pred)
report = classification_report(labels_test, y_pred, zero_division=1)

print("Accuracy:", accuracy)
print("Classification Report:\n", report)

Accuracy: 0.90975
Classification Report:
               precision    recall  f1-score   support

         0.0       0.89      0.94      0.91      2000
         1.0       0.93      0.88      0.91      2000

    accuracy                           0.91      4000
   macro avg       0.91      0.91      0.91      4000
weighted avg       0.91      0.91      0.91      4000



In [25]:
from sklearn.neighbors import KNeighborsClassifier

In [26]:
KNN_classification = KNeighborsClassifier(
    n_neighbors = 15
)

KNN_classification.fit(X_train, labels_train)

In [27]:
y_pred = KNN_classification.predict(X_test)
accuracy = accuracy_score(labels_test, y_pred)
report = classification_report(labels_test, y_pred)

print("Accuracy:", accuracy)
print("Classification Report:\n", report)

Accuracy: 0.8745
Classification Report:
               precision    recall  f1-score   support

         0.0       0.82      0.96      0.88      2000
         1.0       0.95      0.79      0.86      2000

    accuracy                           0.87      4000
   macro avg       0.88      0.87      0.87      4000
weighted avg       0.88      0.87      0.87      4000

