In [1]:
import pickle
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import classification_report
import numpy as np
from sklearn.metrics import accuracy_score, f1_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import GradientBoostingClassifier

In [2]:
def load_pickle_data(file_path):
    """Return the object deserialized from the pickle file at ``file_path``.

    The file is opened in binary read mode and is closed again when the
    function returns.
    """
    with open(file_path, 'rb') as handle:
        return pickle.load(handle)

In [3]:
# Load the three dataset splits (train, validation, test), one pickle file each,
# in that order. Paths are relative to the notebook's working directory.
train_data, val_data, test_data = (
    load_pickle_data(pkl_path)
    for pkl_path in (
        "Train_stft_features.pkl",
        "Val_stft_features.pkl",
        "Test_stft_features.pkl",
    )
)

# SVM (Support Vector Machine)

In [4]:
def _extract_features(records):
    """Build a feature matrix and a label array from a sequence of records.

    Each record is indexed with ``'stft'`` and ``'category'``. The ``'stft'``
    entry is averaged along axis 1 to give one feature vector per record
    (presumably this collapses the time axis of a (freq, time) spectrogram —
    TODO confirm against the feature-extraction code).

    Args:
        records: iterable of mappings with ``'stft'`` and ``'category'`` keys.

    Returns:
        tuple: ``(X, y)`` as NumPy arrays, in the same order as ``records``.
    """
    features = []
    labels = []
    for record in records:
        features.append(np.mean(record['stft'], axis=1))
        labels.append(record['category'])
    return np.array(features), np.array(labels)


# Build all three splits through the same code path so that train, validation
# and test features can never drift apart. These names are reused by the
# model cells further down the notebook.
X_train, y_train = _extract_features(train_data)
X_val, y_val = _extract_features(val_data)
X_test, y_test = _extract_features(test_data)

# Linear-kernel SVM, fitted on the training split only.
svm_model = SVC(kernel='linear', random_state=42)
svm_model.fit(X_train, y_train)

# --- Validation split ---
y_pred = svm_model.predict(X_val)

accuracy = accuracy_score(y_val, y_pred)
f1 = f1_score(y_val, y_pred, average='weighted')

print("Classification report for Validation Set:")
print(classification_report(y_val, y_pred))

print(f"Accuracy: {accuracy}")
print(f"F1-Score: {f1}")

# --- Test split ---
y_test_pred = svm_model.predict(X_test)

accuracy_test = accuracy_score(y_test, y_test_pred)
f1_test = f1_score(y_test, y_test_pred, average='weighted')

print("\nClassification report for Test Set:")
print(classification_report(y_test, y_test_pred))

print(f"Accuracy (Test): {accuracy_test}")
print(f"F1-Score (Test): {f1_test}")


Classification report for Validation Set:
               precision    recall  f1-score   support

None_swarming       1.00      0.50      0.67      1800
     Swarming       0.67      1.00      0.80      1800

     accuracy                           0.75      3600
    macro avg       0.83      0.75      0.73      3600
 weighted avg       0.83      0.75      0.73      3600

Accuracy: 0.75
F1-Score: 0.7333333333333333

Classification report for Test Set:
               precision    recall  f1-score   support

None_swarming       1.00      0.33      0.50      2400
     Swarming       0.60      1.00      0.75      2441

     accuracy                           0.67      4841
    macro avg       0.80      0.67      0.63      4841
 weighted avg       0.80      0.67      0.63      4841

Accuracy (Test): 0.6701094815120843
F1-Score (Test): 0.6285257241154586


# KNN (K-Nearest Neighbors)

In [5]:
# k-nearest-neighbours classifier (k = 11), fitted on the training split.
knn_model = KNeighborsClassifier(n_neighbors=11)
knn_model.fit(X_train, y_train)

# --- Validation split ---
y_val_pred_knn = knn_model.predict(X_val)

print("Classification report for Validation Set:")
print(classification_report(y_val, y_val_pred_knn))

accuracy_val_knn = accuracy_score(y_val, y_val_pred_knn)
f1_val_knn = f1_score(
    y_val,
    y_val_pred_knn,
    average='weighted',
)

print(f"Accuracy on Validation Set (KNN): {accuracy_val_knn}")
print(f"F1-Score on Validation Set (KNN): {f1_val_knn}")

# --- Test split ---
y_test_pred_knn = knn_model.predict(X_test)

print("Classification report for Test Set:")
print(classification_report(y_test, y_test_pred_knn))

accuracy_test_knn = accuracy_score(y_test, y_test_pred_knn)
f1_test_knn = f1_score(
    y_test,
    y_test_pred_knn,
    average='weighted',
)

print(f"Accuracy on Test Set (KNN): {accuracy_test_knn}")
print(f"F1-Score on Test Set (KNN): {f1_test_knn}")

Classification report for Validation Set:
               precision    recall  f1-score   support

None_swarming       0.83      0.51      0.63      1800
     Swarming       0.65      0.90      0.75      1800

     accuracy                           0.70      3600
    macro avg       0.74      0.70      0.69      3600
 weighted avg       0.74      0.70      0.69      3600

Accuracy on Validation Set (KNN): 0.7036111111111111
F1-Score on Validation Set (KNN): 0.6917910851943457
Classification report for Test Set:
               precision    recall  f1-score   support

None_swarming       1.00      0.33      0.50      2400
     Swarming       0.60      1.00      0.75      2441

     accuracy                           0.67      4841
    macro avg       0.80      0.67      0.63      4841
 weighted avg       0.80      0.67      0.63      4841

Accuracy on Test Set (KNN): 0.6701094815120843
F1-Score on Test Set (KNN): 0.6285257241154586


# NB (Naive Bayes)

In [6]:
from sklearn.naive_bayes import GaussianNB

# Gaussian Naive Bayes with default settings, fitted on the training split.
nb_model = GaussianNB()
nb_model.fit(X_train, y_train)

# --- Validation split ---
y_pred = nb_model.predict(X_val)

accuracy = accuracy_score(y_val, y_pred)
f1 = f1_score(y_val, y_pred, average='weighted')

print("Classification report for Validation Set:")
print(classification_report(y_val, y_pred))

print(f"Accuracy: {accuracy}")
print(f"F1-Score: {f1}")

# --- Test split ---
# Predict with the Naive Bayes model itself. Without this line the metrics
# below are computed from whatever `y_test_pred` an earlier cell left in the
# kernel, i.e. they would describe a different model.
y_test_pred = nb_model.predict(X_test)

accuracy_test = accuracy_score(y_test, y_test_pred)
f1_test = f1_score(y_test, y_test_pred, average='weighted')

print("\nClassification report for Test Set:")
print(classification_report(y_test, y_test_pred))

print(f"Accuracy (Test): {accuracy_test}")
print(f"F1-Score (Test): {f1_test}")

Classification report for Validation Set:
               precision    recall  f1-score   support

None_swarming       1.00      0.24      0.39      1800
     Swarming       0.57      1.00      0.73      1800

     accuracy                           0.62      3600
    macro avg       0.78      0.62      0.56      3600
 weighted avg       0.78      0.62      0.56      3600

Accuracy: 0.6219444444444444
F1-Score: 0.5588997485433276

Classification report for Test Set:
               precision    recall  f1-score   support

None_swarming       1.00      0.33      0.50      2400
     Swarming       0.60      1.00      0.75      2441

     accuracy                           0.67      4841
    macro avg       0.80      0.67      0.63      4841
 weighted avg       0.80      0.67      0.63      4841

Accuracy (Test): 0.6701094815120843
F1-Score (Test): 0.6285257241154586


# RF (Random Forest)

In [7]:
from sklearn.ensemble import RandomForestClassifier

# Random forest with 2000 trees; the fixed random_state keeps the fit repeatable.
rf_model = RandomForestClassifier(
    n_estimators=2000,
    random_state=42,
)
rf_model.fit(X_train, y_train)

# --- Validation split ---
y_pred = rf_model.predict(X_val)

accuracy = accuracy_score(y_val, y_pred)
f1 = f1_score(
    y_val,
    y_pred,
    average='weighted',
)

print("Classification report for Validation Set:")
print(classification_report(y_val, y_pred))

print(f"Accuracy: {accuracy}")
print(f"F1-Score: {f1}")

# --- Test split ---
y_test_pred = rf_model.predict(X_test)

accuracy_test = accuracy_score(y_test, y_test_pred)
f1_test = f1_score(
    y_test,
    y_test_pred,
    average='weighted',
)

print("\nClassification report for Test Set:")
print(classification_report(y_test, y_test_pred))

print(f"Accuracy (Test): {accuracy_test}")
print(f"F1-Score (Test): {f1_test}")


Classification report for Validation Set:
               precision    recall  f1-score   support

None_swarming       1.00      0.25      0.40      1800
     Swarming       0.57      1.00      0.73      1800

     accuracy                           0.62      3600
    macro avg       0.79      0.62      0.56      3600
 weighted avg       0.79      0.62      0.56      3600

Accuracy: 0.6238888888888889
F1-Score: 0.5619179982104283

Classification report for Test Set:
               precision    recall  f1-score   support

None_swarming       1.00      0.33      0.50      2400
     Swarming       0.60      1.00      0.75      2441

     accuracy                           0.67      4841
    macro avg       0.80      0.67      0.63      4841
 weighted avg       0.80      0.67      0.63      4841

Accuracy (Test): 0.6701094815120843
F1-Score (Test): 0.6285257241154586


# GB (Gradient Boosting)

In [8]:
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import GradientBoostingClassifier

# Baseline gradient-boosting model with hand-picked hyper-parameters,
# fitted on the training split.
gb_model = GradientBoostingClassifier(
    random_state=42,
    max_depth=3,
    learning_rate=0.1,
    n_estimators=100,
)
gb_model.fit(X_train, y_train)

# --- Validation split ---
y_pred = gb_model.predict(X_val)

accuracy = accuracy_score(y_val, y_pred)
f1 = f1_score(
    y_val,
    y_pred,
    average='weighted',
)

print("Classification report for Validation Set:")
print(classification_report(y_val, y_pred))

print(f"Accuracy: {accuracy}")
print(f"F1-Score: {f1}")

# --- Test split ---
y_test_pred = gb_model.predict(X_test)

accuracy_test = accuracy_score(y_test, y_test_pred)
f1_test = f1_score(
    y_test,
    y_test_pred,
    average='weighted',
)

print("\nClassification report for Test Set:")
print(classification_report(y_test, y_test_pred))

print(f"Accuracy (Test): {accuracy_test}")
print(f"F1-Score (Test): {f1_test}")

Classification report for Validation Set:
               precision    recall  f1-score   support

None_swarming       1.00      0.25      0.40      1800
     Swarming       0.57      1.00      0.73      1800

     accuracy                           0.63      3600
    macro avg       0.79      0.63      0.56      3600
 weighted avg       0.79      0.63      0.56      3600

Accuracy: 0.6258333333333334
F1-Score: 0.564922129297585

Classification report for Test Set:
               precision    recall  f1-score   support

None_swarming       1.00      0.33      0.50      2400
     Swarming       0.60      1.00      0.75      2441

     accuracy                           0.67      4841
    macro avg       0.80      0.67      0.63      4841
 weighted avg       0.80      0.67      0.63      4841

Accuracy (Test): 0.6701094815120843
F1-Score (Test): 0.6285257241154586


In [9]:
import optuna
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score

def objective(trial):
    """Optuna objective: validation accuracy of one gradient-boosting candidate.

    Draws ``n_estimators`` (50-300), ``learning_rate`` (0.01-0.2) and
    ``max_depth`` (2-10) from ``trial``, fits a ``GradientBoostingClassifier``
    on the module-level ``X_train``/``y_train``, and scores it on
    ``X_val``/``y_val``.

    Args:
        trial: object providing ``suggest_int`` and ``suggest_float``.

    Returns:
        The accuracy of the fitted model on the validation split.
    """
    # Dict literals evaluate in order, so the suggestions are requested in the
    # sequence n_estimators -> learning_rate -> max_depth.
    sampled = {
        'n_estimators': trial.suggest_int('n_estimators', 50, 300),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.2),
        'max_depth': trial.suggest_int('max_depth', 2, 10),
    }

    candidate = GradientBoostingClassifier(random_state=42, **sampled)
    candidate.fit(X_train, y_train)

    val_predictions = candidate.predict(X_val)
    return accuracy_score(y_val, val_predictions)

# Run a 12-trial search that maximizes the value returned by `objective`.
# The sampler is seeded explicitly so that Restart & Run All explores the same
# hyper-parameter sequence every time; an unseeded sampler proposes different
# trials on each run.
study = optuna.create_study(
    direction='maximize',
    sampler=optuna.samplers.TPESampler(seed=42),
)
study.optimize(objective, n_trials=12)

print("Best parameters:", study.best_params)


  from .autonotebook import tqdm as notebook_tqdm
[I 2025-02-11 11:45:19,177] A new study created in memory with name: no-name-24761524-6cdd-4bfe-afb3-2cbf59a75390
[I 2025-02-11 11:46:42,463] Trial 0 finished with value: 0.6258333333333334 and parameters: {'n_estimators': 131, 'learning_rate': 0.062190145613357166, 'max_depth': 8}. Best is trial 0 with value: 0.6258333333333334.
[I 2025-02-11 11:47:39,766] Trial 1 finished with value: 0.6258333333333334 and parameters: {'n_estimators': 300, 'learning_rate': 0.15203902564169902, 'max_depth': 10}. Best is trial 0 with value: 0.6258333333333334.
[I 2025-02-11 11:48:40,747] Trial 2 finished with value: 0.6258333333333334 and parameters: {'n_estimators': 98, 'learning_rate': 0.05793735185309525, 'max_depth': 6}. Best is trial 0 with value: 0.6258333333333334.
[I 2025-02-11 11:49:24,699] Trial 3 finished with value: 0.6258333333333334 and parameters: {'n_estimators': 113, 'learning_rate': 0.1918027339322028, 'max_depth': 10}. Best is trial 0

Best parameters: {'n_estimators': 131, 'learning_rate': 0.062190145613357166, 'max_depth': 8}


In [10]:
# Refit a gradient-boosting model using the best hyper-parameters found by the
# Optuna study, then report validation and test metrics.
best_params = study.best_params

best_model = GradientBoostingClassifier(
    random_state=42,
    **{
        param_name: best_params[param_name]
        for param_name in ('n_estimators', 'learning_rate', 'max_depth')
    },
)

best_model.fit(X_train, y_train)

# --- Validation split ---
y_pred = best_model.predict(X_val)

accuracy = accuracy_score(y_val, y_pred)
f1 = f1_score(
    y_val,
    y_pred,
    average='weighted',
)

print("Classification report for Validation Set:")
print(classification_report(y_val, y_pred))
print(f"Accuracy: {accuracy}")
print(f"F1-Score: {f1}")

# --- Test split ---
y_test_pred = best_model.predict(X_test)

accuracy_test = accuracy_score(y_test, y_test_pred)
f1_test = f1_score(
    y_test,
    y_test_pred,
    average='weighted',
)

print("\nClassification report for Test Set:")
print(classification_report(y_test, y_test_pred))
print(f"Accuracy (Test): {accuracy_test}")
print(f"F1-Score (Test): {f1_test}")


Classification report for Validation Set:
               precision    recall  f1-score   support

None_swarming       1.00      0.25      0.40      1800
     Swarming       0.57      1.00      0.73      1800

     accuracy                           0.63      3600
    macro avg       0.79      0.63      0.56      3600
 weighted avg       0.79      0.63      0.56      3600

Accuracy: 0.6258333333333334
F1-Score: 0.564922129297585

Classification report for Test Set:
               precision    recall  f1-score   support

None_swarming       1.00      0.33      0.50      2400
     Swarming       0.60      1.00      0.75      2441

     accuracy                           0.67      4841
    macro avg       0.80      0.67      0.63      4841
 weighted avg       0.80      0.67      0.63      4841

Accuracy (Test): 0.6701094815120843
F1-Score (Test): 0.6285257241154586
