In [1]:
import pickle
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import classification_report
import numpy as np
from sklearn.metrics import accuracy_score, f1_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import GradientBoostingClassifier

In [2]:
def load_pickle_data(file_path):
    with open(file_path, 'rb') as f:
        data = pickle.load(f)
    return data

In [3]:
train_data = load_pickle_data("Train_stft_features.pkl")
val_data = load_pickle_data("Val_stft_features.pkl")
test_data = load_pickle_data("Test_stft_features.pkl")

# SVM (Support vector machine)

In [4]:
X_train = []
y_train = []

for item in train_data:
    stft = np.mean(item['stft'], axis=1) 
    label = item['category']

    X_train.append(stft)
    y_train.append(label)

X_train = np.array(X_train)
y_train = np.array(y_train)

X_val = []
y_val = []

for item in val_data:
    stft = np.mean(item['stft'], axis=1) 
    label = item['category']

    X_val.append(stft)
    y_val.append(label)

X_val = np.array(X_val)
y_val = np.array(y_val)

svm_model = SVC(kernel='linear', random_state=42)
svm_model.fit(X_train, y_train)

y_pred = svm_model.predict(X_val)

accuracy = accuracy_score(y_val, y_pred)
f1 = f1_score(y_val, y_pred, average='weighted')

print("Classification report for Validation Set:")
print(classification_report(y_val, y_pred))

print(f"Accuracy: {accuracy}")
print(f"F1-Score: {f1}")

X_test = []
y_test = []

for item in test_data:
    stft = np.mean(item['stft'], axis=1) 
    label = item['category']

    X_test.append(stft)
    y_test.append(label)

X_test = np.array(X_test)
y_test = np.array(y_test)

y_test_pred = svm_model.predict(X_test)

accuracy_test = accuracy_score(y_test, y_test_pred)
f1_test = f1_score(y_test, y_test_pred, average='weighted')

print("\nClassification report for Test Set:")
print(classification_report(y_test, y_test_pred))

print(f"Accuracy (Test): {accuracy_test}")
print(f"F1-Score (Test): {f1_test}")


Classification report for Validation Set:
               precision    recall  f1-score   support

None_swarming       1.00      0.72      0.84      2200
     Swarming       0.79      1.00      0.88      2249

     accuracy                           0.86      4449
    macro avg       0.89      0.86      0.86      4449
 weighted avg       0.89      0.86      0.86      4449

Accuracy: 0.8626657675882221
F1-Score: 0.8598048625514688

Classification report for Test Set:
               precision    recall  f1-score   support

None_swarming       1.00      0.56      0.72      5600
     Swarming       0.69      1.00      0.82      5600

     accuracy                           0.78     11200
    macro avg       0.85      0.78      0.77     11200
 weighted avg       0.85      0.78      0.77     11200

Accuracy (Test): 0.7784821428571429
F1-Score (Test): 0.7670513132877068


# KNN (K-Nearest Neighbors)

In [5]:
knn_model = KNeighborsClassifier(n_neighbors=11)
knn_model.fit(X_train, y_train)

y_val_pred_knn = knn_model.predict(X_val)

print("Classification report for Validation Set:")
print(classification_report(y_val, y_val_pred_knn))

accuracy_val_knn = accuracy_score(y_val, y_val_pred_knn)
f1_val_knn = f1_score(y_val, y_val_pred_knn, average='weighted')

print(f"Accuracy on Validation Set (KNN): {accuracy_val_knn}")
print(f"F1-Score on Validation Set (KNN): {f1_val_knn}")

y_test_pred_knn = knn_model.predict(X_test)

print("Classification report for Test Set:")
print(classification_report(y_test, y_test_pred_knn))

accuracy_test_knn = accuracy_score(y_test, y_test_pred_knn)
f1_test_knn = f1_score(y_test, y_test_pred_knn, average='weighted')

print(f"Accuracy on Test Set (KNN): {accuracy_test_knn}")
print(f"F1-Score on Test Set (KNN): {f1_test_knn}")

Classification report for Validation Set:
               precision    recall  f1-score   support

None_swarming       1.00      0.43      0.60      2200
     Swarming       0.64      1.00      0.78      2249

     accuracy                           0.72      4449
    macro avg       0.82      0.71      0.69      4449
 weighted avg       0.82      0.72      0.69      4449

Accuracy on Validation Set (KNN): 0.7158912115082041
F1-Score on Validation Set (KNN): 0.6898853472036287
Classification report for Test Set:
               precision    recall  f1-score   support

None_swarming       1.00      0.40      0.57      5600
     Swarming       0.62      1.00      0.77      5600

     accuracy                           0.70     11200
    macro avg       0.81      0.70      0.67     11200
 weighted avg       0.81      0.70      0.67     11200

Accuracy on Test Set (KNN): 0.6989285714285715
F1-Score on Test Set (KNN): 0.6689179702331951


# NB (Naive Bayes)

In [6]:
from sklearn.naive_bayes import GaussianNB
nb_model = GaussianNB()
nb_model.fit(X_train, y_train)

y_pred = nb_model.predict(X_val)

accuracy = accuracy_score(y_val, y_pred)
f1 = f1_score(y_val, y_pred, average='weighted')

print("Classification report for Validation Set:")
print(classification_report(y_val, y_pred))

print(f"Accuracy: {accuracy}")
print(f"F1-Score: {f1}")

accuracy_test = accuracy_score(y_test, y_test_pred)
f1_test = f1_score(y_test, y_test_pred, average='weighted')

print("\nClassification report for Test Set:")
print(classification_report(y_test, y_test_pred))

print(f"Accuracy (Test): {accuracy_test}")
print(f"F1-Score (Test): {f1_test}")

Classification report for Validation Set:
               precision    recall  f1-score   support

None_swarming       1.00      0.41      0.58      2200
     Swarming       0.63      1.00      0.78      2249

     accuracy                           0.71      4449
    macro avg       0.82      0.70      0.68      4449
 weighted avg       0.81      0.71      0.68      4449

Accuracy: 0.7077995055068554
F1-Score: 0.6792895628574087

Classification report for Test Set:
               precision    recall  f1-score   support

None_swarming       1.00      0.56      0.72      5600
     Swarming       0.69      1.00      0.82      5600

     accuracy                           0.78     11200
    macro avg       0.85      0.78      0.77     11200
 weighted avg       0.85      0.78      0.77     11200

Accuracy (Test): 0.7784821428571429
F1-Score (Test): 0.7670513132877068


# Random Forest(RF)

In [7]:
from sklearn.ensemble import RandomForestClassifier

rf_model = RandomForestClassifier(n_estimators=2000, random_state=42)
rf_model.fit(X_train, y_train)

y_pred = rf_model.predict(X_val)

accuracy = accuracy_score(y_val, y_pred)
f1 = f1_score(y_val, y_pred, average='weighted')

print("Classification report for Validation Set:")
print(classification_report(y_val, y_pred))

print(f"Accuracy: {accuracy}")
print(f"F1-Score: {f1}")

y_test_pred = rf_model.predict(X_test)

accuracy_test = accuracy_score(y_test, y_test_pred)
f1_test = f1_score(y_test, y_test_pred, average='weighted')

print("\nClassification report for Test Set:")
print(classification_report(y_test, y_test_pred))

print(f"Accuracy (Test): {accuracy_test}")
print(f"F1-Score (Test): {f1_test}")


Classification report for Validation Set:
               precision    recall  f1-score   support

None_swarming       1.00      0.41      0.58      2200
     Swarming       0.63      1.00      0.78      2249

     accuracy                           0.71      4449
    macro avg       0.82      0.71      0.68      4449
 weighted avg       0.81      0.71      0.68      4449

Accuracy: 0.7091481231737469
F1-Score: 0.6812385466821502

Classification report for Test Set:
               precision    recall  f1-score   support

None_swarming       1.00      0.00      0.01      5600
     Swarming       0.50      1.00      0.67      5600

     accuracy                           0.50     11200
    macro avg       0.75      0.50      0.34     11200
 weighted avg       0.75      0.50      0.34     11200

Accuracy (Test): 0.5019642857142858
F1-Score (Test): 0.33768361178703


# Gradient Boosting, GB

In [8]:
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import GradientBoostingClassifier


gb_model = GradientBoostingClassifier(
    n_estimators=100,
    learning_rate=0.1,
    max_depth=3,
    random_state=42
)
gb_model.fit(X_train, y_train)

y_pred = gb_model.predict(X_val)

accuracy = accuracy_score(y_val, y_pred)
f1 = f1_score(y_val, y_pred, average='weighted')

print("Classification report for Validation Set:")
print(classification_report(y_val, y_pred))

print(f"Accuracy: {accuracy}")
print(f"F1-Score: {f1}")

y_test_pred = gb_model.predict(X_test)

accuracy_test = accuracy_score(y_test, y_test_pred)
f1_test = f1_score(y_test, y_test_pred, average='weighted')

print("\nClassification report for Test Set:")
print(classification_report(y_test, y_test_pred))

print(f"Accuracy (Test): {accuracy_test}")
print(f"F1-Score (Test): {f1_test}")

Classification report for Validation Set:
               precision    recall  f1-score   support

None_swarming       0.83      0.42      0.56      2200
     Swarming       0.62      0.91      0.74      2249

     accuracy                           0.67      4449
    macro avg       0.72      0.67      0.65      4449
 weighted avg       0.72      0.67      0.65      4449

Accuracy: 0.6684648235558552
F1-Score: 0.6464243334910679

Classification report for Test Set:
               precision    recall  f1-score   support

None_swarming       1.00      0.47      0.64      5600
     Swarming       0.66      1.00      0.79      5600

     accuracy                           0.74     11200
    macro avg       0.83      0.74      0.72     11200
 weighted avg       0.83      0.74      0.72     11200

Accuracy (Test): 0.7372321428571429
F1-Score (Test): 0.7178000268299559


In [9]:
import optuna
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score

def objective(trial):
    n_estimators = trial.suggest_int('n_estimators', 50, 300)
    learning_rate = trial.suggest_float('learning_rate', 0.01, 0.2)
    max_depth = trial.suggest_int('max_depth', 2, 10)

    model = GradientBoostingClassifier(
        n_estimators=n_estimators,
        learning_rate=learning_rate,
        max_depth=max_depth,
        random_state=42
    )

    model.fit(X_train, y_train)

    y_pred = model.predict(X_val)

    return accuracy_score(y_val, y_pred)

study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=15)

print("Best parameters:", study.best_params)


  from .autonotebook import tqdm as notebook_tqdm
[I 2025-02-11 12:06:19,697] A new study created in memory with name: no-name-4857ff83-8ff2-419e-a877-13b0db79e903
[I 2025-02-11 12:09:59,443] Trial 0 finished with value: 0.6700382108338953 and parameters: {'n_estimators': 169, 'learning_rate': 0.04862421928194639, 'max_depth': 4}. Best is trial 0 with value: 0.6700382108338953.
[I 2025-02-11 12:13:47,804] Trial 1 finished with value: 0.6657675882220724 and parameters: {'n_estimators': 177, 'learning_rate': 0.0764860020251593, 'max_depth': 9}. Best is trial 0 with value: 0.6700382108338953.
[I 2025-02-11 12:20:53,011] Trial 2 finished with value: 0.6684648235558552 and parameters: {'n_estimators': 267, 'learning_rate': 0.011903792887698253, 'max_depth': 6}. Best is trial 0 with value: 0.6700382108338953.
[I 2025-02-11 12:23:06,701] Trial 3 finished with value: 0.6637446617217352 and parameters: {'n_estimators': 101, 'learning_rate': 0.11361123484940094, 'max_depth': 9}. Best is trial 0 

Best parameters: {'n_estimators': 290, 'learning_rate': 0.19493359274098765, 'max_depth': 2}


In [10]:
best_params = study.best_params

best_model = GradientBoostingClassifier(
    n_estimators=best_params['n_estimators'],
    learning_rate=best_params['learning_rate'],
    max_depth=best_params['max_depth'],
    random_state=42
)

best_model.fit(X_train, y_train)

y_pred = best_model.predict(X_val)

accuracy = accuracy_score(y_val, y_pred)
f1 = f1_score(y_val, y_pred, average='weighted')

print("Classification report for Validation Set:")
print(classification_report(y_val, y_pred))
print(f"Accuracy: {accuracy}")
print(f"F1-Score: {f1}")

y_test_pred = best_model.predict(X_test)

accuracy_test = accuracy_score(y_test, y_test_pred)
f1_test = f1_score(y_test, y_test_pred, average='weighted')

print("\nClassification report for Test Set:")
print(classification_report(y_test, y_test_pred))
print(f"Accuracy (Test): {accuracy_test}")
print(f"F1-Score (Test): {f1_test}")


Classification report for Validation Set:
               precision    recall  f1-score   support

None_swarming       0.86      0.42      0.56      2200
     Swarming       0.62      0.94      0.75      2249

     accuracy                           0.68      4449
    macro avg       0.74      0.68      0.65      4449
 weighted avg       0.74      0.68      0.66      4449

Accuracy: 0.6790289952798382
F1-Score: 0.6555329110345602

Classification report for Test Set:
               precision    recall  f1-score   support

None_swarming       1.00      0.17      0.29      5600
     Swarming       0.55      1.00      0.71      5600

     accuracy                           0.58     11200
    macro avg       0.77      0.58      0.50     11200
 weighted avg       0.77      0.58      0.50     11200

Accuracy (Test): 0.5841964285714286
F1-Score (Test): 0.4972798254952314
