In [10]:
import pickle
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import classification_report
import numpy as np
from sklearn.metrics import accuracy_score, f1_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import GradientBoostingClassifier

In [11]:
def load_pickle_data(file_path):
    with open(file_path, 'rb') as f:
        data = pickle.load(f)
    return data

In [12]:
train_data = load_pickle_data("Train_stft_features.pkl")
val_data = load_pickle_data("Val_stft_features.pkl")
test_data = load_pickle_data("Test_stft_features.pkl")

# SVM (Support vector machine)

In [13]:
X_train = []
y_train = []

for item in train_data:
    stft = np.std(item['stft'], axis=1) 
    label = item['category']

    X_train.append(stft)
    y_train.append(label)

X_train = np.array(X_train)
y_train = np.array(y_train)

X_val = []
y_val = []

for item in val_data:
    stft = np.std(item['stft'], axis=1) 
    label = item['category']

    X_val.append(stft)
    y_val.append(label)

X_val = np.array(X_val)
y_val = np.array(y_val)

svm_model = SVC(kernel='linear', random_state=42)
svm_model.fit(X_train, y_train)

y_pred = svm_model.predict(X_val)

accuracy = accuracy_score(y_val, y_pred)
f1 = f1_score(y_val, y_pred, average='weighted')

print("Classification report for Validation Set:")
print(classification_report(y_val, y_pred))

print(f"Accuracy: {accuracy}")
print(f"F1-Score: {f1}")

X_test = []
y_test = []

for item in test_data:
    stft = np.std(item['stft'], axis=1) 
    label = item['category']

    X_test.append(stft)
    y_test.append(label)

X_test = np.array(X_test)
y_test = np.array(y_test)

y_test_pred = svm_model.predict(X_test)

accuracy_test = accuracy_score(y_test, y_test_pred)
f1_test = f1_score(y_test, y_test_pred, average='weighted')

print("\nClassification report for Test Set:")
print(classification_report(y_test, y_test_pred))

print(f"Accuracy (Test): {accuracy_test}")
print(f"F1-Score (Test): {f1_test}")


Classification report for Validation Set:
               precision    recall  f1-score   support

None_swarming       0.48      0.42      0.45      1800
     Swarming       0.48      0.55      0.51      1800

     accuracy                           0.48      3600
    macro avg       0.48      0.48      0.48      3600
 weighted avg       0.48      0.48      0.48      3600

Accuracy: 0.4822222222222222
F1-Score: 0.4799107142857142

Classification report for Test Set:
               precision    recall  f1-score   support

None_swarming       0.43      0.72      0.54      2400
     Swarming       0.19      0.06      0.09      2441

     accuracy                           0.39      4841
    macro avg       0.31      0.39      0.32      4841
 weighted avg       0.31      0.39      0.32      4841

Accuracy (Test): 0.39103491014253255
F1-Score (Test): 0.3158154335393749


# KNN (K-Nearest Neighbors)

In [14]:
knn_model = KNeighborsClassifier(n_neighbors=6)
knn_model.fit(X_train, y_train)

y_val_pred_knn = knn_model.predict(X_val)

print("Classification report for Validation Set:")
print(classification_report(y_val, y_val_pred_knn))

accuracy_val_knn = accuracy_score(y_val, y_val_pred_knn)
f1_val_knn = f1_score(y_val, y_val_pred_knn, average='weighted')

print(f"Accuracy on Validation Set (KNN): {accuracy_val_knn}")
print(f"F1-Score on Validation Set (KNN): {f1_val_knn}")

y_test_pred_knn = knn_model.predict(X_test)

print("Classification report for Test Set:")
print(classification_report(y_test, y_test_pred_knn))

accuracy_test_knn = accuracy_score(y_test, y_test_pred_knn)
f1_test_knn = f1_score(y_test, y_test_pred_knn, average='weighted')

print(f"Accuracy on Test Set (KNN): {accuracy_test_knn}")
print(f"F1-Score on Test Set (KNN): {f1_test_knn}")

Classification report for Validation Set:
               precision    recall  f1-score   support

None_swarming       0.44      0.41      0.42      1800
     Swarming       0.45      0.48      0.47      1800

     accuracy                           0.45      3600
    macro avg       0.45      0.45      0.45      3600
 weighted avg       0.45      0.45      0.45      3600

Accuracy on Validation Set (KNN): 0.4463888888888889
F1-Score on Validation Set (KNN): 0.4455623232753088
Classification report for Test Set:
               precision    recall  f1-score   support

None_swarming       0.44      0.76      0.56      2400
     Swarming       0.21      0.06      0.10      2441

     accuracy                           0.41      4841
    macro avg       0.33      0.41      0.33      4841
 weighted avg       0.33      0.41      0.33      4841

Accuracy on Test Set (KNN): 0.40900640363561247
F1-Score on Test Set (KNN): 0.32655355789765667


# NB (Naive Bayes)

In [15]:
from sklearn.naive_bayes import GaussianNB
nb_model = GaussianNB()
nb_model.fit(X_train, y_train)

y_pred = nb_model.predict(X_val)

accuracy = accuracy_score(y_val, y_pred)
f1 = f1_score(y_val, y_pred, average='weighted')

print("Classification report for Validation Set:")
print(classification_report(y_val, y_pred))

print(f"Accuracy: {accuracy}")
print(f"F1-Score: {f1}")

accuracy_test = accuracy_score(y_test, y_test_pred)
f1_test = f1_score(y_test, y_test_pred, average='weighted')

print("\nClassification report for Test Set:")
print(classification_report(y_test, y_test_pred))

print(f"Accuracy (Test): {accuracy_test}")
print(f"F1-Score (Test): {f1_test}")

Classification report for Validation Set:
               precision    recall  f1-score   support

None_swarming       0.41      0.35      0.38      1800
     Swarming       0.43      0.49      0.46      1800

     accuracy                           0.42      3600
    macro avg       0.42      0.42      0.42      3600
 weighted avg       0.42      0.42      0.42      3600

Accuracy: 0.4186111111111111
F1-Score: 0.41577106426076327

Classification report for Test Set:
               precision    recall  f1-score   support

None_swarming       0.43      0.72      0.54      2400
     Swarming       0.19      0.06      0.09      2441

     accuracy                           0.39      4841
    macro avg       0.31      0.39      0.32      4841
 weighted avg       0.31      0.39      0.32      4841

Accuracy (Test): 0.39103491014253255
F1-Score (Test): 0.3158154335393749


# Random Forest(RF)

In [16]:
from sklearn.ensemble import RandomForestClassifier

rf_model = RandomForestClassifier(n_estimators=2000, random_state=42)
rf_model.fit(X_train, y_train)

y_pred = rf_model.predict(X_val)

accuracy = accuracy_score(y_val, y_pred)
f1 = f1_score(y_val, y_pred, average='weighted')

print("Classification report for Validation Set:")
print(classification_report(y_val, y_pred))

print(f"Accuracy: {accuracy}")
print(f"F1-Score: {f1}")

y_test_pred = rf_model.predict(X_test)

accuracy_test = accuracy_score(y_test, y_test_pred)
f1_test = f1_score(y_test, y_test_pred, average='weighted')

print("\nClassification report for Test Set:")
print(classification_report(y_test, y_test_pred))

print(f"Accuracy (Test): {accuracy_test}")
print(f"F1-Score (Test): {f1_test}")


Classification report for Validation Set:
               precision    recall  f1-score   support

None_swarming       0.50      0.43      0.46      1800
     Swarming       0.50      0.56      0.53      1800

     accuracy                           0.50      3600
    macro avg       0.50      0.50      0.49      3600
 weighted avg       0.50      0.50      0.49      3600

Accuracy: 0.4961111111111111
F1-Score: 0.4938991065580149

Classification report for Test Set:
               precision    recall  f1-score   support

None_swarming       0.41      0.66      0.50      2400
     Swarming       0.17      0.07      0.10      2441

     accuracy                           0.36      4841
    macro avg       0.29      0.36      0.30      4841
 weighted avg       0.29      0.36      0.30      4841

Accuracy (Test): 0.3606692832059492
F1-Score (Test): 0.3004251993049194


# Gradient Boosting, GB

In [17]:
import optuna
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score

def objective(trial):
    n_estimators = trial.suggest_int('n_estimators', 50, 300)
    learning_rate = trial.suggest_float('learning_rate', 0.01, 0.2)
    max_depth = trial.suggest_int('max_depth', 2, 10)

    model = GradientBoostingClassifier(
        n_estimators=n_estimators,
        learning_rate=learning_rate,
        max_depth=max_depth,
        random_state=42
    )

    model.fit(X_train, y_train)

    y_pred = model.predict(X_val)

    return accuracy_score(y_val, y_pred)

study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=15)

print("Best parameters:", study.best_params)


  from .autonotebook import tqdm as notebook_tqdm
[I 2025-02-09 20:14:40,940] A new study created in memory with name: no-name-f2c3abed-09f5-4ef5-b031-1824202723bb
[I 2025-02-09 20:18:36,639] Trial 0 finished with value: 0.5172222222222222 and parameters: {'n_estimators': 87, 'learning_rate': 0.06823099874177987, 'max_depth': 10}. Best is trial 0 with value: 0.5172222222222222.
[I 2025-02-09 20:20:16,968] Trial 1 finished with value: 0.505 and parameters: {'n_estimators': 156, 'learning_rate': 0.09422552359292131, 'max_depth': 2}. Best is trial 0 with value: 0.5172222222222222.
[I 2025-02-09 20:33:51,623] Trial 2 finished with value: 0.515 and parameters: {'n_estimators': 300, 'learning_rate': 0.06300071896223532, 'max_depth': 10}. Best is trial 0 with value: 0.5172222222222222.
[I 2025-02-09 20:43:32,164] Trial 3 finished with value: 0.5069444444444444 and parameters: {'n_estimators': 256, 'learning_rate': 0.07291733173537808, 'max_depth': 8}. Best is trial 0 with value: 0.51722222222

Best parameters: {'n_estimators': 53, 'learning_rate': 0.014662952481763332, 'max_depth': 8}


In [18]:
best_params = study.best_params

best_model = GradientBoostingClassifier(
    n_estimators=best_params['n_estimators'],
    learning_rate=best_params['learning_rate'],
    max_depth=best_params['max_depth'],
    random_state=42
)

best_model.fit(X_train, y_train)

y_pred = best_model.predict(X_val)

accuracy = accuracy_score(y_val, y_pred)
f1 = f1_score(y_val, y_pred, average='weighted')

print("Classification report for Validation Set:")
print(classification_report(y_val, y_pred))
print(f"Accuracy: {accuracy}")
print(f"F1-Score: {f1}")

y_test_pred = best_model.predict(X_test)

accuracy_test = accuracy_score(y_test, y_test_pred)
f1_test = f1_score(y_test, y_test_pred, average='weighted')

print("\nClassification report for Test Set:")
print(classification_report(y_test, y_test_pred))
print(f"Accuracy (Test): {accuracy_test}")
print(f"F1-Score (Test): {f1_test}")


Classification report for Validation Set:
               precision    recall  f1-score   support

None_swarming       0.58      0.41      0.48      1800
     Swarming       0.54      0.70      0.61      1800

     accuracy                           0.56      3600
    macro avg       0.56      0.56      0.55      3600
 weighted avg       0.56      0.56      0.55      3600

Accuracy: 0.5552777777777778
F1-Score: 0.5461331665574576

Classification report for Test Set:
               precision    recall  f1-score   support

None_swarming       0.35      0.39      0.37      2400
     Swarming       0.33      0.29      0.31      2441

     accuracy                           0.34      4841
    macro avg       0.34      0.34      0.34      4841
 weighted avg       0.34      0.34      0.34      4841

Accuracy (Test): 0.3420780830406941
F1-Score (Test): 0.3404897482860891
