In [4]:
!pip3 install google numpy librosa matplotlib scikit-learn tqdm

Defaulting to user installation because normal site-packages is not writeable
Collecting google
  Downloading google-3.0.0-py2.py3-none-any.whl.metadata (627 bytes)
Collecting beautifulsoup4 (from google)
  Downloading beautifulsoup4-4.13.4-py3-none-any.whl.metadata (3.8 kB)
Collecting soupsieve>1.2 (from beautifulsoup4->google)
  Downloading soupsieve-2.6-py3-none-any.whl.metadata (4.6 kB)
Downloading google-3.0.0-py2.py3-none-any.whl (45 kB)
Downloading beautifulsoup4-4.13.4-py3-none-any.whl (187 kB)
Downloading soupsieve-2.6-py3-none-any.whl (36 kB)
Installing collected packages: soupsieve, beautifulsoup4, google
Successfully installed beautifulsoup4-4.13.4 google-3.0.0 soupsieve-2.6


In [11]:
import zipfile
import os

import numpy as np
import librosa
import librosa.feature
import librosa.display
import matplotlib.pyplot as plt
from tqdm import tqdm

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import LabelEncoder

from sklearn.feature_selection import VarianceThreshold, SelectKBest, f_classif

from sklearn.ensemble import IsolationForest

import IPython.display as ipd
import random

import joblib

In [None]:
# Unpack the dataset archive into a local folder and show what it contained.
zip_path = "data.zip"
extract_path = "./dataset"

with zipfile.ZipFile(zip_path, mode="r") as archive:
    archive.extractall(path=extract_path)

extracted_entries = os.listdir(extract_path)
print("Extracted files:", extracted_entries)

Extracted files: ['drone', 'non_drone']


In [17]:
# One sub-directory per class inside the extracted dataset folder.
drone_path, non_drone_path = (
    os.path.join(extract_path, class_dir) for class_dir in ("drone", "non_drone")
)

# Baseline extractor: MFCC features only
def extract_features(file_path):
    """Summarise one audio file as a vector of 13 averaged MFCCs.

    The file is loaded with ``sr=None`` (librosa keeps the file's own sampling
    rate), 13 MFCCs are computed, and each coefficient is averaged along
    axis 1, giving one value per coefficient.

    Note: a later cell defines another function with this same name; once that
    cell has run, it replaces this definition.

    Args:
        file_path: Path of the audio file to load.

    Returns:
        1-D array with 13 entries.
    """
    signal, sample_rate = librosa.load(file_path, sr=None)
    coefficients = librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=13)
    return coefficients.mean(axis=1)

# Collect one feature vector and one string label per .wav file, drone class
# first and non-drone class second.
X, y = [], []

for folder, label, desc in (
    (drone_path, "drone", "Processing Drone Audio"),
    (non_drone_path, "non-drone", "Processing Non-Drone Audio"),
):
    for file in tqdm(os.listdir(folder), desc=desc):
        if not file.endswith(".wav"):
            continue  # ignore anything that is not a .wav file
        X.append(extract_features(os.path.join(folder, file)))
        y.append(label)

# Keep the Python lists in X / y and expose array versions under X_ / y_.
X_ = np.array(X)
y_ = np.array(y)
print(f"Feature shape: {X_.shape}, Labels shape: {y_.shape}")

Processing Drone Audio:   0%|          | 0/1332 [00:00<?, ?it/s]

Processing Drone Audio: 100%|██████████| 1332/1332 [00:22<00:00, 60.40it/s] 
Processing Non-Drone Audio: 100%|██████████| 1728/1728 [00:15<00:00, 110.03it/s]

Feature shape: (3060, 13), Labels shape: (3060,)





In [19]:
import os
import numpy as np
import librosa
import librosa.feature
from sklearn.feature_selection import VarianceThreshold, SelectKBest, f_classif
from sklearn.ensemble import RandomForestClassifier

def extract_mfcc(y, sr):
    """Return the 13 MFCCs of signal ``y`` (sampling rate ``sr``), each averaged along axis 1."""
    coefficients = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
    return coefficients.mean(axis=1)

def extract_mel_spectrogram(y, sr):
    """Return the 40-band mel spectrogram of ``y`` in dB, averaged along axis 1.

    The power spectrogram is converted with ``librosa.power_to_db`` before
    averaging, so the mean is taken over dB values.
    """
    power_spec = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=40)
    db_spec = librosa.power_to_db(power_spec)
    return db_spec.mean(axis=1)

def extract_chroma(y, sr):
    """Return the STFT chromagram of ``y`` (librosa default settings), averaged along axis 1."""
    chromagram = librosa.feature.chroma_stft(y=y, sr=sr)
    return chromagram.mean(axis=1)

def extract_spectral_contrast(y, sr):
    """Return the spectral contrast of ``y`` (librosa default settings), averaged along axis 1."""
    contrast = librosa.feature.spectral_contrast(y=y, sr=sr)
    return contrast.mean(axis=1)

def extract_zcr(y):
    """Return the zero-crossing rate of ``y`` averaged over all values, as a single scalar."""
    crossing_rate = librosa.feature.zero_crossing_rate(y)
    return crossing_rate.mean()

def extract_spectral_rolloff(y, sr):
    """Return the spectral roll-off of ``y`` (librosa default settings), averaged to a single scalar."""
    rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)
    return rolloff.mean()

def extract_spectral_centroid(y, sr):
    """Return the spectral centroid of ``y`` (librosa default settings), averaged to a single scalar."""
    centroid = librosa.feature.spectral_centroid(y=y, sr=sr)
    return centroid.mean()

# Combined extractor: concatenates every per-feature summary into one vector
def extract_features(file_path):
    """Load one audio file and return a single flat feature vector.

    The file is loaded with ``sr=None`` and the outputs of the individual
    extractors are concatenated in this order: MFCC, mel spectrogram, chroma,
    spectral contrast, zero-crossing rate, spectral roll-off, spectral centroid.

    Args:
        file_path: Path of the audio file to load.

    Returns:
        1-D array holding all features back to back.
    """
    signal, sample_rate = librosa.load(file_path, sr=None)

    parts = [
        extract_mfcc(signal, sample_rate),
        extract_mel_spectrogram(signal, sample_rate),
        extract_chroma(signal, sample_rate),
        extract_spectral_contrast(signal, sample_rate),
        extract_zcr(signal),
        extract_spectral_rolloff(signal, sample_rate),
        extract_spectral_centroid(signal, sample_rate),
    ]

    # hstack flattens the mix of 1-D arrays and scalars into one vector.
    return np.hstack(parts)

def load_dataset(drone_path, non_drone_path):
    """Extract features for every ``.wav`` file in the two class directories.

    Files under ``drone_path`` are labelled ``"drone"`` and files under
    ``non_drone_path`` are labelled ``"non-drone"``. Drone rows come first.

    Args:
        drone_path: Directory containing drone recordings.
        non_drone_path: Directory containing non-drone recordings.

    Returns:
        Tuple ``(X, y)``: ``X`` is an array with one feature vector per file
        and ``y`` is an array of the matching string labels.
    """
    X, y = [], []

    for folder, label in ((drone_path, "drone"), (non_drone_path, "non-drone")):
        # os.listdir returns entries in arbitrary order; sorting keeps the row
        # order (and any later seeded split) reproducible across machines.
        for file in sorted(os.listdir(folder)):
            if file.endswith(".wav"):
                X.append(extract_features(os.path.join(folder, file)))
                y.append(label)

    return np.array(X), np.array(y)

def select_features(X, y, k=20, variance_threshold=0.01, random_state=42):
    """Reduce ``X`` to its ``k`` most class-discriminative columns.

    Steps:
      1. Drop columns whose variance is not above ``variance_threshold``.
      2. Keep the ``k`` columns with the highest ANOVA F-score against ``y``.
      3. Order the surviving columns by ascending random-forest importance.
         This step does not remove anything; it only fixes the column order.

    Both selectors are fitted on exactly the ``X``/``y`` passed in. Fitting
    them on rows that are later used for evaluation leaks label information
    into the features, so pass training rows only when an unbiased test score
    is needed.

    Args:
        X: 2-D feature matrix, one row per sample.
        y: Class labels aligned with the rows of ``X``.
        k: Number of columns to keep (clamped to the number of columns left
            after the variance filter).
        variance_threshold: Threshold handed to ``VarianceThreshold``.
        random_state: Seed for the random forest so the column order is
            reproducible between runs.

    Returns:
        2-D array with the selected columns of ``X``.
    """
    # Remove low-variance features
    variance_filter = VarianceThreshold(threshold=variance_threshold)
    X_filtered = variance_filter.fit_transform(X)

    # Never ask SelectKBest for more columns than exist (scikit-learn raises
    # or warns in that case, depending on the version).
    k = min(k, X_filtered.shape[1])

    # Select top features using ANOVA F-score
    kbest = SelectKBest(score_func=f_classif, k=k)
    X_filtered = kbest.fit_transform(X_filtered, y)

    # Rank the k remaining columns by random-forest importance (ascending).
    model = RandomForestClassifier(random_state=random_state)
    model.fit(X_filtered, y)
    importance_order = np.argsort(model.feature_importances_)

    return X_filtered[:, importance_order]

def preprocess_audio_data(drone_path, non_drone_path):
    """Load both class directories, run feature selection and report the shapes.

    Args:
        drone_path: Directory containing drone recordings.
        non_drone_path: Directory containing non-drone recordings.

    Returns:
        Tuple ``(X_selected, y)`` with the reduced feature matrix and the
        string labels produced by ``load_dataset``.
    """
    features, labels = load_dataset(drone_path, non_drone_path)
    selected = select_features(features, labels)
    print(f"Final feature shape: {selected.shape}, Labels shape: {labels.shape}")
    return selected, labels

In [20]:
# Build the reduced feature matrix and the string labels for both classes.
# Note: this rebinds `y`, which an earlier cell used for a plain Python list
# of labels.
X_selected, y = preprocess_audio_data(drone_path, non_drone_path)

  return pitch_tuning(


Final feature shape: (3060, 20), Labels shape: (3060,)


In [21]:
# Turn the string labels into integers; `encoder.classes_` keeps the mapping.
encoder = LabelEncoder()
y_encoded = encoder.fit_transform(y)

# Stratify so the train and test sets keep the same drone / non-drone ratio as
# the full dataset.
# NOTE(review): X_selected comes from select_features, which was fitted on all
# rows and labels before this split, so the accuracy below is likely optimistic.
X_train, X_test, y_train, y_test = train_test_split(
    X_selected,
    y_encoded,
    test_size=0.2,
    random_state=42,
    stratify=y_encoded,
)

clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Binary Classification Accuracy: {accuracy:.2f}")

Binary Classification Accuracy: 0.98


In [22]:
# List the .wav files of each class. os.listdir returns entries in arbitrary
# order, so sort to keep row order (and the seeded model) reproducible.
drone_files = sorted(os.path.join(drone_path, f) for f in os.listdir(drone_path) if f.endswith(".wav"))
non_drone_files = sorted(os.path.join(non_drone_path, f) for f in os.listdir(non_drone_path) if f.endswith(".wav"))

print(f"Drone samples: {len(drone_files)}, Non-drone samples: {len(non_drone_files)}")

X_drone_only = np.array([extract_features(file) for file in tqdm(drone_files, desc="Processing Drone Files for Anomaly Detection")])

# Fit on drone audio only: drone is the "normal" class and everything else
# should be scored as an outlier.
anomaly_model = IsolationForest(contamination=0.1, random_state=42)
anomaly_model.fit(X_drone_only)

# Reuse the drone features computed above instead of extracting them a second
# time; only the non-drone files still need processing. Rows stay in
# `drone_files + non_drone_files` order.
X_non_drone = [extract_features(file) for file in tqdm(non_drone_files, desc="Testing Anomaly Detection")]
X_all = np.array(list(X_drone_only) + X_non_drone)

# IsolationForest.predict returns 1 for inliers and -1 for outliers.
# NOTE(review): the drone rows of X_all are the rows the model was fitted on,
# so drone-side metrics computed from these predictions are in-sample.
predictions = anomaly_model.predict(X_all)
predictions = ["drone" if p == 1 else "non-drone" for p in predictions]

print("Anomaly detection results:", predictions[:10])

Drone samples: 1332, Non-drone samples: 1728


  return pitch_tuning(
Processing Drone Files for Anomaly Detection: 100%|██████████| 1332/1332 [00:07<00:00, 167.98it/s]
  return pitch_tuning(
Testing Anomaly Detection: 100%|██████████| 3060/3060 [00:30<00:00, 99.58it/s] 

Anomaly detection results: ['drone', 'drone', 'drone', 'drone', 'drone', 'drone', 'drone', 'drone', 'drone', 'non-drone']





In [23]:
num_samples = 5

# Build the combined file list once; it follows the same order as the rows of
# X_all and the entries of `predictions`.
all_files = drone_files + non_drone_files

# random.sample raises ValueError when asked for more items than exist, so
# cap the request at the number of available files.
random_indices = random.sample(range(len(all_files)), min(num_samples, len(all_files)))

print("Playing samples with predicted labels:")
for idx in random_indices:
    file_path = all_files[idx]
    predicted_label = predictions[idx]

    print(f"Predicted: {predicted_label} | File: {file_path}")

    # Pass the path as `filename` explicitly. `rate` applies only to raw
    # sample arrays, so it is not needed when playing an audio file.
    ipd.display(ipd.Audio(filename=file_path))


Playing samples with predicted labels:
Predicted: non-drone | File: ./dataset/non_drone/3-163607-B-132.wav


Predicted: drone | File: ./dataset/drone/mixed_41-bebop_000_.wav


Predicted: non-drone | File: ./dataset/non_drone/1-34094-A-50.wav


Predicted: non-drone | File: ./dataset/drone/mixed_membo_15-membo_003_.wav


Predicted: non-drone | File: ./dataset/non_drone/Best_Helicopter_Sounds.__Top_Sounds_that_Helicopters_make_089.wav


In [24]:
# Reference labels in evaluation order: every drone file (+1) first, then
# every non-drone file (-1).
n_drone, n_non_drone = len(drone_files), len(non_drone_files)
true_labels = [1] * n_drone + [-1] * n_non_drone

# Map the string predictions onto the same +1 / -1 coding.
predicted_labels = [-1 if p != "drone" else 1 for p in predictions]

accuracy = accuracy_score(true_labels, predicted_labels)
print(f"Anomaly Detection Accuracy: {accuracy:.2f}")

# target_names are listed in ascending label order: -1 (non-drone), then 1 (drone).
report = classification_report(true_labels, predicted_labels, target_names=["Non-Drone", "Drone"])
print("\nClassification Report:")
print(report)

matrix = confusion_matrix(true_labels, predicted_labels)
print("\nConfusion Matrix:")
print(matrix)

Anomaly Detection Accuracy: 0.90

Classification Report:
              precision    recall  f1-score   support

   Non-Drone       0.92      0.90      0.91      1728
       Drone       0.88      0.90      0.89      1332

    accuracy                           0.90      3060
   macro avg       0.90      0.90      0.90      3060
weighted avg       0.90      0.90      0.90      3060


Confusion Matrix:
[[1559  169]
 [ 134 1198]]


In [25]:
# !pip install sounddevice scipy librosa

In [26]:
# # Save the trained IsolationForest model
# joblib.dump(anomaly_model, "drone_anomaly_model.pkl")

# # Download the model file
# from google.colab import files
# files.download("drone_anomaly_model.pkl")