In [1]:
!pip install numpy librosa matplotlib scikit-learn tqdm



In [2]:
from google.colab import files
import zipfile
import os
import numpy as np
import librosa
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset, random_split

In [4]:
zip_path = "data.zip"
extract_path = "/content/dataset"

with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(extract_path)

print("Extracted files:", os.listdir(extract_path))

Extracted files: ['non_drone', 'drone']


In [5]:
drone_path = os.path.join(extract_path, "drone")
non_drone_path = os.path.join(extract_path, "non_drone")

In [6]:
# Parameters
SAMPLE_RATE = 16000
N_MFCC = 40
FRAME_LENGTH = 1  # 1 second

# Feature extraction (without collapsing time)
def extract_features(file_path, sample_rate=SAMPLE_RATE):
    y, sr = librosa.load(file_path, sr=sample_rate, duration=FRAME_LENGTH)
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=N_MFCC)
    mfcc = mfcc.T  # shape (time_steps, n_mfcc)
    return mfcc

# Prepare Dataset
def load_data(drone_path, non_drone_path):
    X, y = [], []

    for file in os.listdir(drone_path):
        if file.endswith(".wav"):
            features = extract_features(os.path.join(drone_path, file))
            X.append(features)
            y.append(1)  # drone = 1

    for file in os.listdir(non_drone_path):
        if file.endswith(".wav"):
            features = extract_features(os.path.join(non_drone_path, file))
            X.append(features)
            y.append(0)  # non-drone = 0

    return X, y

X, y = load_data(drone_path, non_drone_path)

# Padding sequences to the same length (important for batching)
max_len = max([x.shape[0] for x in X])
X_padded = np.zeros((len(X), max_len, N_MFCC))
for i, feat in enumerate(X):
    X_padded[i, :feat.shape[0], :] = feat

X_tensor = torch.tensor(X_padded, dtype=torch.float32)
y_tensor = torch.tensor(y, dtype=torch.long)

# Dataset and DataLoader
dataset = TensorDataset(X_tensor, y_tensor)
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32)

# Small CNN model
class AudioCNN(nn.Module):
    def __init__(self):
        super(AudioCNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 16, kernel_size=(5, 5), stride=(1, 1))
        self.pool = nn.MaxPool2d(kernel_size=(2, 2))
        self.fc1 = nn.Linear(16 * ((max_len - 4) // 2) * ((N_MFCC - 4) // 2), 64)
        self.fc2 = nn.Linear(64, 2)

    def forward(self, x):
        x = x.unsqueeze(1)  # Add channel dimension
        x = self.pool(torch.relu(self.conv1(x)))
        x = x.view(x.size(0), -1)
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = AudioCNN().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training
n_epochs = 10
for epoch in range(n_epochs):
    model.train()
    running_loss = 0.0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    print(f"Epoch {epoch+1}, Loss: {running_loss/len(train_loader):.4f}")

# Testing
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f"Test Accuracy: {100 * correct / total:.2f}%")


Epoch 1, Loss: 0.8958
Epoch 2, Loss: 0.1262
Epoch 3, Loss: 0.1046
Epoch 4, Loss: 0.0719
Epoch 5, Loss: 0.0640
Epoch 6, Loss: 0.0501
Epoch 7, Loss: 0.0433
Epoch 8, Loss: 0.0395
Epoch 9, Loss: 0.0236
Epoch 10, Loss: 0.0235
Test Accuracy: 97.39%


In [7]:
import torch
from google.colab import files

# 1. Save only the model weights (recommended)
torch.save(model.state_dict(), "audio_cnn_weights.pth")

# 3. Download to your local machine
files.download("audio_cnn_weights.pth")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>