# Load Data

In [32]:
import numpy as np
import glob
import copy
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import torch.optim as optim
from sklearn.metrics import accuracy_score
import copy
import torch.nn as nn
import torch.nn.functional as F
import torch
from torch.utils.data import DataLoader, TensorDataset
from sklearn.feature_selection import SelectKBest, chi2
from sklearn.preprocessing import MinMaxScaler

In [33]:
# Class names in id order: the position of a name is its integer class id,
# so "other" is class 0 and the six species take ids 1 through 6.
birds_labels = {
    name: class_id
    for class_id, name in enumerate(
        ("other", "comcuc", "cowpig1", "eucdov", "eueowl1", "grswoo", "tawowl1")
    )
}

# Every label except "other", in id order.
birds = [name for name in birds_labels if name != "other"]

In [34]:
def unique_rows(matrix):
    """Find the rows of ``matrix`` whose entries are all the same value.

    Parameters
    ----------
    matrix : iterable of array-like
        Iterated row by row; each row is passed to ``np.unique``.

    Returns
    -------
    indexes : np.ndarray
        Integer positions of the rows that contain exactly one distinct value.
        Always an integer array, so it can be used for indexing even when it
        is empty.
    labels : np.ndarray
        The single distinct value of each selected row, in the same order as
        ``indexes``.
    """
    indexes = []
    labels = []

    for i, row in enumerate(matrix):
        # Compute the distinct values once and reuse them for both the test
        # and the label lookup.
        distinct = np.unique(row)
        if distinct.size == 1:
            indexes.append(i)
            labels.append(distinct[0])

    # An empty Python list would otherwise become a float64 array, which NumPy
    # refuses to use as an index; pin the dtype to the platform index type.
    indexes = np.array(indexes, dtype=np.intp)
    labels = np.array(labels)

    return indexes, labels

In [35]:
def load_data(bird):
    """Load the uniformly-labelled frames of one species from ``./data/<bird>/``.

    For every ``<name>.labels.npy`` file in the species directory the matching
    ``<name>.npy`` feature file is loaded as well. Only the rows whose
    annotation entries are all equal (as determined by ``unique_rows``) are
    kept; files without any such row are skipped.

    Parameters
    ----------
    bird : str
        Species key; must be present in ``birds_labels``.

    Returns
    -------
    labels : np.ndarray
        Concatenated label values of the kept rows across all files.
    features : np.ndarray
        Concatenated feature rows, aligned with ``labels``.

    Raises
    ------
    KeyError
        If ``bird`` is not a key of ``birds_labels``.
    ValueError
        If no file in the directory yields at least one usable row.
    """
    # The original looked the id up without using it; keep the KeyError that
    # lookup produced for unknown species, but make the check explicit.
    if bird not in birds_labels:
        raise KeyError(bird)

    labels_suffix = '.labels.npy'
    path = f'./data/{bird}/'
    # Match the full suffix so that every hit has a derivable feature file, and
    # sort so the row order does not depend on filesystem listing order.
    labels_files = sorted(glob.glob(path + '*' + labels_suffix))

    labels = []
    features = []

    for i, file in enumerate(labels_files):
        print(f'{bird}: {i + 1}/{len(labels_files)}', end='\r')
        # "<dir>/<name>.labels.npy" -> "<dir>/<name>.npy"
        data_id = file[:-len(labels_suffix)] + '.npy'

        annotations = np.load(file)
        feature = np.load(data_id)

        ind, label = unique_rows(annotations)

        # Nothing usable in this recording.
        if len(ind) == 0:
            continue

        labels.append(label)
        features.append(feature[ind])

    print('\n')

    if not labels:
        # np.concatenate([]) would raise a ValueError with an unhelpful message.
        raise ValueError(f'no usable labelled rows found for {bird!r} under {path}')

    labels = np.concatenate(labels)
    features = np.concatenate(features)

    return labels, features

In [36]:
# Collect the per-species arrays first, then stack them into a single feature
# matrix X and a single label vector y.
feature_chunks, label_chunks = [], []

for bird in birds:
    labels, features = load_data(bird)
    label_chunks.append(labels)
    feature_chunks.append(features)

X = np.concatenate(feature_chunks)
y = np.concatenate(label_chunks)

comcuc: 200/200

cowpig1: 200/200

eucdov: 200/200

eueowl1: 200/200

grswoo: 200/200

tawowl1: 200/200



# Feature Scaling (Feature Selection Disabled)

In [37]:
# Standardize every feature column with StandardScaler (zero mean, unit variance).
# NOTE(review): the scaler is fitted on the full dataset, before the train/test
# split done in the training cell, so test-set statistics leak into the scaling.
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Disabled experiment: chi-squared feature selection. chi2 expects non-negative
# features, so re-enabling this requires MinMaxScaler instead of StandardScaler.
# # Perform feature selection
# k_best_selector = SelectKBest(chi2, k=128)  # Select the 128 best features
# X_selected = k_best_selector.fit_transform(X, y)

# # Print the selected features
# selected_feature_indices = k_best_selector.get_support(indices=True)
# # selected_feature_names = [data.feature_names[i] for i in selected_feature_indices]
# # print("Selected Features:", selected_feature_names)

In [38]:

# Display the dimensions of the scaled feature matrix as (rows, columns).
X.shape

(85500, 548)

# Neural Network

In [39]:
import torch.nn as nn
import torch.nn.functional as F

class AudioCNN(nn.Module):
    """1-D convolutional classifier over fixed-length feature vectors.

    Three Conv1d + ReLU + MaxPool1d stages are followed by three fully
    connected layers. The network returns raw logits (no softmax).

    Parameters
    ----------
    input_dim : int, default 548
        Length of each input feature vector. Must be at least 8 so that the
        three pooling stages leave at least one position.
    num_classes : int, default 7
        Number of output logits.

    Raises
    ------
    ValueError
        If ``input_dim`` is smaller than 8.
    """

    def __init__(self, input_dim=548, num_classes=7):
        super().__init__()
        if input_dim < 8:
            raise ValueError(
                f"input_dim must be at least 8 to survive three 2x poolings, got {input_dim}"
            )

        self.conv1 = nn.Conv1d(1, 64, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv1d(64, 128, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv1d(128, 256, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool1d(2, 2)
        # NOTE(review): this layer is created but never applied in forward();
        # it is kept so the module's attributes stay as they were.
        self.dropout = nn.Dropout(0.2)

        # The convolutions (kernel 3, padding 1, stride 1) keep the length;
        # each pooling stage halves it, rounding down.
        pooled_len = input_dim
        for _ in range(3):
            pooled_len //= 2
        # With the defaults: 256 channels * 68 positions = 17408.
        flat_features = self.conv3.out_channels * pooled_len

        self.fc1 = nn.Linear(flat_features, 1024)
        self.fc2 = nn.Linear(1024, 512)
        self.fc3 = nn.Linear(512, num_classes)

    def forward(self, x):
        """Map a batch of shape (N, input_dim) to logits of shape (N, num_classes)."""
        x = x.unsqueeze(1)  # (N, L) -> (N, 1, L): add the channel axis Conv1d expects
        x = self.pool(F.relu(self.conv1(x)))  # conv1 + ReLU + max pooling
        x = self.pool(F.relu(self.conv2(x)))  # conv2 + ReLU + max pooling
        x = self.pool(F.relu(self.conv3(x)))  # conv3 + ReLU + max pooling
        x = x.view(x.size(0), -1)  # flatten channels and positions per sample
        x = F.relu(self.fc1(x))  # fc1 + ReLU
        x = F.relu(self.fc2(x))  # fc2 + ReLU
        x = self.fc3(x)  # fc3 -> logits
        return x

### Training

In [40]:
# Select the compute device: Apple MPS first, then CUDA, otherwise the CPU.
if torch.backends.mps.is_available():
    device = torch.device("mps")
elif torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(device)

# Define the hyperparameters
learning_rate = 1e-5
batch_size = 64
num_epochs = 500
seed = 42

# Seed torch so that weight initialisation and batch shuffling are repeatable.
torch.manual_seed(seed)

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=seed)

# Convert the data to PyTorch tensors and move them to the device once;
# the batches drawn from the loaders below are therefore already on `device`.
X_train = torch.from_numpy(X_train).float().to(device)
X_test = torch.from_numpy(X_test).float().to(device)
y_train = torch.from_numpy(y_train).long().to(device)
y_test = torch.from_numpy(y_test).long().to(device)

# Create the DataLoader for training and test sets
train_dataset = TensorDataset(X_train, y_train)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_dataset = TensorDataset(X_test, y_test)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Initialize the model
model = AudioCNN().to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Track the best-scoring weights and the per-epoch history.
# NOTE(review): the checkpoint is selected on the test split, so `best_acc`
# is an optimistic estimate; a separate validation split would avoid this.
best_acc = 0.0
best_model_wts = copy.deepcopy(model.state_dict())
losses = []
accuracies = []

for epoch in range(num_epochs):
    model.train()  # Set the model to training mode
    running_loss = 0.0

    for inputs, labels in train_loader:
        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Weight the batch-mean loss by batch size so the epoch average is
        # correct even when the last batch is smaller.
        running_loss += loss.item() * inputs.size(0)

    epoch_loss = running_loss / len(train_loader.dataset)

    # Evaluate the model
    model.eval()  # Set the model to evaluation mode
    correct = 0
    total = 0

    with torch.no_grad():
        for inputs, labels in test_loader:
            outputs = model(inputs)
            predicted = outputs.argmax(dim=1)  # index of the largest logit per sample
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    epoch_acc = correct / total

    # Keep a copy of the weights whenever the accuracy improves
    if epoch_acc > best_acc:
        best_acc = epoch_acc
        best_model_wts = copy.deepcopy(model.state_dict())

    losses.append(epoch_loss)
    accuracies.append(epoch_acc)
    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.5f}, Accuracy: {epoch_acc:.5f}')

# Five decimals, matching the per-epoch report above.
print(f'Best Accuracy: {best_acc:.5f}')

# load best model weights
model.load_state_dict(best_model_wts)


mps
Epoch 1/500, Loss: 0.84302, Accuracy: 0.79889
Epoch 2/500, Loss: 0.53024, Accuracy: 0.85099
Epoch 3/500, Loss: 0.40865, Accuracy: 0.87181
Epoch 4/500, Loss: 0.34133, Accuracy: 0.88409
Epoch 5/500, Loss: 0.29741, Accuracy: 0.90152
Epoch 6/500, Loss: 0.26643, Accuracy: 0.91029
Epoch 7/500, Loss: 0.24423, Accuracy: 0.91947
Epoch 8/500, Loss: 0.22570, Accuracy: 0.92415
Epoch 9/500, Loss: 0.21126, Accuracy: 0.92778
Epoch 10/500, Loss: 0.20021, Accuracy: 0.93193
Epoch 11/500, Loss: 0.19128, Accuracy: 0.93444
Epoch 12/500, Loss: 0.18359, Accuracy: 0.93298
Epoch 13/500, Loss: 0.17677, Accuracy: 0.93848
Epoch 14/500, Loss: 0.17009, Accuracy: 0.94146
Epoch 15/500, Loss: 0.16479, Accuracy: 0.94333
Epoch 16/500, Loss: 0.16010, Accuracy: 0.94158
Epoch 17/500, Loss: 0.15591, Accuracy: 0.94427
Epoch 18/500, Loss: 0.15073, Accuracy: 0.94743
Epoch 19/500, Loss: 0.14724, Accuracy: 0.94737
Epoch 20/500, Loss: 0.14471, Accuracy: 0.94836
Epoch 21/500, Loss: 0.14107, Accuracy: 0.95129
Epoch 22/500, Loss

Epoch 174/500, Loss: 0.01224, Accuracy: 0.98386
Epoch 175/500, Loss: 0.01258, Accuracy: 0.98129
Epoch 176/500, Loss: 0.01235, Accuracy: 0.98392
Epoch 177/500, Loss: 0.01220, Accuracy: 0.97678
Epoch 178/500, Loss: 0.01210, Accuracy: 0.98333
Epoch 179/500, Loss: 0.01118, Accuracy: 0.98269
Epoch 180/500, Loss: 0.01098, Accuracy: 0.98427
Epoch 181/500, Loss: 0.01142, Accuracy: 0.98111
Epoch 182/500, Loss: 0.01045, Accuracy: 0.98363
Epoch 183/500, Loss: 0.01089, Accuracy: 0.98275
Epoch 184/500, Loss: 0.00985, Accuracy: 0.98199
Epoch 185/500, Loss: 0.01031, Accuracy: 0.98193
Epoch 186/500, Loss: 0.00965, Accuracy: 0.98368
Epoch 187/500, Loss: 0.00959, Accuracy: 0.98351
Epoch 188/500, Loss: 0.00955, Accuracy: 0.98298
Epoch 189/500, Loss: 0.00920, Accuracy: 0.98298
Epoch 190/500, Loss: 0.00846, Accuracy: 0.97947
Epoch 191/500, Loss: 0.00983, Accuracy: 0.98363
Epoch 192/500, Loss: 0.00766, Accuracy: 0.98322
Epoch 193/500, Loss: 0.00921, Accuracy: 0.98357
Epoch 194/500, Loss: 0.00712, Accuracy: 

Epoch 345/500, Loss: 0.00025, Accuracy: 0.98591
Epoch 346/500, Loss: 0.00014, Accuracy: 0.98602
Epoch 347/500, Loss: 0.00012, Accuracy: 0.98596
Epoch 348/500, Loss: 0.00349, Accuracy: 0.97977
Epoch 349/500, Loss: 0.00121, Accuracy: 0.98532
Epoch 350/500, Loss: 0.00017, Accuracy: 0.98591
Epoch 351/500, Loss: 0.00012, Accuracy: 0.98585
Epoch 352/500, Loss: 0.00270, Accuracy: 0.98322
Epoch 353/500, Loss: 0.00057, Accuracy: 0.98579
Epoch 354/500, Loss: 0.00015, Accuracy: 0.98585
Epoch 355/500, Loss: 0.00010, Accuracy: 0.98550
Epoch 356/500, Loss: 0.00410, Accuracy: 0.98444
Epoch 357/500, Loss: 0.00027, Accuracy: 0.98591
Epoch 358/500, Loss: 0.00013, Accuracy: 0.98596
Epoch 359/500, Loss: 0.00010, Accuracy: 0.98626
Epoch 360/500, Loss: 0.00009, Accuracy: 0.98561
Epoch 361/500, Loss: 0.00446, Accuracy: 0.98550
Epoch 362/500, Loss: 0.00022, Accuracy: 0.98596
Epoch 363/500, Loss: 0.00012, Accuracy: 0.98596
Epoch 364/500, Loss: 0.00010, Accuracy: 0.98526
Epoch 365/500, Loss: 0.00009, Accuracy: 

<All keys matched successfully>