In [1]:
import os
import torch
import torch.nn as nn
from torch.utils.data import Dataset
import torch.nn.functional as F
import json

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

  return torch._C._cuda_getDeviceCount() > 0


In [2]:
DATA_PATH = "../data/fouriers/"

In [3]:
# frequency range to fit model on
MIN_FREQ, MAX_FREQ = 2000, 4000
SAMPLE_RATE = 48000
NYQUIST = SAMPLE_RATE / 2 + 1

class AudioDataset(Dataset):
    def __init__(self, min_freq, max_freq):
        assert min_freq < max_freq, "Common man.."

        self.files = os.listdir(DATA_PATH)

        sample_fourier = json.load(open(DATA_PATH + self.files[0], "r")) 
        n_freqs = len(sample_fourier["powers"])
        # indices that correspond to MIN_FREQ and MAX_FREQ
        self.min_i = int(min_freq * n_freqs / NYQUIST)
        self.max_i = int(max_freq * n_freqs / NYQUIST)
        # number of frequencies per sample
        self.n_freqs = self.max_i - self.min_i + 1

        self.data = []        
        self.load_data()

    def load_data(self):  
        
         
        for file in self.files:
            fourier = json.load(open(DATA_PATH + file, "r")) 
            self.data.append((torch.tensor(fourier["powers"][self.min_i:self.max_i+1]), fourier["label"]))

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx]

dataset = AudioDataset(MIN_FREQ, MAX_FREQ)

In [4]:
class TwoLayerIntegrated(nn.Module):
    def __init__(self, input_dim):
        super(TwoLayerIntegrated, self).__init__()
        
        self.layer1 = nn.Linear(input_dim, 128)
        self.layer2 = nn.Linear(128, 1)
    
    def forward(self, x):
        x = F.relu(self.layer1(x))
        x = F.sigmoid(self.layer2(x))
        
        return x

In [5]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, random_split
from tqdm import tqdm

# Set random seed for reproducibility
torch.manual_seed(42)

# Parameters
batch_size = 256
num_epochs = 200
validation_split = 0.2

# Create dataset and dataloader
dataset_size = len(dataset)
val_size = int(validation_split * dataset_size)
train_size = dataset_size - val_size
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

model = TwoLayerIntegrated(dataset.n_freqs).to(device)

# Define loss and optimizer
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters())

# Lists to keep track of losses and accuracy for plotting
train_losses = []
val_losses = []
train_accuracies = []
val_accuracies = []

def calculate_accuracy(predictions, targets):
    rounded_preds = torch.round(predictions)
    correct = (rounded_preds == targets).sum().item()
    total = targets.size(0)
    return correct / total

for epoch in tqdm(range(num_epochs)):
    # Training phase
    model.train()
    train_loss_sum = 0.0
    correct_train = 0
    total_train = 0
    for batch_waveforms, batch_labels in train_dataloader:
        batch_waveforms = batch_waveforms.to(device)
        batch_labels = batch_labels.to(device)
        optimizer.zero_grad()
        batch_outputs = model(batch_waveforms)
        loss = criterion(batch_outputs, batch_labels.unsqueeze(1).float())
        loss.backward()
        optimizer.step()
        train_loss_sum += loss.item()

        batch_predictions = torch.round(batch_outputs)
        correct_train += (batch_predictions == batch_labels.unsqueeze(1)).sum().item()
        total_train += batch_labels.size(0)
    
    train_loss_avg = train_loss_sum / len(train_dataloader)
    train_acc = correct_train / total_train
    train_losses.append(train_loss_avg)
    train_accuracies.append(train_acc)

    # Validation phase
    model.eval()
    val_loss_sum = 0.0
    correct_val = 0
    total_val = 0
    with torch.no_grad():
        for batch_waveforms, batch_labels in val_dataloader:
            batch_waveforms = batch_waveforms.to(device)
            batch_labels = batch_labels.to(device)
            batch_outputs = model(batch_waveforms)
            loss = criterion(batch_outputs, batch_labels.unsqueeze(1).float())
            val_loss_sum += loss.item()

            batch_predictions = torch.round(batch_outputs)
            correct_val += (batch_predictions == batch_labels.unsqueeze(1)).sum().item()
            total_val += batch_labels.size(0)
    
    val_loss_avg = val_loss_sum / len(val_dataloader)
    val_acc = correct_val / total_val
    val_losses.append(val_loss_avg)
    val_accuracies.append(val_acc)

    if (epoch + 1) % 10 == 0:
        print(f"Epoch [{epoch+1}/{num_epochs}] - "
            f"Train Loss: {train_loss_avg:.4f}, Train Acc: {train_acc:.4f}, "
            f"Val Loss: {val_loss_avg:.4f}, Val Acc: {val_acc:.4f}")

# Save the trained model
torch.save(model.state_dict(),  "../models/whistle_detection.pth")

sample_input = torch.randn(1, dataset.n_freqs)
torch.onnx.export(model, sample_input.to(device), "../models/whistle_detection.onnx") 

  6%|▋         | 13/200 [00:00<00:12, 15.09it/s]

Epoch [10/200] - Train Loss: 0.1685, Train Acc: 0.9667, Val Loss: 0.1777, Val Acc: 0.9740


 10%|█         | 21/200 [00:01<00:13, 13.62it/s]

Epoch [20/200] - Train Loss: 0.0993, Train Acc: 0.9724, Val Loss: 0.1228, Val Acc: 0.9761


 16%|█▌        | 31/200 [00:02<00:11, 15.09it/s]

Epoch [30/200] - Train Loss: 0.0922, Train Acc: 0.9756, Val Loss: 0.1072, Val Acc: 0.9768


 22%|██▏       | 43/200 [00:03<00:10, 15.56it/s]

Epoch [40/200] - Train Loss: 0.0736, Train Acc: 0.9776, Val Loss: 0.0999, Val Acc: 0.9785


 26%|██▌       | 51/200 [00:03<00:09, 15.26it/s]

Epoch [50/200] - Train Loss: 0.0706, Train Acc: 0.9782, Val Loss: 0.1387, Val Acc: 0.9775


 30%|███       | 61/200 [00:04<00:09, 14.96it/s]

Epoch [60/200] - Train Loss: 0.0633, Train Acc: 0.9808, Val Loss: 0.0910, Val Acc: 0.9785


 36%|███▋      | 73/200 [00:05<00:08, 14.28it/s]

Epoch [70/200] - Train Loss: 0.0606, Train Acc: 0.9810, Val Loss: 0.0943, Val Acc: 0.9796


 42%|████▏     | 83/200 [00:05<00:07, 14.93it/s]

Epoch [80/200] - Train Loss: 0.0561, Train Acc: 0.9827, Val Loss: 0.0877, Val Acc: 0.9799


 46%|████▋     | 93/200 [00:06<00:06, 15.43it/s]

Epoch [90/200] - Train Loss: 0.0553, Train Acc: 0.9834, Val Loss: 0.0857, Val Acc: 0.9806


 52%|█████▏    | 103/200 [00:07<00:06, 15.65it/s]

Epoch [100/200] - Train Loss: 0.0509, Train Acc: 0.9849, Val Loss: 0.0843, Val Acc: 0.9789


 56%|█████▋    | 113/200 [00:07<00:05, 16.00it/s]

Epoch [110/200] - Train Loss: 0.0664, Train Acc: 0.9846, Val Loss: 0.0998, Val Acc: 0.9778


 62%|██████▏   | 123/200 [00:08<00:04, 15.58it/s]

Epoch [120/200] - Train Loss: 0.0727, Train Acc: 0.9856, Val Loss: 0.0980, Val Acc: 0.9782


 66%|██████▋   | 133/200 [00:09<00:04, 15.44it/s]

Epoch [130/200] - Train Loss: 0.1112, Train Acc: 0.9818, Val Loss: 0.1104, Val Acc: 0.9802


 70%|███████   | 141/200 [00:09<00:03, 14.97it/s]

Epoch [140/200] - Train Loss: 0.0592, Train Acc: 0.9869, Val Loss: 0.0915, Val Acc: 0.9802


 76%|███████▋  | 153/200 [00:10<00:03, 15.59it/s]

Epoch [150/200] - Train Loss: 0.0585, Train Acc: 0.9872, Val Loss: 0.0925, Val Acc: 0.9816


 80%|████████  | 161/200 [00:10<00:02, 14.98it/s]

Epoch [160/200] - Train Loss: 0.0593, Train Acc: 0.9867, Val Loss: 0.0599, Val Acc: 0.9820


 86%|████████▌ | 171/200 [00:11<00:01, 14.58it/s]

Epoch [170/200] - Train Loss: 0.0403, Train Acc: 0.9880, Val Loss: 0.0500, Val Acc: 0.9827


 92%|█████████▏| 183/200 [00:12<00:01, 14.97it/s]

Epoch [180/200] - Train Loss: 0.0425, Train Acc: 0.9871, Val Loss: 0.0440, Val Acc: 0.9844


 96%|█████████▋| 193/200 [00:13<00:00, 15.47it/s]

Epoch [190/200] - Train Loss: 0.0566, Train Acc: 0.9875, Val Loss: 0.1243, Val Acc: 0.9816


100%|██████████| 200/200 [00:13<00:00, 14.68it/s]


Epoch [200/200] - Train Loss: 0.0567, Train Acc: 0.9893, Val Loss: 0.0947, Val Acc: 0.9830
