In [None]:
%pip install -r ../requirements.txt

In [20]:
import os
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import librosa
import pandas as pd
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

# Constants
SAMPLE_RATE = 44100
N_MELS = 128
TIME_STEPS = 128
MIC_POSITIONS = torch.tensor([  # Hexagonal array (shape: [3, 6])
    [0.0, 0.0, 0.0],      # Mic 1 (center)
    [1.5, 0.0, 0.0],      # Mic 2 (right)
    [0.75, 1.299, 0.0],   # Mic 3 (top-right)
    [-0.75, 1.299, 0.0],  # Mic 4 (top-left)
    [-1.5, 0.0, 0.0],     # Mic 5 (left)
    [-0.75, -1.299, 0.0]  # Mic 6 (bottom-left)
], dtype=torch.float32).T
SPEED_OF_SOUND = 343.0
DATA_DIR = "../data/simulations/"

#### Data loading & pre-processing

In [21]:
class GunshotDataset(Dataset):
    def __init__(self, sim_dirs, labels):
        self.sim_dirs = sim_dirs
        self.labels = labels

    def __len__(self):
        return len(self.sim_dirs)

    def __getitem__(self, idx):
        # Load and convert to fixed-size spectrogram
        specs = []
        for mic in range(1, 7):
            audio, _ = librosa.load(
                f"{self.sim_dirs[idx]}/mic_{mic}_recording.wav",
                sr=SAMPLE_RATE
            )
            S = librosa.feature.melspectrogram(
                y=audio,
                sr=SAMPLE_RATE,
                n_mels=N_MELS,
                n_fft=2048,
                hop_length=len(audio)//(TIME_STEPS-1)
            )
            S = librosa.power_to_db(S, ref=np.max)
            S = torch.tensor(S, dtype=torch.float32)
            # Pad/truncate to 128x128
            if S.shape[1] < TIME_STEPS:
                S = torch.nn.functional.pad(S, (0, TIME_STEPS-S.shape[1]))
            else:
                S = S[:, :TIME_STEPS]
            specs.append(S)
        # Stack to [6, 128, 128] -> permute to [128, 128, 6]
        X = torch.stack(specs).permute(1, 2, 0)
        y = torch.tensor(self.labels[idx], dtype=torch.float32)
        return X, y

# Load data
labels = pd.read_csv(f"{DATA_DIR}/labels.csv")
sim_dirs = [f"{DATA_DIR}/gunshot_{i}" for i in range(len(labels))]
X_train, X_test, y_train, y_test = train_test_split(
    sim_dirs, labels[["distance", "azimuth", "elevation"]].values,
    test_size=0.2, random_state=42
)

train_dataset = GunshotDataset(X_train, y_train)
test_dataset = GunshotDataset(X_test, y_test)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32)

#### Spectogram PINN model

In [28]:
class SpectrogramPINN(nn.Module):
    def __init__(self):
        super().__init__()
        # Input shape: [batch, 6, 128, 128]
        self.conv1 = nn.Conv2d(6, 32, kernel_size=3, padding=1)  # [batch, 32, 128, 128]
        self.pool = nn.MaxPool2d(2)  # [batch, 32, 64, 64]
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)  # [batch, 64, 64, 64]
        # Calculate flattened dimension: 64 * 64 * 64 = 262144
        self.fc1 = nn.Linear(64 * 64 * 64, 128)  # Corrected dimension
        self.fc2 = nn.Linear(128, 3)  # distance, azimuth, elevation

    def forward(self, x):
        # Input: [batch, 128, 128, 6] -> permute to [batch, 6, 128, 128]
        x = x.permute(0, 3, 1, 2)
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        x = torch.flatten(x, 1)  # Flatten to [batch, 64*64*64]
        x = torch.relu(self.fc1(x))
        return self.fc2(x)

#### Physics Informed Loss

In [26]:
def physics_loss(y_pred, mic_positions=MIC_POSITIONS):
    """TDoA loss using predicted coordinates"""
    distance, azimuth, elevation = y_pred[:, 0], y_pred[:, 1], y_pred[:, 2]
    
    # Convert to Cartesian (relative to mic1)
    x = distance * torch.cos(azimuth) * torch.cos(elevation)
    y = distance * torch.sin(azimuth) * torch.cos(elevation)
    z = distance * torch.sin(elevation)
    source_pos = torch.stack([x, y, z], dim=1)  # [batch, 3]
    
    # Expected TDoA
    distances = torch.norm(mic_positions - source_pos.unsqueeze(2), dim=1)  # [batch, 6]
    tdoa_pred = (distances - distances[:, 0:1]) / SPEED_OF_SOUND  # [batch, 6]
    
    # Approximate TDoA from spectrograms (simplified)
    tdoa_spectro = ...  # Implement phase-based TDoA here
    
    return torch.mean((tdoa_pred[:, 1:] - tdoa_spectro)**2)

def hybrid_loss(y_pred, y_true, alpha=0.1):
    mse_loss = nn.functional.mse_loss(y_pred, y_true)
    phys_loss = physics_loss(y_pred)
    return mse_loss + alpha * phys_loss

#### Training loop

In [29]:
def train(model, dataloader, epochs=5):
    model.train()
    for epoch in range(epochs):
        total_loss = 0.0
        for X_batch, y_batch in dataloader:
            optimizer.zero_grad()
            y_pred = model(X_batch)
            loss = hybrid_loss(y_pred, y_batch)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        avg_loss = total_loss / len(dataloader)
        scheduler.step(avg_loss)
        print(f"Epoch {epoch+1}/{epochs}, Loss: {avg_loss:.4f}")

# Initialize
model = SpectrogramPINN()
optimizer = optim.Adam(model.parameters(), lr=0.001)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=5)

# Train
train(model, train_loader)

RuntimeError: mat1 and mat2 shapes cannot be multiplied (32x65536 and 262144x128)