In [None]:
%pip install -r ../requirements.txt

In [6]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models
import librosa
import pandas as pd
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from qbstyles import mpl_style

mpl_style()

# Constants
SPEED_OF_SOUND = 343.0  # m/s
MIC_POSITIONS = np.array([
    [0.0, 0.0, 0.0],      # Mic 1 (center)
    [1.5, 0.0, 0.0],      # Mic 2 (right)
    [0.75, 1.299, 0.0],   # Mic 3 (top-right)
    [-0.75, 1.299, 0.0],  # Mic 4 (top-left)
    [-1.5, 0.0, 0.0],     # Mic 5 (left)
    [-0.75, -1.299, 0.0]  # Mic 6 (bottom-left)
]).T  # Shape: (3, 6)

DATA_DIR = "../data/simulations/"
N_MELS = 128
SPEC_LEN = 128  # Fixed time steps

#### Data loading & spectogram conversion

In [7]:
def audio_to_spectrogram(audio, sr=44100):
    """Convert to 128x128 log-Mel spectrogram"""
    S = librosa.feature.melspectrogram(
        y=audio, sr=sr, n_mels=N_MELS, 
        n_fft=2048, hop_length=512, fmax=8000
    )
    S_db = librosa.power_to_db(S, ref=np.max)
    
    # Pad/truncate to 128x128
    if S_db.shape[1] < SPEC_LEN:
        pad_width = ((0, 0), (0, SPEC_LEN - S_db.shape[1]))
        S_db = np.pad(S_db, pad_width, mode='constant')
    else:
        S_db = S_db[:, :SPEC_LEN]
    return S_db

def load_spectrograms(sim_dir):
    """Load and convert all 6 mics to fixed-size spectrograms"""
    specs = []
    for mic in range(1, 7):
        audio, sr = librosa.load(f"{sim_dir}/mic_{mic}_recording.wav", sr=None)
        spec = audio_to_spectrogram(audio, sr)
        specs.append(spec)
    return np.stack(specs, axis=-1)  # Shape: (128, 128, 6)

# Load all data (with progress bar)
from tqdm import tqdm
labels = pd.read_csv(f"{DATA_DIR}/labels.csv")
X = []
for i in tqdm(range(len(labels)), desc="Loading spectrograms"):
    X.append(load_spectrograms(f"{DATA_DIR}/gunshot_{i}"))
X = np.array(X)  # Shape: (num_samples, 128, 128, 6)
y = labels[["distance", "azimuth", "elevation"]].values

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
print(f"Train shape: {X_train.shape}, Test shape: {X_test.shape}")

Loading spectrograms: 100%|██████████| 100/100 [00:04<00:00, 23.39it/s]

Train shape: (80, 128, 128, 6), Test shape: (20, 128, 128, 6)





#### PINN Loss function (Spectogram compatible)

In [9]:
def spectrogram_physics_loss(y_true, y_pred):
    """Physics loss using group delay for TDoA estimation"""
    # Predicted polar coordinates
    distance, azimuth, elevation = y_pred[:, 0], y_pred[:, 1], y_pred[:, 2]
    
    # Convert to Cartesian (relative to Mic1)
    x = distance * tf.cos(azimuth) * tf.cos(elevation)
    y = distance * tf.sin(azimuth) * tf.cos(elevation)
    z = distance * tf.sin(elevation)
    source_pos = tf.stack([x, y, z], axis=1)  # Shape: (batch, 3)
    
    # Calculate expected TDoA
    mic_positions = tf.constant(MIC_POSITIONS, dtype=tf.float32)  # (3, 6)
    distances = tf.norm(mic_positions - tf.expand_dims(source_pos, 2), axis=1)  # (batch, 6)
    tdoa_pred = (distances - tf.expand_dims(distances[:, 0], 1)) / SPEED_OF_SOUND  # (batch, 6)
    
    # Simplified: Use first peak of spectrogram as TDoA proxy
    tdoa_est = tf.zeros_like(tdoa_pred[:, 1:])  # Replace with actual estimator if available
    return tf.reduce_mean((tdoa_est - tdoa_pred[:, 1:])**2)

#### Spectogram PINN model

In [11]:
def build_spectrogram_pinn():
    """2D CNN with fixed 128x128 input"""
    model = models.Sequential([
        layers.Input(shape=(128, 128, 6)),
        
        # Spectral-temporal features
        layers.Conv2D(64, (5, 5), activation='relu', padding='same'),
        layers.BatchNormalization(),
        layers.MaxPooling2D((2, 2)),
        
        layers.Conv2D(128, (3, 3), activation='relu', padding='same'),
        layers.BatchNormalization(),
        layers.MaxPooling2D((2, 2)),
        
        layers.Conv2D(256, (3, 3), activation='relu', padding='same'),
        layers.BatchNormalization(),
        layers.GlobalAveragePooling2D(),
        
        # Regression head
        layers.Dense(128, activation='relu'),
        layers.Dropout(0.5),
        layers.Dense(3)  # distance, azimuth, elevation
    ])
    return model

model = build_spectrogram_pinn()
model.compile(
    optimizer=tf.keras.optimizers.Adam(0.001),
    loss='mse',
    metrics=['mae']
)
model.summary()

In [16]:
class SpectrogramTrainer(tf.keras.Model):
    def __init__(self, model, **kwargs):
        super().__init__(**kwargs)
        self.model = model
        self.total_loss_tracker = tf.keras.metrics.Mean(name="total_loss")
        self.mse_loss_tracker = tf.keras.metrics.Mean(name="mse_loss")
        self.phys_loss_tracker = tf.keras.metrics.Mean(name="physics_loss")

    def call(self, inputs):
        # Delegate to the wrapped model
        return self.model(inputs)

    def train_step(self, data):
        x, y = data
        
        with tf.GradientTape() as tape:
            y_pred = self(x, training=True)  # Uses the call() method
            
            # Calculate losses
            mse_loss = tf.reduce_mean(tf.keras.losses.mean_squared_error(y, y_pred))
            phys_loss = spectrogram_physics_loss(y, y_pred)
            total_loss = mse_loss + 0.1 * phys_loss  # Weighted sum

        # Compute gradients
        trainable_vars = self.trainable_variables
        gradients = tape.gradient(total_loss, trainable_vars)
        
        # Apply gradients
        self.optimizer.apply_gradients(zip(gradients, trainable_vars))
        
        # Update metrics
        self.total_loss_tracker.update_state(total_loss)
        self.mse_loss_tracker.update_state(mse_loss)
        self.phys_loss_tracker.update_state(phys_loss)
        
        return {
            "total_loss": self.total_loss_tracker.result(),
            "mse_loss": self.mse_loss_tracker.result(),
            "physics_loss": self.phys_loss_tracker.result()
        }

    def test_step(self, data):
        x, y = data
        y_pred = self(x, training=False)
        
        mse_loss = tf.reduce_mean(tf.keras.losses.mean_squared_error(y, y_pred))
        phys_loss = spectrogram_physics_loss(y, y_pred)
        total_loss = mse_loss + 0.1 * phys_loss
        
        return {
            "total_loss": total_loss,
            "mse_loss": mse_loss,
            "physics_loss": phys_loss
        }

# Initialize and compile
trainer = SpectrogramTrainer(model)
trainer.compile(optimizer=tf.keras.optimizers.Adam(0.001))

# Train the model
history = trainer.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=50,
    batch_size=32,
    callbacks=[
        tf.keras.callbacks.EarlyStopping(
            monitor="val_total_loss",
            patience=5,
            restore_best_weights=True
        ),
        tf.keras.callbacks.ReduceLROnPlateau(
            monitor="val_total_loss",
            factor=0.5,
            patience=3
        )
    ]
)

Epoch 1/50


AttributeError: module 'keras._tf_keras.keras.losses' has no attribute 'mean_squared_error'