In [1]:
# Mount Google Drive at /content/drive; every data path used below lives under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import os
import math
import pandas as pd
from PIL import Image
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms

In [None]:
# Locations on the mounted Drive: the raw recordings and the full metadata table
data_dir = "/content/drive/MyDrive/ddp/ALL_gunshot"
metadata_path = "/content/drive/MyDrive/ddp/gunshot-audio-all-metadata.csv"

# Read the metadata table and report the type of the loaded object
metadata_df = pd.read_csv(filepath_or_buffer=metadata_path)
print(type(metadata_df))

In [None]:
import os
import pandas as pd

# Row masks: microphone mentions "MiniDSP UMA-8" and device manufacturer mentions
# "Raspberry Pi" (rows with a missing value in either column count as non-matching)
uses_uma8_mic = metadata_df["microphone"].str.contains("MiniDSP UMA-8", na=False)
on_raspberry_pi = metadata_df["device_manufacturer"].str.contains("Raspberry Pi", na=False)
filtered_df = metadata_df[uses_uma8_mic & on_raspberry_pi]

# Collapse to one row per UUID, then pull the UUIDs out as a plain Python list
filtered_uuids_df = filtered_df.drop_duplicates(subset=["uuid"])
uuid_column = filtered_uuids_df["uuid"]
uuid_list = uuid_column.unique().tolist()

# Function to check presence of all 7 audio files
def get_valid_uuids_with_all_channels(uuid_list, data_dir, num_channels=7):
    """Return the UUIDs whose per-channel WAV files are all present in `data_dir`.

    A UUID is kept only if `<uuid>_chan<i>_v0.wav` exists for every channel
    index `i` in `range(num_channels)`.

    Args:
        uuid_list: Iterable of recording UUIDs to check.
        data_dir: Directory that holds the per-channel WAV files.
        num_channels: Number of channel files expected per UUID (default 7).

    Returns:
        list: The UUIDs from `uuid_list`, in their original order, for which
        every expected channel file exists.
    """
    return [
        uuid
        for uuid in uuid_list
        # all() stops at the first missing channel file
        if all(
            os.path.exists(os.path.join(data_dir, f"{uuid}_chan{i}_v0.wav"))
            for i in range(num_channels)
        )
    ]

# UUIDs for which every expected channel recording is present on disk
valid_uuids = get_valid_uuids_with_all_channels(uuid_list, data_dir)

# Retain every metadata row that belongs to one of those UUIDs
belongs_to_valid_uuid = metadata_df['uuid'].isin(valid_uuids)
filtered_metadata_all_channels = metadata_df[belongs_to_valid_uuid]

# Write the filtered table back to Drive (without the index column)
output_path = "/content/drive/MyDrive/ddp/filtered_metadata_156_all_channels.csv"
filtered_metadata_all_channels.to_csv(output_path, index=False)

row_count = len(filtered_metadata_all_channels)
print(f"Saved full channel metadata for {row_count} rows.")


In [None]:
import pandas as pd
import os

# Recordings directory and the filtered metadata table saved on Drive
data_dir = "/content/drive/MyDrive/ddp/ALL_gunshot"
csv_path = "/content/drive/MyDrive/ddp/filtered_metadata_156_all_channels.csv"

# Read the filtered metadata back into a DataFrame
metadata = pd.read_csv(filepath_or_buffer=csv_path)

# Check if all 7-channel WAV files exist
def has_all_channels(uuid, data_dir, num_channels=7):
    """Return True if `<uuid>_chan<i>_v0.wav` exists in `data_dir` for every i in range(num_channels)."""
    for channel in range(num_channels):
        wav_path = os.path.join(data_dir, f"{uuid}_chan{channel}_v0.wav")
        if not os.path.exists(wav_path):
            # One missing channel file is enough to reject this UUID
            return False
    return True

# Filter to UUIDs with all files.
# Each unique UUID is checked against the filesystem once (instead of once per
# metadata row), and `isin` always yields a boolean mask, even for an empty frame.
uuids_with_all_files = [
    u for u in metadata["uuid"].unique() if has_all_channels(u, data_dir)
]
metadata = metadata[metadata["uuid"].isin(uuids_with_all_files)]
uuids = metadata["uuid"].unique()


In [None]:
import os
import numpy as np
import pandas as pd
import soundfile as sf
import librosa
import matplotlib.pyplot as plt
from PIL import Image

# Paths
metadata_csv = "/content/drive/MyDrive/ddp/filtered_metadata_156_all_channels.csv"
audio_dir = "/content/drive/MyDrive/ddp/ALL_gunshot"
output_dir = "/content/drive/MyDrive/ddp/mel-spectrograms"

# Load metadata CSV to get the list of per-channel filenames.
# na=False makes rows with a missing filename count as "no match"; without it
# the NaN entries end up in the mask and boolean indexing raises.
metadata = pd.read_csv(metadata_csv)
is_channel_file = metadata["filename"].str.contains('_chan', na=False)
# astype(str) ensures the resulting list holds plain strings
filenames = metadata[is_channel_file]["filename"].astype(str).tolist()

# Create output directory if it doesn't exist
os.makedirs(output_dir, exist_ok=True)

# Tunable parameters for peak detection and spectrogram generation
SEGMENT_DURATION_S = 2        # length of the clip cut around the peak, in seconds
PEAK_THRESHOLD_RATIO = 0.2    # a peak must exceed this fraction of the max |amplitude|
SILENCE_AMPLITUDE = 1e-6      # max |amplitude| at or below this is treated as near-silent
N_MELS = 128                  # number of Mel bands (rows of the saved image)


def _find_first_peak(signal, threshold_ratio=PEAK_THRESHOLD_RATIO,
                     silence_amplitude=SILENCE_AMPLITUDE):
    """Return the index of the first large amplitude spike in a 1-D signal.

    A spike is the first sample whose absolute value is above
    `threshold_ratio * max(|signal|)` and strictly greater than both of its
    neighbours. If no such sample exists, the index of the global maximum of
    `|signal|` is returned instead.

    Returns:
        int index of the peak, or None when max(|signal|) <= silence_amplitude.
    """
    magnitude = np.abs(signal)
    loudest = magnitude.max()
    if loudest <= silence_amplitude:
        return None
    threshold = threshold_ratio * loudest
    # Vectorised strict local-maximum test over the interior samples 1..len-2
    interior = magnitude[1:-1]
    is_peak = (
        (interior > threshold)
        & (interior > magnitude[:-2])
        & (interior > magnitude[2:])
    )
    hits = np.flatnonzero(is_peak)
    if hits.size:
        return int(hits[0]) + 1  # +1 converts the interior offset back to a signal index
    return int(np.argmax(magnitude))


def _centered_segment(signal, center, length):
    """Return exactly `length` samples of `signal` centred on `center`.

    Where the window extends past either end of the signal, the missing
    samples are filled with zeros.
    """
    start = center - length // 2
    stop = start + length
    pad_before = max(0, -start)
    pad_after = max(0, stop - len(signal))
    clip = signal[max(start, 0):min(stop, len(signal))]
    if pad_before or pad_after:
        clip = np.pad(clip, (pad_before, pad_after), mode='constant', constant_values=0)
    return clip


# Process each audio file
for fname in filenames:
    # Construct full file path (assuming .wav extension if not present)
    audio_path = os.path.join(audio_dir, fname if fname.lower().endswith(".wav") else fname + ".wav")
    if not os.path.isfile(audio_path):
        print(f"File not found: {audio_path} - skipping.")
        continue
    try:
        # Load samples and sample rate
        y, sr = sf.read(audio_path)
    except Exception as e:
        print(f"Error reading {audio_path}: {e}")
        continue
    if y.ndim > 1:
        # Multi-channel data (not expected for the per-channel files): keep the first channel
        y = y[:, 0]

    # Work on a float NumPy array
    y = np.asarray(y, dtype=float)
    # Handle empty files
    if y.size == 0:
        print(f"Warning: {fname} is empty, skipping.")
        continue

    # Detect first gunshot peak (first large amplitude spike)
    peak_index = _find_first_peak(y)
    if peak_index is None:
        # Very low signal (almost silent): still processed, window anchored at sample 0
        print(f"Warning: {fname} has near-zero signal.")
        peak_index = 0

    # Cut a fixed-length segment centred (as far as possible) on the peak
    target_length = int(round(SEGMENT_DURATION_S * sr))  # number of samples in the segment
    segment = _centered_segment(y, peak_index, target_length)

    # Generate Mel spectrogram for the segment and convert it to dB scale
    mel_spect = librosa.feature.melspectrogram(y=segment, sr=sr, n_mels=N_MELS, fmax=sr/2)
    mel_spect_db = librosa.power_to_db(mel_spect, ref=np.max)

    # Normalize to [0, 255] for image saving (epsilon avoids division by zero)
    mel_min = mel_spect_db.min()
    mel_max = mel_spect_db.max()
    mel_spect_norm = (mel_spect_db - mel_min) / (mel_max - mel_min + 1e-6)
    mel_spect_img = (mel_spect_norm * 255).astype(np.uint8)

    # Save the spectrogram as a PNG using PIL (grayscale), named after the audio file
    output_path = os.path.join(
        output_dir, os.path.splitext(os.path.basename(audio_path))[0] + ".png"
    )
    Image.fromarray(mel_spect_img).save(output_path)