In [None]:
from transformers import AutoFeatureExtractor, AutoModel
import torch
import librosa

# load PANNs-like model (torchvggish is trained on AudioSet)
extractor = AutoFeatureExtractor.from_pretrained("harritaylor/torchvggish")
model = AutoModel.from_pretrained("harritaylor/torchvggish")

# load an audio file (wav, 16kHz mono)
y, sr = librosa.load("forest_sample.wav", sr=16000, mono=True)

inputs = extractor(y, sampling_rate=sr, return_tensors="pt")

with torch.no_grad():
    outputs = model(**inputs)

print(outputs.last_hidden_state.shape)  # embedding vector


In [None]:
!pip install --upgrade pip setuptools wheel
!pip install --upgrade numpy
!pip install --upgrade pandas scipy torch librosa


In [None]:
!pip install --upgrade --force-reinstall numpy pandas scipy torch librosa


In [None]:
!pip install --upgrade --force-reinstall numpy==1.26.4 pandas==2.2.2 scipy==1.13.1 librosa==0.10.2

In [None]:
from transformers import AutoProcessor, AutoModel
import torch, librosa

# Load feature extractor + model
processor = AutoProcessor.from_pretrained("qiuqiangkong/panns")
model = AutoModel.from_pretrained("qiuqiangkong/panns")

# Example audio
waveform, sr = librosa.load("forest_sound.wav", sr=32000, mono=True)
inputs = processor(waveform, sampling_rate=sr, return_tensors="pt", padding=True)

with torch.no_grad():
    embeddings = model(**inputs).last_hidden_state.mean(dim=1)  # pooled embedding

print("Embedding shape:", embeddings.shape)


In [None]:
!pip install torch torchvision torchaudio
!pip install librosa==0.10.1 soundfile
!pip install h5py pandas tqdm


In [None]:
!mkdir -p pretrained
!wget -O pretrained/Cnn14.pth https://zenodo.org/record/3987831/files/Cnn14.pth?download=1

In [None]:
import requests, os

os.makedirs("pretrained", exist_ok=True)
url = "https://zenodo.org/record/3987831/files/Cnn14.pth?download=1"
r = requests.get(url, stream=True)

with open("pretrained/Cnn14.pth", "wb") as f:
    for chunk in r.iter_content(chunk_size=8192):
        if chunk:
            f.write(chunk)

print("✅ Download complete: pretrained/Cnn14.pth")


In [3]:
import sys
import torch

# Add repo path (adjust if needed)
sys.path.append("audioset_tagging_cnn/pytorch")

from models import Cnn14

# Path to your model
checkpoint_path = "/Users/prasad/pretrained/Cnn14.pth"

# Initialize model
model = Cnn14(sample_rate=32000, window_size=1024, hop_size=320,
              mel_bins=64, fmin=50, fmax=14000, classes_num=527)

# Load checkpoint safely
checkpoint = torch.load(checkpoint_path, map_location="cpu", weights_only=False)
model.load_state_dict(checkpoint["model"])
model.eval()

print("✅ Model loaded successfully")


UnpicklingError: invalid load key, '\x0a'.

In [4]:
import sys
sys.path.append("audioset_tagging_cnn/pytorch")

from models import Cnn14

In [5]:
pip install torchlibrosa

Note: you may need to restart the kernel to use updated packages.


In [None]:
new one 






In [None]:
import sys
sys.path.append("audioset_tagging_cnn/pytorch")

from models import Cnn14

In [1]:
!mkdir -p /Users/prasad/pretrained
!curl -L -o /Users/prasad/pretrained/Cnn14.pth https://zenodo.org/record/3987831/files/Cnn14.pth


  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100   251  100   251    0     0    188      0  0:00:01  0:00:01 --:--:--   188
100 14206  100 14206    0     0   5498      0  0:00:02  0:00:02 --:--:-- 15193


In [2]:
ls -lh /Users/prasad/Documents/Major_Project/Cnn14.pth

-rw-r--r--@ 1 prasad  staff   312M  1 Sep 10:55 /Users/prasad/Documents/Major_Project/Cnn14.pth


In [7]:
import torch
import torch.nn as nn
import torchaudio
import sys
import os

# Add audioset repo to path (adjust if your repo is in another folder)
sys.path.append("audioset_tagging_cnn")

import sys
sys.path.append("audioset_tagging_cnn")  # path to the cloned repo

from pytorch.pytorch_utils import move_data_to_device
from pytorch.models import Cnn14

# -----------------------------
# 1. Load pretrained model
# -----------------------------
checkpoint_path = "/Users/prasad/Documents/Major_Project/Cnn14.pth"  # change if needed
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = Cnn14(sample_rate=32000, window_size=1024, hop_size=320,
              mel_bins=64, fmin=50, fmax=14000, classes_num=527)

checkpoint = torch.load(checkpoint_path, map_location=device)
model.load_state_dict(checkpoint["model"])
model.to(device)
model.eval()

print("✅ Model loaded successfully!")

# -----------------------------
# 2. Load an audio file
# -----------------------------
wav_path = "/Users/prasad/Documents/Major_Project/machine_sound.wav"  # replace with your file
waveform, sr = torchaudio.load(wav_path)

#import torchaudio
#wav_path = "video.mp4"  # your mp4 file
#waveform, sr = torchaudio.load(wav_path)  # works if ffmpeg backend is enabled

# Resample if not 32kHz
if sr != 32000:
    waveform = torchaudio.transforms.Resample(orig_freq=sr, new_freq=32000)(waveform)
    sr = 32000

# Mono
if waveform.shape[0] > 1:
    waveform = torch.mean(waveform, dim=0, keepdim=True)

# -----------------------------
# 3. Get model prediction
# -----------------------------
with torch.no_grad():
    waveform = move_data_to_device(waveform, device)
    output_dict = model(waveform, None)

    # Clip-level probabilities (527 AudioSet classes)
    clipwise_output = output_dict["clipwise_output"].cpu().numpy()[0]

print("✅ Inference done!")
print("Predictions shape:", clipwise_output.shape)  # should be (527,)

# -----------------------------
# 4. Map class IDs -> labels
# -----------------------------
import pandas as pd

# Use the class_labels_indices.csv file inside the repo
class_map_path = "audioset_tagging_cnn/metadata/class_labels_indices.csv"
df = pd.read_csv(class_map_path)

# Build index → label mapping
class_map = {int(row["index"]): row["display_name"] for _, row in df.iterrows()}


# Print top-10 predictions
topk = 10
top_indices = clipwise_output.argsort()[-topk:][::-1]

print("\n🎯 Top Predictions:")
for idx in top_indices:
    print(f"{idx}: {class_map[idx]} ({clipwise_output[idx]:.3f})")


✅ Model loaded successfully!
✅ Inference done!
Predictions shape: (527,)

🎯 Top Predictions:
137: Music (0.167)
411: Sewing machine (0.099)
506: Inside, small room (0.085)
343: Engine (0.080)
513: Noise (0.072)
300: Vehicle (0.052)
138: Musical instrument (0.040)
0: Speech (0.035)
523: Vibration (0.031)
348: Medium engine (mid frequency) (0.024)


In [34]:
!wget https://raw.githubusercontent.com/qiuqiangkong/audioset_tagging_cnn/master/utils/class_labels_indices.csv -O class_labels_indices.csv

--2025-09-01 11:23:21--  https://raw.githubusercontent.com/qiuqiangkong/audioset_tagging_cnn/master/utils/class_labels_indices.csv
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 2606:50c0:8001::154, 2606:50c0:8002::154, 2606:50c0:8003::154, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|2606:50c0:8001::154|:443... connected.
HTTP request sent, awaiting response... 404 Not Found
2025-09-01 11:23:21 ERROR 404: Not Found.



In [39]:
!curl -L -o class_labels_indices.csv "https://raw.githubusercontent.com/qiuqiangkong/audioset_tagging_cnn/master/utils/class_labels_indices.csv"


  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100    14  100    14    0     0     40      0 --:--:-- --:--:-- --:--:--    40


In [40]:
ls -lh class_labels_indices.csv

-rw-r--r--@ 1 prasad  staff    14B  1 Sep 11:27 class_labels_indices.csv


In [None]:
                                                            Post Processing

In [8]:
import numpy as np

# Example: clipwise_output is your prediction vector of shape (527,)
# It should be float32/float64 values between 0–1
clipwise_output = output_dict["clipwise_output"].cpu().numpy()[0].astype(float)

# Define which classes are ILLEGAL (IDs from ontology.csv)
illegal_class_ids = {
    427: "Gunshot, gunfire",
    428: "Machine gun",
    431: "Cap gun",
    377: "Chainsaw",
    370: "Explosion",
    415: "Rifle",
    416: "Pistol",
    # add more if you want (e.g. "Bomb", "Siren" if used for poaching alerts)
}

# Confidence threshold
THRESHOLD = 0.2  

# Collect illegal predictions
illegal_preds = []
for idx, name in illegal_class_ids.items():
    score = float(clipwise_output[idx])   # ensure float
    if score > THRESHOLD:
        illegal_preds.append((name, score))

# Final classification
if illegal_preds:
    print("🚨 ILLEGAL activity detected!")
    for name, score in illegal_preds:
        print(f"   - {name}: {score:.3f}")
else:
    print("✅ LEGAL / No illegal sound detected.")


✅ LEGAL / No illegal sound detected.


In [9]:
import torch
import numpy as np
import torchaudio
import pyaudio
import time
import sys
sys.path.append("audioset_tagging_cnn")  # path to the cloned repo

from pytorch.pytorch_utils import move_data_to_device
from pytorch.models import Cnn14

# -----------------------
# Config
# -----------------------
ILLEGAL_CLASSES = ["Gunshot, gunfire", "Machine gun", "Chainsaw", "Explosion", "Cap gun","Machine gun","Mechanisms"]
THRESHOLD = 0.2  # probability cutoff

# Load pretrained model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = Cnn14(sample_rate=32000, window_size=1024, hop_size=320,
              mel_bins=64, fmin=50, fmax=14000, classes_num=527)

checkpoint = torch.load("Documents/Major_Project/Cnn14.pth", map_location=device)
model.load_state_dict(checkpoint["model"])
model.to(device)
model.eval()

# Load class labels
import pandas as pd
labels = pd.read_csv("audioset_tagging_cnn/metadata/class_labels_indices.csv")
id_to_label = {int(row["index"]): row["display_name"] for _, row in labels.iterrows()}

# -----------------------
# Microphone Setup
# -----------------------
CHUNK = 32000 * 2  # 2 sec of audio at 32kHz
FORMAT = pyaudio.paFloat32
CHANNELS = 1
RATE = 32000

p = pyaudio.PyAudio()
stream = p.open(format=FORMAT,
                channels=CHANNELS,
                rate=RATE,
                input=True,
                frames_per_buffer=CHUNK)

print("🎤 Listening for illegal activities...")

# -----------------------
# Real-time loop
# -----------------------
while True:
    data = stream.read(CHUNK, exception_on_overflow=False)
    audio = np.frombuffer(data, dtype=np.float32)
    waveform = torch.tensor(audio).unsqueeze(0).to(device)

    with torch.no_grad():
        output = model(waveform, None)
        clipwise_output = output["clipwise_output"].cpu().numpy()[0]

    # Top prediction
    top_idx = int(np.argmax(clipwise_output))
    top_label = id_to_label[top_idx]
    top_prob = clipwise_output[top_idx]

    print(f"Detected: {top_label} ({top_prob:.2f})")

    # Check if illegal
    if top_label in ILLEGAL_CLASSES and top_prob > THRESHOLD:
        print("🚨 ALERT: Illegal activity detected! 🚨")
        # TODO: Send ROS message or trigger actuator here

    time.sleep(1)  # small delay

🎤 Listening for illegal activities...
Detected: Vehicle (0.14)
Detected: Vehicle (0.45)
Detected: Vehicle (0.58)
Detected: Vehicle (0.46)
Detected: Vehicle (0.32)
Detected: Vehicle (0.52)
Detected: Vehicle (0.58)
Detected: Machine gun (0.77)
🚨 ALERT: Illegal activity detected! 🚨
Detected: Speech (0.81)
Detected: Jackhammer (0.71)
Detected: Machine gun (0.24)
🚨 ALERT: Illegal activity detected! 🚨
Detected: Machine gun (0.44)
🚨 ALERT: Illegal activity detected! 🚨
Detected: Rub (0.11)
Detected: Typing (0.32)
Detected: Tick (0.14)
Detected: Typing (0.67)
Detected: Typing (0.38)
Detected: Machine gun (0.28)
🚨 ALERT: Illegal activity detected! 🚨
Detected: Typing (0.20)
Detected: Vehicle (0.37)


KeyboardInterrupt: 

In [4]:
!brew install portaudio
!pip install pyaudio

[34m==>[0m [1mAuto-updating Homebrew...[0m
Adjust how often this is run with `$HOMEBREW_AUTO_UPDATE_SECS` or disable with
`$HOMEBREW_NO_AUTO_UPDATE=1`. Hide these hints with `$HOMEBREW_NO_ENV_HINTS=1` (see `man brew`).
[34m==>[0m [1mAuto-updated Homebrew![0m
Updated 2 taps (homebrew/core and homebrew/cask).
[34m==>[0m [1mNew Formulae[0m
darker: Apply Black formatting only in regions changed since last commit
docker-debug: Use new container attach on already container go on debug

You have [1m26[0m outdated formulae installed.

[32m==>[0m [1mFetching downloads for: [32mportaudio[39m[0m
[34m==>[0m [1mDownloading https://ghcr.io/v2/homebrew/core/portaudio/manifests/19.7.0-1[0m
######################################################################### 100.0%
[32m==>[0m [1mFetching [32mportaudio[39m[0m
[34m==>[0m [1mDownloading https://ghcr.io/v2/homebrew/core/portaudio/blobs/sha256:8ad9f1c1[0m
################################################################

In [12]:
import torch

# Path where you want to save
save_path = "illegal_detector.pth"

# Save model state
torch.save(model.state_dict(), save_path)

print(f"✅ Model saved to {save_path}")


✅ Model saved to illegal_detector.pth
