In [1]:
# Install necessary packages
!pip install tensorflow tensorflow-hub librosa matplotlib pandas

import tensorflow as tf
import tensorflow_hub as hub
import librosa
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import requests
import os

# ✅ Step 1: Load YAMNet Model from TensorFlow Hub
yamnet_model_handle = "https://tfhub.dev/google/yamnet/1"
yamnet = hub.load(yamnet_model_handle)
print("✅ YAMNet model loaded successfully!")

# ✅ Step 2: Download a sample audio file
audio_url = "https://storage.googleapis.com/audioset/miaow_16k.wav"
audio_path = "miaow_16k.wav"

if not os.path.exists(audio_path):
    response = requests.get(audio_url)
    with open(audio_path, "wb") as f:
        f.write(response.content)
    print(f"✅ Sample test audio downloaded: {audio_path}")
else:
    print(f"✅ Using existing audio file: {audio_path}")

# ✅ Step 3: Load Audio and Preprocess
def load_audio(file_path):
    wav_data, sr = librosa.load(file_path, sr=16000, mono=True)
    return wav_data

waveform = load_audio(audio_path)

# ✅ Step 4: Run YAMNet for Sound Classification
scores, embeddings, spectrogram = yamnet(waveform)

# ✅ Step 5: Get the Most Probable Sound Class
class_map_path = hub.load("https://tfhub.dev/google/yamnet/class_map/1").numpy()
predicted_class = class_map_path[np.argmax(scores)]
print(f"✅ YAMNet Prediction: The main sound is: {predicted_class}")

# ✅ Step 6: Train a Simple Model using YAMNet Embeddings
X = embeddings.numpy()
y = np.random.randint(0, 2, size=(X.shape[0],))  # Fake binary labels for testing

# ✅ Define & Train a Simple Classifier
model = tf.keras.Sequential([
    tf.keras.layers.Dense(512, activation="relu", input_shape=(X.shape[1],)),
    tf.keras.layers.Dense(256, activation="relu"),
    tf.keras.layers.Dense(1, activation="sigmoid")
])

model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
model.fit(X, y, epochs=5, batch_size=8, verbose=1)

# ✅ Step 7: Save the Model
model.save("yamnet_classifier.h5")
print("✅ Model training complete and saved as 'yamnet_classifier.h5'")


✅ YAMNet model loaded successfully!
✅ Sample test audio downloaded: miaow_16k.wav


OSError: https://tfhub.dev/google/yamnet/class_map/1 does not appear to be a valid module.

In [1]:
!rm -rf /root/.keras/models/yamnet*
!wget -O /root/.keras/models/yamnet.h5 https://storage.googleapis.com/tfhub-modules/google/yamnet/1.tar.gz


/root/.keras/models/yamnet.h5: No such file or directory


In [2]:
!pip install --upgrade tensorflow-hub




In [3]:
import tensorflow_hub as hub
model_url = "https://tfhub.dev/google/yamnet/1"
yamnet_model = hub.load(model_url)
print("✅ YAMNet model loaded successfully!")


✅ YAMNet model loaded successfully!


In [5]:
import tensorflow as tf
import tensorflow_hub as hub
import numpy as np
import librosa
import librosa.display
import matplotlib.pyplot as plt
import pandas as pd
import os
import urllib.request

# ✅ Load YAMNet model
yamnet_model = hub.load("https://tfhub.dev/google/yamnet/1")
print("✅ YAMNet model loaded successfully!")

# ✅ Download a sample test audio
AUDIO_URL = "https://storage.googleapis.com/audioset/miaow_16k.wav"
AUDIO_FILE = "miaow_16k.wav"
urllib.request.urlretrieve(AUDIO_URL, AUDIO_FILE)
print(f"✅ Sample test audio downloaded: {AUDIO_FILE}")

# ✅ Load and process the audio file
def load_wav_16k_mono(filename):
    wav, sr = librosa.load(filename, sr=16000, mono=True)
    return wav, sr

waveform, sr = load_wav_16k_mono(AUDIO_FILE)

# ✅ Run YAMNet model for classification
scores, embeddings, spectrogram = yamnet_model(waveform)

# ✅ Load class map safely
class_map_path = yamnet_model.class_map_path().numpy().decode("utf-8")
class_map_df = pd.read_csv(class_map_path)

# ✅ Get top predicted category safely
top_index = np.argmax(scores.numpy())
if top_index >= len(class_map_df):
    print(f"⚠️ Warning: Predicted index {top_index} is out of range!")
    predicted_label = "Unknown"
else:
    predicted_label = class_map_df.loc[top_index, "display_name"]

print(f"✅ YAMNet Prediction: The main sound is: {predicted_label}")
print(f"✅ Embeddings shape: {embeddings.shape}")


✅ YAMNet model loaded successfully!
✅ Sample test audio downloaded: miaow_16k.wav
✅ YAMNet Prediction: The main sound is: Unknown
✅ Embeddings shape: (13, 1024)
