<a href="https://colab.research.google.com/github/Shoaib1M/hackodishav1/blob/main/Audio_Classifier.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive so files stored there can be opened by path.
drive.mount('/content/drive')

In [None]:
import tensorflow as tf
import tensorflow_hub as hub
import numpy as np
import csv

import matplotlib.pyplot as plt
from IPython.display import Audio
from scipy.io import wavfile

# Load the YAMNet model from the TensorFlow Hub URL below; `model` is used by the
# later cells both to locate the class map and to score audio.
model = hub.load('https://tfhub.dev/google/yamnet/1')

In [None]:
def class_name_csv(class_map_csv_text):
  """Read a class-map CSV and return its ``display_name`` column.

  Args:
    class_map_csv_text: Location of the CSV file; it is opened through
      ``tf.io.gfile.GFile``.

  Returns:
    A list with the ``display_name`` value of every row, in file order.
  """
  with tf.io.gfile.GFile(class_map_csv_text) as handle:
    return [record["display_name"] for record in csv.DictReader(handle)]

# Ask the loaded model where its class-map CSV lives, then read the display names
# from it. `class_names` is indexed by class position in the later scoring cell.
class_map_path=model.class_map_path().numpy()
class_names=class_name_csv(class_map_path)

In [None]:
import scipy.signal

def ensure_sample(original_sample_rate,waveform,desired_rate=16000):
  """Resample ``waveform`` to ``desired_rate`` when its sample rate differs.

  Args:
    original_sample_rate: Sample rate of ``waveform`` in Hz.
    waveform: Array of samples; ``len(waveform)`` is taken as the sample count.
    desired_rate: Target sample rate in Hz (default 16000).

  Returns:
    A ``(desired_rate, waveform)`` tuple. When the rates already match, the
    input array is returned unchanged.
  """
  if original_sample_rate != desired_rate:
    # Keep the clip duration constant: samples / old_rate * new_rate.
    # The target rate used to be hard-coded as 16000 here, so any other
    # `desired_rate` produced audio of the wrong length for the returned rate.
    desired_length = int(round((float(len(waveform)) / original_sample_rate) * desired_rate))
    waveform = scipy.signal.resample(waveform, desired_length)
  return desired_rate, waveform

In [None]:
# Download the sample clip speech_whistling2.wav (curl -O saves it under the remote file name).
!curl -O https://storage.googleapis.com/audioset/speech_whistling2.wav

In [None]:
# Download the sample clip miaow_16k.wav (curl -O saves it under the remote file name).
!curl -O https://storage.googleapis.com/audioset/miaow_16k.wav

In [None]:
# --- Step 1: Install the necessary library for audio conversion ---
!pip install pydub

from google.colab import files
from google.colab import output
from scipy.io import wavfile
from IPython.display import Audio, display, HTML
from base64 import b64decode
from pydub import AudioSegment
import ipywidgets as widgets
import numpy as np
import io

# --- JavaScript for in-browser audio recording ---
# The snippet adds a button to the page, starts recording as soon as microphone
# access is granted, and exposes a promise named `data` that get_audio() awaits
# through output.eval_js('data').
#   * Pressing the button stops the recording; `data` then resolves to the
#     recording encoded as a base64 data URL.
#   * If the microphone cannot be used (permission denied, API missing, recorder
#     setup fails), `data` resolves to an empty string instead of staying pending
#     forever, so the Python side gets control back and can report the problem.
AUDIO_HTML = """
<script>
var my_div = document.createElement("DIV");
var my_p = document.createElement("P");
var my_btn = document.createElement("BUTTON");
my_btn.textContent = "Start Recording";

my_div.appendChild(my_p);
my_div.appendChild(my_btn);
document.body.appendChild(my_div);

var base64data = 0;
var recorder, gumStream;
var recordButton = my_btn;

var data = new Promise(resolve => {
    recordButton.onclick = () => {
        if (recorder && recorder.state == "recording") {
            recorder.stop();
            gumStream.getAudioTracks()[0].stop();
            recordButton.innerText = "Processing...";
        }
    };

    var handleSuccess = function(stream) {
      gumStream = stream;
      recorder = new MediaRecorder(stream);
      let chunks = [];

      recorder.ondataavailable = function(e) {
          chunks.push(e.data);
      };

      recorder.onstop = function(e) {
          var blob = new Blob(chunks, { 'type' : 'audio/webm; codecs=opus' });
          var reader = new FileReader();
          reader.readAsDataURL(blob);
          reader.onloadend = function() {
              base64data = reader.result;
              resolve(base64data.toString());
          }
      };

      recorder.start();
      recordButton.innerText = "Recording... press to stop";
    };

    // Resolve with an empty string so the awaiting Python call returns
    // instead of blocking when no recording can be made.
    var handleFailure = function(err) {
      recordButton.innerText = "Microphone unavailable";
      resolve("");
    };

    if (navigator.mediaDevices && navigator.mediaDevices.getUserMedia) {
      navigator.mediaDevices.getUserMedia({audio: true}).then(handleSuccess).catch(handleFailure);
    } else {
      handleFailure(new Error("getUserMedia is not available"));
    }
});
</script>
"""

def get_audio():
  """Record audio in the browser and save it as ``recording.wav``.

  Renders ``AUDIO_HTML``, waits for the JavaScript value ``data`` via
  ``output.eval_js``, base64-decodes the part after the first comma and
  converts it from webm to wav with pydub.

  Returns:
    ``"recording.wav"`` on success; ``None`` when the returned value holds no
    comma-separated payload or when the conversion raises.
  """
  display(HTML(AUDIO_HTML))
  data_url = output.eval_js('data')

  has_payload = ',' in data_url
  if not has_payload:
    print("Audio data could not be retrieved. Please try recording again.")
    return None

  encoded_part = data_url.split(',')[1]
  raw_bytes = b64decode(encoded_part)

  wav_path = "recording.wav"
  # pydub does the webm -> wav conversion; any failure is reported, not raised.
  try:
    segment = AudioSegment.from_file(io.BytesIO(raw_bytes), format="webm")
    segment.export(wav_path, format="wav")
  except Exception as err:
    print(f"Error during audio conversion: {err}")
    return None
  return wav_path


# --- Global variable to store the waveform ---
# Holds the most recently loaded audio samples. It starts as None (and is reset
# to None whenever this cell is re-run); process_audio() rebinds it.
waveform = None

# File-upload widget: a single file at a time, with the picker filtered to .wav.
uploader = widgets.FileUpload(
    accept='.wav',
    multiple=False,
    description='Upload .wav file'
)

# Text box for typing the path of a .wav file; pre-filled with a Google Drive path.
text_input = widgets.Text(
    value='/content/drive/MyDrive/test/565103__hudehel__train-station-announcement.wav',
    placeholder='Enter path to .wav file',
    description='File Path:',
    disabled=False
)

# Button that starts a microphone recording when clicked.
record_button = widgets.Button(
    description="Record Audio",
    button_style='info',
    tooltip='Click to record audio from your microphone'
)

# Render the three input widgets below the cell.
display(uploader)
display(text_input)
display(record_button)


def _first_uploaded_file(uploaded_file):
    """Return ``(file_name, content)`` for the first entry of a FileUpload value.

    Two payload layouts are accepted: a dict keyed by file name whose values
    carry a ``'content'`` entry (ipywidgets 7), and a sequence of per-file
    records exposing ``'name'`` and ``'content'`` (ipywidgets 8).
    """
    if isinstance(uploaded_file, dict):
        file_name = next(iter(uploaded_file))
        return file_name, uploaded_file[file_name]['content']
    first = next(iter(uploaded_file))
    return first['name'], first['content']


def process_audio(wav_file_path=None, uploaded_file=None):
    """Load a WAV file, resample it, store it in the global ``waveform`` and preview it.

    Args:
        wav_file_path: Path of a WAV file to read. Ignored when ``uploaded_file``
            is given.
        uploaded_file: Value of the FileUpload widget. Its first file is written
            to the working directory under its own name and then read back.

    Side effects:
        Rebinds the global ``waveform`` to the loaded (and, if needed, resampled)
        samples, or to ``None`` when loading fails. Prints a short summary and
        displays an audio player. Errors are reported with ``print`` rather
        than raised.
    """
    global waveform

    try:
        if uploaded_file:
            wav_file_name, wav_data = _first_uploaded_file(uploaded_file)
            # Set the path first so a failure below is reported against this file.
            wav_file_path = wav_file_name
            with open(wav_file_name, 'wb') as f:
                f.write(wav_data)

        sample_rate, new_waveform = wavfile.read(wav_file_path, mmap=False)
        sample_rate, new_waveform = ensure_sample(sample_rate, new_waveform)
        duration = len(new_waveform) / sample_rate
        waveform = new_waveform # Update the global waveform variable

        print(f'Sample Rate: {sample_rate} Hz')
        print(f'Duration: {duration:.2f} s')
        print(f'Size of input: {len(waveform)}')

        print("\nPlaying the audio that is about to be classified...")
        preview = waveform
        if preview.ndim > 1:
            # Multi-channel data is (samples, channels). Flattening it would
            # interleave the channels into one double-length clip, so average
            # the channels instead, as the classification cell does.
            preview = preview.mean(axis=1)
        # NOTE(review): the cast assumes samples on the int16 scale; a float WAV
        # in [-1, 1] would preview as near-silence. Confirm the expected inputs.
        int16_info = np.iinfo(np.int16)
        clipped_waveform = np.clip(preview, int16_info.min, int16_info.max)
        waveform_int = clipped_waveform.astype(np.int16)
        display(Audio(waveform_int, rate=int(sample_rate)))

        print("\n\n✅ New audio loaded successfully!")
        print("You can now run the next cells to classify this audio.")

    except FileNotFoundError:
        print(f"File not found at the specified path: {wav_file_path}")
        waveform = None
    except Exception as e:
        print(f"An error occurred: {e}")
        waveform = None

def on_upload_change(change):
    """Observer for the upload widget: process the upload when one is present.

    ``change`` is accepted to match the observer signature but is not read;
    the current ``uploader.value`` is used instead.
    """
    if not uploader.value:
        return
    process_audio(uploaded_file=uploader.value)

def on_record_button_clicked(b):
    """Click handler for the record button: capture audio, then load it.

    ``b`` is accepted to match the click-callback signature but is not used.
    Nothing is processed when ``get_audio()`` returns a falsy value.
    """
    print("Starting recording...")
    recorded_path = get_audio()
    if not recorded_path:
        return
    print(f"Recording saved to: {recorded_path}")
    process_audio(wav_file_path=recorded_path)

# Register the callbacks: a change of the uploader's `value` trait runs
# on_upload_change, and a click on the record button runs on_record_button_clicked.
# NOTE(review): no callback is attached to `text_input` here, so a path typed into
# that box is not consumed by the code in this cell. Confirm whether that is intended.
uploader.observe(on_upload_change, names='value')
record_button.on_click(on_record_button_clicked)

In [None]:
# Convert the loaded samples to float32 and divide by the int16 maximum (32767).
# `waveform` must already have been set by process_audio(); it is None until then.
# NOTE(review): dividing by tf.int16.max presumes the samples are on the int16
# scale. Confirm this holds for every WAV format that may be loaded.
waveform_float = tf.cast(waveform, tf.float32)
normal_waveform=waveform_float/tf.int16.max

In [None]:
import tensorflow as tf
# NOTE(review): this rebinds the global `waveform` to a float32 tensor. Nothing
# else in this cell reads it afterwards; it is kept in case later cells do.
waveform = tf.convert_to_tensor(waveform, dtype=tf.float32)

# Collapse multi-channel audio to a single channel by averaging over axis 1.
if len(normal_waveform.shape) > 1:
    normal_waveform = tf.reduce_mean(normal_waveform, axis=1)

score, embeddings, spectrogram = model(normal_waveform)
scores_np = score.numpy()

# Average the scores over axis 0 once; the result holds one value per class and
# is reused below instead of being recomputed on every loop iteration.
mean_scores = scores_np.mean(axis=0)
infered_class = class_names[mean_scores.argmax()]

# Class name -> mean score as a percentage, ranked from highest to lowest.
dic = {class_names[i]: mean_scores[i] * 100 for i in range(len(mean_scores))}
sorted_dic = sorted(dic.items(), key=lambda item: item[1], reverse=True)

# Drop the ignored labels BEFORE taking the top 10. Slicing first and filtering
# afterwards could yield fewer than 10 classes when an ignored label sat just
# outside the original top 10.
IGNORED_LABELS = ("Silence", "Sound effect")
top_classes = [(key, value) for key, value in sorted_dic if key not in IGNORED_LABELS][:10]

# Report each remaining class as its share of the combined top-10 score.
t_sum = sum(value for _, value in top_classes)
for key, value in top_classes:
    print(f"{key}:{(value/t_sum)*100}")