<a href="https://colab.research.google.com/github/Bris-T/Bris-T1/blob/main/spectrogram.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Voice Recorder to File

This section records sound from the microphone and stores it locally in the file "sound.wav".

Install necessary libraries:

In [1]:
!pip install ffmpeg-python
!pip install PySoundFile
import soundfile as sf

Collecting ffmpeg-python
  Downloading ffmpeg_python-0.2.0-py3-none-any.whl.metadata (1.7 kB)
Downloading ffmpeg_python-0.2.0-py3-none-any.whl (25 kB)
Installing collected packages: ffmpeg-python
Successfully installed ffmpeg-python-0.2.0
Collecting PySoundFile
  Downloading PySoundFile-0.9.0.post1-py2.py3-none-any.whl.metadata (9.4 kB)
Downloading PySoundFile-0.9.0.post1-py2.py3-none-any.whl (24 kB)
Installing collected packages: PySoundFile
Successfully installed PySoundFile-0.9.0.post1


In [2]:
#@title Recording audio from microphone, setup: {display-mode: "form"}
"""
From:
https://colab.research.google.com/gist/ricardodeazambuja/03ac98c31e87caf284f7b06286ebf7fd/microphone-to-numpy-array-from-your-browser-in-colab.ipynb
Here are some of the possible references:
https://blog.addpipe.com/recording-audio-in-the-browser-using-pure-html5-and-minimal-javascript/
https://stackoverflow.com/a/18650249
https://hacks.mozilla.org/2014/06/easy-audio-capture-with-the-mediarecorder-api/
https://air.ghost.io/recording-to-an-audio-file-using-html5-and-js/
https://stackoverflow.com/a/49019356
"""
from IPython.display import HTML, Audio
from google.colab.output import eval_js
from base64 import b64decode
import numpy as np
from scipy.io.wavfile import read as wav_read
import io
import ffmpeg

# HTML/JavaScript snippet that get_audio() injects into the cell output.
# It asks for microphone access, starts a MediaRecorder as soon as access is
# granted, and stops it when the button is clicked. The recording is exposed to
# Python through the global promise `data` (read with eval_js("data")), which
# resolves to a base64 data URL ("data:<mime>;base64,<payload>").
#
# The promise is resolved from FileReader.onloadend, i.e. only once the encoded
# recording actually exists, instead of after a fixed delay. Clicking the button
# before recording has started therefore no longer hands a placeholder value
# to Python.
AUDIO_HTML = """
<script>
var my_div = document.createElement("DIV");
var my_btn = document.createElement("BUTTON");
var t = document.createTextNode("Press to start recording");

my_btn.appendChild(t);
my_div.appendChild(my_btn);
document.body.appendChild(my_div);

var base64data = 0;
var reader;
var recorder, gumStream;
var recordButton = my_btn;

// `data` is the value Python waits for. Its resolver is kept in a variable so
// that the FileReader callback below can fulfil it when the payload is ready.
var resolveData;
var data = new Promise(resolve => { resolveData = resolve; });

var handleSuccess = function(stream) {
  gumStream = stream;
  // Use the browser's default container/codec.
  recorder = new MediaRecorder(stream);
  recorder.ondataavailable = function(e) {
    // Offer an inline player for the take that was just recorded.
    var url = URL.createObjectURL(e.data);
    var preview = document.createElement('audio');
    preview.controls = true;
    preview.src = url;
    document.body.appendChild(preview);

    // Encode the recording as a base64 data URL and hand it to Python.
    reader = new FileReader();
    reader.onloadend = function() {
      base64data = reader.result;
      resolveData(base64data.toString());
    };
    reader.readAsDataURL(e.data);
  };
  recorder.start();
  };

recordButton.innerText = "Recording... press to stop";

navigator.mediaDevices.getUserMedia({audio: true}).then(handleSuccess);


function toggleRecording() {
  if (recorder && recorder.state == "recording") {
      recorder.stop();
      gumStream.getAudioTracks()[0].stop();
      recordButton.innerText = "Saving the recording... pls wait!"
  }
}

recordButton.onclick = toggleRecording;

</script>
"""

def get_audio():
  """Record audio in the browser and return it as a sample array.

  Displays the recorder widget (AUDIO_HTML), waits for the JavaScript promise
  `data` to deliver the recording as a base64 data URL, converts it to WAV with
  ffmpeg through pipes, and parses the result with scipy.io.wavfile.read.

  Returns:
    tuple: (audio, sr) where `audio` is the sample array returned by
    scipy.io.wavfile.read and `sr` is the sampling rate in Hz.

  Raises:
    RuntimeError: if the browser delivered no base64 payload, or if ffmpeg
      produced no output (its stderr text is included in the message).
  """
  display(HTML(AUDIO_HTML))
  data = eval_js("data")

  # Expected form: "data:<mime>;base64,<payload>"; keep only the payload.
  _, comma, payload = str(data).partition(',')
  if not comma or not payload:
    raise RuntimeError("No audio data was received from the browser recorder.")
  binary = b64decode(payload)

  process = (ffmpeg
    .input('pipe:0')
    .output('pipe:1', format='wav')
    .run_async(pipe_stdin=True, pipe_stdout=True, pipe_stderr=True, quiet=True, overwrite_output=True)
  )
  output, err = process.communicate(input=binary)
  if not output:
    detail = err.decode(errors='replace') if err else ''
    raise RuntimeError("ffmpeg could not convert the recording to WAV. " + detail)

  # Replace bytes 4:8 of the WAV data with the actual size of the RIFF chunk,
  # stored as a 4-byte little-endian integer (masked to 32 bits so that
  # oversized values wrap instead of raising).
  riff_chunk_size = len(output) - 8
  size_field = (riff_chunk_size & 0xFFFFFFFF).to_bytes(4, 'little')
  riff = output[:4] + size_field + output[8:]

  sr, audio = wav_read(io.BytesIO(riff))

  return audio, sr

## Record sound, listen and store to file "sound.wav":

In [10]:
# Record a take from the microphone, report its sampling rate, and write the
# samples to "sound.wav" in the current working directory.
audio, sr = get_audio()
print("sampling rate=", sr)
sf.write(file='sound.wav', data=audio, samplerate=sr)

sampling rate= 48000


# Interactive Spectrogram with Zoom

Load the audio signal for the spectrogram using the "Files" panel on the left, then copy its path into the text field argument below, and run that cell. Verify by listening to its mono version by clicking on the playback button below.

Then, in the "Spectrogram" cell, you can change the FFT length of the Short-Time Fourier Transform (STFT) used for the spectrogram, and the number of times the STFT windows overlap, if desired. Then run the cell. The spectrogram has "hover", zoom, and "save" tools.

In [11]:
import scipy.io.wavfile as wav
import numpy as np
from IPython.display import Audio, display

# Path of the WAV file to analyse; editable through the Colab form field.
filename = '/content/sound.wav' #@param {'type': 'string'}
rate, snd = wav.read(filename)
print("Data Type:", snd.dtype, "rate=", rate)

# Give mono signals an explicit channel axis so that the code below can always
# address the samples as snd[:, channel].
if snd.ndim == 1:
  snd = snd[:, np.newaxis]

channels = snd.shape[1]
print("channels=", channels)
# Signal duration in seconds (number of sample frames divided by the rate).
siglen = snd.shape[0] / rate

# One playback widget per channel.
print("Listen to it:")
for chan in range(channels):
  print("channel "+str(chan)+":")
  player = Audio(snd[:, chan], rate=rate)
  display(player)


Data Type: int16 rate= 48000
channels= 1
Listen to it:
channel 0:


In [12]:
#@title Spectrogram with Tools {run: "auto" }
import scipy.signal as signal
import matplotlib.pyplot as plt

import bokeh.io
bokeh.io.output_notebook()
import bokeh.plotting
from bokeh.palettes import Spectral11
#from bokeh.models import HoverTool
from bokeh.plotting import figure

#Example: https://docs.bokeh.org/en/2.4.0/docs/gallery/image.html

#hovertool: https://docs.bokeh.org/en/latest/docs/user_guide/tools.html#hovertool

# The Colab form fields deliver strings; convert them to integers right away.
fftlen= "2048" #@param [256,2048, 8192]
fftlen=int(fftlen)
timesoverlap= "2" #@param [2, 4, 8, 16 ]
# Number of times the STFT windows overlap.
timesoverlap=int(timesoverlap)

# Consecutive windows advance by fftlen//timesoverlap samples; the remainder of
# each window overlaps with its predecessor.
hop = fftlen // timesoverlap
noverlap = fftlen - hop

# Hover read-out: cursor position on both axes plus the image value in dB.
hover_tooltips = [("time", "$x"), ("freq.:", "$y"), ("dB", "@image")]

for chan in range(channels):
  freq, t, y = signal.stft(
      snd[:, chan],
      fs=rate,
      nperseg=fftlen,
      noverlap=noverlap,
  )
  # Magnitude in dB; the small offset keeps log10 away from zero.
  magdb = 20 * np.log10(np.abs(y) + 1e-6)

  print("max(magdb)=", magdb.max(), "min(magdb)=", magdb.min(), "magdb.shape=", magdb.shape)

  p = figure(
      title='Spectrogram channel '+str(chan),
      x_axis_label='Time (s)',
      y_axis_label='Frequency (Hz)',
      tooltips=hover_tooltips,
  )
  # The image is stretched over 0..siglen seconds and 0..rate/2 Hz.
  p.image(
      image=[magdb],
      x=0,
      y=0,
      dw=siglen,
      dh=rate/2,
      palette="Spectral11",
      level="image",
  )
  p.grid.grid_line_width = 0.5
  bokeh.plotting.show(p)



max(magdb)= 83.81973 min(magdb)= -108.06464 magdb.shape= (1025, 249)
