## Authors:
Pavly Salah Zaki (section 2)

Bolis Karam Soliman (section 2)

Marco Magdy William (section 3)

### Install dependencies

*Librosa* conda install -c conda-forge librosa

*PyAudio* pip install pyaudio

### Import libraries

In [17]:
import pyaudio
import wave

import librosa
import librosa.display

import matplotlib.pyplot as plt
%matplotlib tk

import numpy as np

import struct
from scipy.fftpack import fft

from tkinter import TclError
import time

### Global constants

In [18]:
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 44100
CHUNK = 1024*2

RECORD_SECONDS = 10
WAVE_OUTPUT_FILENAME = "Recorded_audio.wav"

### Visualize audio signal in real-time

In [24]:
# PyAudio instance
audio = pyaudio.PyAudio()

# Create an audio stream
stream = audio.open(
                format=FORMAT, 
                channels=CHANNELS,
                rate=RATE, 
                input=True,
                output=True,
                frames_per_buffer=CHUNK
)


# Create a Matplot figure
fig, (ax1, ax2) = plt.subplots(2, figsize=(15, 7))

'''Waveform'''
# Some needed garbage
x = np.arange(0, 2*CHUNK, 2)
line, = ax1.plot(x, np.random.rand(CHUNK), '-', lw=2)

# Set limits for axii
ax1.set_title('AUDIO WAVEFORM')
ax1.set_xlabel('Time')
ax1.set_ylabel('Amplitude')
ax1.set_ylim(0, 255)
ax1.set_xlim(0, 2 * CHUNK)
plt.setp(ax1, xticks=[0, CHUNK, 2 * CHUNK], yticks=[0, 128, 255])


'''Spectrum'''
# create semilogx line for spectrum
x_fft = np.linspace(0, RATE, CHUNK)
line_fft, = ax2.semilogx(x_fft, np.random.rand(CHUNK), '-', lw=2)
ax2.set_xlim(20, RATE / 2)
ax2.set_title('Frequency spectrum after FFT')
ax2.set_xlabel('log(w)')
ax2.set_ylabel('Amplitude Y(k)')

print('Recording started')
frames = []

# for measuring frame rate
frame_count = 0
start_time = time.time()

# This is where the magic happens
while True:
    # Read a {CHUNK} of audio
    data = stream.read(CHUNK)
    frames.append(data)

    # Convert bytes to int... Then take every 2 samples... Add 128 (to center the signal)
    data_int = np.array(struct.unpack(str(2*CHUNK) + 'B', data), dtype='b')[::2] + 128
    line.set_ydata(data_int)
    
    # compute FFT and update line
    y_fft = fft(data_int)
    line_fft.set_ydata(np.abs(y_fft[0:CHUNK])  / (128 * CHUNK))    # amplitude = 128
    
    try:
        fig.canvas.draw()
        fig.canvas.flush_events()
        frame_count += 1
            
    # Matplotlib tk raises an error after quitting
    except TclError:
        # calculate FPS
        frame_rate = frame_count / (time.time() - start_time)

        print("Finished recording")    # Yay!
        print('average frame rate = {:.0f} FPS'.format(frame_rate))

        stream.stop_stream()
        stream.close()
        audio.terminate()

        break

Recording started
Finished recording
average frame rate = 2 FPS


### Record for {RECORD_SECONDS} number of seconds

In [8]:
'''
for i in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
    try:
        data = stream.read(CHUNK)
        frames.append(data)
    except KeyboardInterrupt:
        stream.stop_stream()
        stream.close()
        audio.terminate()
'''    

'\n# Record for {RECORD_SECONDS} number of seconds\n# for i in range(0, int(RATE / CHUNK * RECORD_SECONDS)):\n    try:\n        data = stream.read(CHUNK)\n        frames.append(data)\n    except KeyboardInterrupt:\n        stream.stop_stream()\n        stream.close()\n        audio.terminate()\n'

### Save the recorded audio

In [22]:
with wave.open(WAVE_OUTPUT_FILENAME, 'wb') as waveFile:
    waveFile.setnchannels(CHANNELS)
    waveFile.setsampwidth(audio.get_sample_size(FORMAT))
    waveFile.setframerate(RATE)
    waveFile.writeframes(b''.join(frames))

### Play the audio from Anaconda Notebook

Say whaaaaaaa!!

In [23]:
import IPython.display
IPython.display.Audio(WAVE_OUTPUT_FILENAME, rate = 44100)

### Visualize the recorded audio

In [16]:
audio_file, sampling_rate = librosa.load(WAVE_OUTPUT_FILENAME)

plt.figure(figsize=(14, 5))
librosa.display.waveplot(audio_file, sr=sampling_rate)

<matplotlib.collections.PolyCollection at 0x1c3986c76a0>

# TO BE DONE

1. Access the microphone
2. Visualize what is said in both time and frequency domains
3. Run STT
4. Open an application based on what is said