In [1]:
# Import the necessary modules.
from tkinter import *
import tkinter as tk
import tkinter.messagebox
import pyaudio
import wave
import os
import numpy as np
import tensorflow as tf
import pathlib
import soundfile as sf
import librosa
from pydub import AudioSegment
from datetime import datetime

In [2]:
# Load the trained Keras model saved in HDF5 format (the file is expected to
# carry the architecture and weights, plus the optimizer state if it was saved).
# The path is relative to the notebook's working directory. `model` is read as a
# module-level global by RecAUD.end_Sentence when it predicts each spoken word.
model = tf.keras.models.load_model('config/models/RNN_model.h5')

In [3]:
def decode_audio(audio_binary):
  """Decode WAV-encoded bytes into a waveform with the last axis squeezed out.

  Args:
    audio_binary: Raw contents of a WAV file (e.g. from `tf.io.read_file`).

  Returns:
    The decoded samples with their trailing axis removed. `tf.squeeze(axis=-1)`
    requires that axis to have size 1, so this presumably expects mono audio.
  """
  # decode_wav yields (samples, sample_rate); the sample rate is not needed here.
  samples, _sample_rate = tf.audio.decode_wav(contents=audio_binary)
  mono_waveform = tf.squeeze(samples, axis=-1)
  return mono_waveform

def get_waveform(file_path):
  """Read the WAV file at `file_path` and return its decoded waveform.

  Args:
    file_path: Path of the WAV file to load.

  Returns:
    Whatever `decode_audio` produces for the file's raw bytes.
  """
  return decode_audio(tf.io.read_file(file_path))

def get_spectrogram(waveform, input_len=32500):
  """Convert a waveform into a fixed-size magnitude spectrogram.

  The waveform is truncated or zero-padded to exactly `input_len` samples so
  that every spectrogram has the same shape, then transformed with a
  short-time Fourier transform.

  Args:
    waveform: Audio samples. The padding logic concatenates along axis 0 with
      a 1-D block of zeros, so this is expected to be a 1-D tensor.
    input_len: Number of samples to keep/pad to. Defaults to 32500, the value
      previously hard-coded here (presumably what the model was trained on).

  Returns:
    The STFT magnitudes with one extra trailing axis appended.
  """
  # Truncate first so the padding length below can never be negative.
  waveform = tf.cast(waveform[:input_len], dtype=tf.float32)
  zero_padding = tf.zeros(
      [input_len] - tf.shape(waveform),
      dtype=tf.float32)
  equal_length = tf.concat([waveform, zero_padding], 0)
  spectrogram = tf.signal.stft(equal_length, frame_length=255, frame_step=128)
  # Keep only the magnitude; the phase is discarded.
  spectrogram = tf.abs(spectrogram)
  # Append a trailing axis of size 1 to the magnitude tensor.
  spectrogram = spectrogram[..., tf.newaxis]
  return spectrogram

# Sentinel passed as `num_parallel_calls` in preprocess_dataset so that tf.data
# picks the level of parallelism for each `Dataset.map` call itself.
AUTOTUNE = tf.data.experimental.AUTOTUNE

def preprocess_dataset(files):
  """Build a dataset of spectrograms from a collection of WAV file paths.

  Args:
    files: File paths, sliced into one dataset element per path.

  Returns:
    A dataset in which each path has been mapped through `get_waveform` and
    then `get_spectrogram`.
  """
  pipeline = tf.data.Dataset.from_tensor_slices(files)
  # Apply the two preprocessing stages in order: path -> waveform -> spectrogram.
  for stage in (get_waveform, get_spectrogram):
    pipeline = pipeline.map(map_func=stage, num_parallel_calls=AUTOTUNE)
  return pipeline

In [4]:
#GUI class & record
class RecAUD:
    """Tkinter window that records a meeting sentence by sentence and transcribes it.

    Each sentence is recorded from the microphone into its own WAV file, split
    into non-silent clips with librosa, and every clip is classified by the
    module-level `model`. The predicted words are appended to `self.words`.
    Ending the meeting merges the sentence recordings into
    "meetingRecording.wav" and writes "meetingReport.txt".

    Note: the constructor enters the Tk main loop, so `RecAUD()` does not
    return until the main loop exits.
    """

    # Vocabulary of the classifier: the index of the model's strongest output
    # is used directly as an index into this list.
    COMMANDS = ['ability','able','about','abroad','absence','address','affect','alcohol','anything','background',
                'bad','bag','balance','ball','ban','bank','bed','bird','borrow','cabin','cake','calculate','call',
                'camera','camp','cat','chef','clinic','club','dangerous','dare','dark','daughter','decline',
                'decrease','definition','degree','delay','dog','down','each','early','earn','earth','easily','effective',
                'eight','energy','five','four','go','happy','house','left','light','limit','listen','locate','look','mad',
                'majority','manage','massive','material','meaning','memory','minor','news','night','nine','no','normal',
                'note','nothing','off','on','one','right','seven','six','stop','three','tree','two','up','wow','yes','zero']

    # Directory holding one WAV file per recorded sentence.
    OUTPUT_DIR = 'config/output/'
    # Directory holding the non-silent clips each sentence is split into.
    BREAKDOWN_DIR = 'config/breakdown/'

    def __init__(self, chunk=1024, frmat=pyaudio.paInt16, channels=1, rate=22050, py=None):
        """Build the window and run the Tk main loop.

        Args:
            chunk: Number of frames read from the microphone per buffer.
            frmat: PyAudio sample format.
            channels: Number of input channels.
            rate: Recording sample rate in Hz.
            py: PyAudio instance to use. When omitted a fresh one is created
                for this object; a default evaluated in the signature would be
                shared by every instance and left terminated by `end_Meeting`.
        """
        # Start Tkinter and set Title
        self.main = tk.Tk()
        self.main.geometry('400x500')
        self.main.title('Smart Meeting Minutes')
        # Recording settings
        self.CHUNK = chunk
        self.FORMAT = frmat
        self.CHANNELS = channels
        self.RATE = rate
        self.p = py if py is not None else pyaudio.PyAudio()
        self.stream = None

        self.frames = []
        self.date = ""
        self.words = "Transcript: "
        self.st = 0  # 1 while a recording loop is running, 0 otherwise
        self.totalSentences = 0

        # GUI messages (the first line names the actual 'Start Meeting' button)
        self.programMsg = tk.Label(self.main, text="""
        Click "Start Meeting" to record your meeting!
        Click "New Sentence" to proceed record your next sentence.
        Click "End Sentence" to end the current recording.
        Click "End Meeting" to end the meeting and produce report and recording
        """)
        self.SpeechMsg = tk.Label(self.main, text="")
        # Frame holding the buttons
        self.buttons = tk.Frame(self.main, padx=120, pady=20)
        # Pack Frames
        self.programMsg.pack()
        self.buttons.pack(fill=tk.BOTH)
        self.SpeechMsg.pack()
        # Buttons laid out on a grid. Only 'Start Meeting' is clickable at
        # first: ending a meeting that never started would terminate PyAudio
        # while 'Start Meeting' could still be pressed afterwards.
        self.startMeeting = tk.Button(self.buttons, width=10, padx=10, pady=5, text='Start Meeting', command=self.start_Meeting)
        self.startMeeting.grid(row=0, column=0, padx=50, pady=5)
        self.newSentence = tk.Button(self.buttons, width=10, padx=10, pady=5, state="disabled", text='New Sentence', command=self.new_Sentence)
        self.newSentence.grid(row=1, column=0, columnspan=1, padx=50, pady=5)
        self.endSentence = tk.Button(self.buttons, width=10, padx=10, pady=5, state="disabled", text='End Sentence', command=self.end_Sentence)
        self.endSentence.grid(row=2, column=0, columnspan=1, padx=50, pady=5)
        self.endMeeting = tk.Button(self.buttons, width=10, padx=10, pady=5, state="disabled", text='End Meeting', command=self.end_Meeting)
        self.endMeeting.grid(row=4, column=0, padx=50, pady=5)

        self.main.mainloop()

    # ----- helpers -----

    def _sentence_path(self, index):
        """Return the WAV path used for sentence number `index`."""
        return os.path.join(self.OUTPUT_DIR, 'sentence' + str(index) + '.wav')

    def _close_stream(self):
        """Stop and close the microphone stream if one is open."""
        if self.stream is not None:
            self.stream.stop_stream()
            self.stream.close()
            self.stream = None

    def _record(self):
        """Open the microphone and buffer audio until `self.st` is no longer 1."""
        self.st = 1
        self.frames = []
        self.stream = self.p.open(format=self.FORMAT, channels=self.CHANNELS, rate=self.RATE,
                                  input=True, frames_per_buffer=self.CHUNK)
        while self.st == 1:
            data = self.stream.read(self.CHUNK)
            self.frames.append(data)
            try:
                # Pump Tk events so the 'End Sentence' callback can run; that
                # callback sets self.st to 0 and closes the stream.
                self.main.update()
            except tk.TclError:
                # The window was closed mid-recording: stop cleanly instead of
                # leaving the input stream open.
                self.st = 0
                self._close_stream()

    def _save_sentence(self):
        """Write the buffered frames to this sentence's WAV file and return its path."""
        os.makedirs(self.OUTPUT_DIR, exist_ok=True)
        file_name = self._sentence_path(self.totalSentences)
        with wave.open(file_name, 'wb') as wf:
            wf.setnchannels(self.CHANNELS)
            wf.setsampwidth(self.p.get_sample_size(self.FORMAT))
            wf.setframerate(self.RATE)
            wf.writeframes(b''.join(self.frames))
        return file_name

    def _split_into_clips(self, sentence_file):
        """Split a sentence recording on silence and return the clip file paths."""
        os.makedirs(self.BREAKDOWN_DIR, exist_ok=True)
        audio, sr = librosa.load(sentence_file)
        clip_files = []
        # librosa.effects.split returns [start, end) sample intervals of the
        # non-silent regions (anything within 30 dB of the peak).
        for clip_index, (start, end) in enumerate(librosa.effects.split(audio, top_db=30), start=1):
            clip_file = os.path.join(
                self.BREAKDOWN_DIR,
                'breakdown' + str(self.totalSentences) + "-" + str(clip_index) + '.wav')
            sf.write(clip_file, audio[start:end], sr)
            clip_files.append(clip_file)
        return clip_files

    def _predict_word(self, clip_file):
        """Return the vocabulary word the model scores highest for one clip."""
        spectrogram = preprocess_dataset([str(clip_file)]).batch(1)
        prediction = model.predict(spectrogram)
        # argmax always yields exactly one index, even when scores tie.
        return self.COMMANDS[int(np.argmax(prediction[0]))]

    @staticmethod
    def _clear_directory(directory):
        """Delete every regular file directly inside `directory`, if it exists."""
        if not os.path.isdir(directory):
            return
        for entry in os.listdir(directory):
            path = os.path.join(directory, entry)
            if os.path.isfile(path):
                os.remove(path)

    # ----- button callbacks -----

    def start_Meeting(self):
        """Stamp the meeting start time and record the first sentence."""
        self.date = "Meeting Date and Time: " + str(datetime.now())
        self.startMeeting["state"] = "disabled"
        self.endMeeting["state"] = "disabled"
        self.endSentence["state"] = "normal"
        self._record()

    def new_Sentence(self):
        """Record the next sentence of the running meeting."""
        self.newSentence["state"] = "disabled"
        self.endMeeting["state"] = "disabled"
        self.endSentence["state"] = "normal"
        self._record()

    def end_Sentence(self):
        """Stop recording, save the sentence and append its words to the transcript."""
        self.totalSentences += 1
        self.st = 0
        self.newSentence["state"] = "normal"
        self.endSentence["state"] = "disabled"
        self.endMeeting["state"] = "normal"
        self._close_stream()

        sentence_file = self._save_sentence()
        clip_files = self._split_into_clips(sentence_file)

        if clip_files:
            for clip_file in clip_files:
                self.words += " " + self._predict_word(clip_file)
            self.words += "."

    def end_Meeting(self):
        """Merge the sentence recordings, write the report and clean up temp files."""
        self.newSentence["state"] = "disabled"
        self.endMeeting["state"] = "disabled"
        self.p.terminate()
        if self.totalSentences == 0:
            return

        # Walk the sentences by number so the merged audio is in spoken order;
        # os.listdir order is arbitrary and would also pick up stale files.
        sentence_files = [self._sentence_path(i) for i in range(1, self.totalSentences + 1)]
        sentence_files = [path for path in sentence_files if os.path.isfile(path)]
        if not sentence_files:
            return

        combined = AudioSegment.from_file(sentence_files[0], format="wav")
        for path in sentence_files[1:]:
            combined += AudioSegment.from_file(path, format="wav")
        # export() hands back the open output file; close it right away.
        combined.export("meetingRecording.wav", format="wav").close()

        self._clear_directory(self.OUTPUT_DIR)
        self._clear_directory(self.BREAKDOWN_DIR)

        with open("meetingReport.txt", "w") as report:
            report.write(self.date + "\n" + self.words)
            

# Create a RecAUD object to begin the program. The constructor starts the Tk
# main loop itself, so this statement blocks until that loop exits.
guiAUD = RecAUD()