In [1]:
from pyAudioAnalysis import audioBasicIO
from pyAudioAnalysis import audioFeatureExtraction
from pyAudioAnalysis import audioTrainTest as aT
import numpy as np
import speech_recognition as sr
import time
import threading
import sklearn
import os
import traceback

In [2]:
classifier = "knn"
training_labels_file = "labels.txt"
model_file = "model"
test_file = "test.wav"

In [3]:
def record_and_rebuild(recognizer, microphone, username):
    with microphone as source:
        audio = recognizer.record(source, duration=4, offset=0) # Record for 4 seconds

    user_dir = "data/" + username
        
    # Bug: We need two files for classification
    os.mkdir(user_dir)
    
    # Store the audio
    with open(user_dir + "/1.wav", "wb") as f:
        f.write(audio.get_wav_data())
    with open(user_dir + "/2.wav", "wb") as f:
        f.write(audio.get_wav_data())

    # Append the feature 
    with open(training_labels_file, "a") as f:
        f.write(user_dir + "," + username + "\n")
        
    # Rebuild model
    retrain()

In [4]:
def retrain():
    # Delete old model
    try:
        os.remove(model_file + ".arff")
        os.remove(model_file + "MEAN")
        os.remove(model_file)
    except:
        pass
    
    try:
        labels = np.loadtxt(open(training_labels_file, "r"), delimiter=",", dtype=str)
        directories = list(labels[::,0])
        aT.featureAndTrain(directories, 1.0, 1.0, aT.shortTermWindow, aT.shortTermStep, classifier, model_file, False)
    except Exception as e:
        print(e)

In [5]:
retrain()

STEP A: Feature Extraction
Analyzing file 1 of 2: b'data/noise/1.wav'
Analyzing file 2 of 2: b'data/noise/2.wav'
Feature extraction complexity ratio: 7.5 x realtime
Analyzing file 1 of 2: b'data/tm/1.wav'
Analyzing file 2 of 2: b'data/tm/2.wav'
Feature extraction complexity ratio: 5.7 x realtime
STEP B: Classifier Evaluation and Parameter Selection
Param = 1.00000 - Classifier Evaluation Experiment 1 of 100
Param = 1.00000 - Classifier Evaluation Experiment 2 of 100
Param = 1.00000 - Classifier Evaluation Experiment 3 of 100
Param = 1.00000 - Classifier Evaluation Experiment 4 of 100
Param = 1.00000 - Classifier Evaluation Experiment 5 of 100
Param = 1.00000 - Classifier Evaluation Experiment 6 of 100
Param = 1.00000 - Classifier Evaluation Experiment 7 of 100
Param = 1.00000 - Classifier Evaluation Experiment 8 of 100
Param = 1.00000 - Classifier Evaluation Experiment 9 of 100
Param = 1.00000 - Classifier Evaluation Experiment 10 of 100
Param = 1.00000 - Classifier Evaluation Experime

Param = 7.00000 - Classifier Evaluation Experiment 66 of 100
Param = 7.00000 - Classifier Evaluation Experiment 67 of 100
Param = 7.00000 - Classifier Evaluation Experiment 68 of 100
Param = 7.00000 - Classifier Evaluation Experiment 69 of 100
Param = 7.00000 - Classifier Evaluation Experiment 70 of 100
Param = 7.00000 - Classifier Evaluation Experiment 71 of 100
Param = 7.00000 - Classifier Evaluation Experiment 72 of 100
Param = 7.00000 - Classifier Evaluation Experiment 73 of 100
Param = 7.00000 - Classifier Evaluation Experiment 74 of 100
Param = 7.00000 - Classifier Evaluation Experiment 75 of 100
Param = 7.00000 - Classifier Evaluation Experiment 76 of 100
Param = 7.00000 - Classifier Evaluation Experiment 77 of 100
Param = 7.00000 - Classifier Evaluation Experiment 78 of 100
Param = 7.00000 - Classifier Evaluation Experiment 79 of 100
Param = 7.00000 - Classifier Evaluation Experiment 80 of 100
Param = 7.00000 - Classifier Evaluation Experiment 81 of 100
Param = 7.00000 - Classi

In [13]:
def guess(application, recognizer, microphone):   
    with microphone as source:
        audio = recognizer.listen(source)
    
    try:
        text = recognizer.recognize_sphinx(audio)
    except sr.UnknownValueError:
        print("Sphinx could not understand audio")
    except sr.RequestError as e:
        print("Sphinx error; {0}".format(e))

    with open(test_file, "wb") as f:
        f.write(audio.get_wav_data())

    try:
        out = aT.fileClassification(test_file, model_file, classifier)
        speakers = np.loadtxt(open("labels.txt", "r"), delimiter=",", dtype=str)[::,1]
        speakers = np.sort(speakers)
        application.set_result(text, speakers[int(out[0])])
    except Exception as e:
        print("Classification error; {0}".format(e))
        print(traceback.format_exc())

In [7]:
def record_or_guess_loop(application, recognizer, microphone):
    # Run until shut down
    t = threading.currentThread()
    while getattr(t, "stop") == False:
        username = getattr(t, "classify")
        if username != False:
            record_and_rebuild(recognizer, microphone, username)
            t.classify = False
        else:
            guess(application, recognizer, microphone)
            
    print("Stopping thread")

In [None]:
import tkinter as tk
import time
from tkinter import *

class Application(tk.Frame):
    
    def __init__(self, r, m, master=None):
        self.result_var = StringVar()
        self.schemas = dict()
        
        self.r = r
        self.m = m
    
        tk.Frame.__init__(self, master)
        self.pack()
        self.createWidgets()
        
    def makeentry(parent, caption, width=None, **options):
        tk.Label(parent, text=caption).pack(side=LEFT)
        parent.entry = tk.Entry(parent, **options)
        if width:
            parent.entry.config(width=width)
        parent.entry.pack(side=LEFT)
        return parent.entry

    def createWidgets(self):        
        self.makeentry("Enter username")

        self.record = tk.Button(self, text="Start recording", fg="red", command=self.start_record)
        self.record.pack(side="bottom")
        
        self.result = Label(root, textvariable = self.result_var)
        self.result.pack()
        
    def set_result(self, text, speaker):
        if (speaker == "noise"):
            return
        
        #if speaker in self.schemas:
        #    schema = self.schemas.get(speaker)
        #else:
        #    schema = !java -jar "SkemaFetcher/target/SkemaBackend-1.0-SNAPSHOT-jar-with-dependencies.jar" $speaker
        #    schema = schema[0]
        #    self.schemas[speaker] = schema
        #if (schema == []):
        #    schema = "No schema for " + speaker
        self.result_var.set(speaker)
    
    def start_record(self):
        username = self.entry.get()
        self.record_thread.classify = username # Trigger recording of user

        
# Initialize recognizer and microphone
r = sr.Recognizer()
m = sr.Microphone()
with m as source:
    r.adjust_for_ambient_noise(source)  # we only need to calibrate once, before we start listening
    r.energy_threshold = 2000
    r.pause_threshold = 0.2
    r.non_speaking_duration = 0.1
    r.dynamic_energy_adjustment_ratio = 2
        
root = tk.Tk()
app = Application(r, m, master=root)

# Start listening in the background and return the thread
thread = threading.Thread(target = record_or_guess_loop, args=(app, r, m,), daemon=True)
thread.stop = False
thread.classify = False
thread.start()

app.record_thread = thread

app.mainloop()

In [16]:
thread.stop = True

Classification error; main thread is not in main loop
Traceback (most recent call last):
  File "<ipython-input-13-072687ee9f1d>", line 19, in guess
    application.set_result(text, speakers[int(out[0])])
  File "<ipython-input-12-49a441c1285f>", line 47, in set_result
    self.result_var.set(speaker)
  File "/usr/lib/python3.5/tkinter/__init__.py", line 263, in set
    return self._tk.globalsetvar(self._name, value)
RuntimeError: main thread is not in main loop

Classification error; main thread is not in main loop
Traceback (most recent call last):
  File "<ipython-input-13-072687ee9f1d>", line 19, in guess
    application.set_result(text, speakers[int(out[0])])
  File "<ipython-input-15-49a441c1285f>", line 47, in set_result
    self.result_var.set(speaker)
  File "/usr/lib/python3.5/tkinter/__init__.py", line 263, in set
    return self._tk.globalsetvar(self._name, value)
RuntimeError: main thread is not in main loop

Stopping thread
