In [1]:
from keras.models import load_model

from features import *
from helper import *
import pyaudio
import struct
import numpy as np
import matplotlib.pyplot as plt
import time
from tkinter import TclError
from collections import deque
from scipy.interpolate import interp1d

# use this backend to display in separate Tk window
%matplotlib tk
#%matplotlib inline

# p = pyaudio.PyAudio()
# for i in range(p.get_device_count()):
#     print(p.get_device_info_by_index(i))

Using TensorFlow backend.


In [2]:
# constants
TIME_SEC = 2
RATE = 44100                 # samples per second
CHUNK = int(44100 * TIME_SEC)     # 1024 * 20 #int(44100 / TIME_SEC)             # samples per frame
FORMAT = pyaudio.paInt16     # audio format (bytes per sample?)
CHANNELS = 1                 # single channel for microphone

TRANSCRIPTION = [("You\'re", 5.3) , ("gonna", 0.2) , ("hear", 0.2) , ("the", 0.43) , ("truth", 0.86) ,
                ("What", 2.8) , ("you", 0.3) , ("are", 0.2) , ("and", 2.24) , ("what", 0.43) , ("I", 0.46) , ("am", 0.2) ,
                ("We", 3.40) , ("never", 0.2) , ("told", 0.2) , ("the", 0.2) , ("truth", 0.2) , ("for", 0.2) , ("10", 0.2) , ("minutes", 0.2) , ("in", 0.2) , ("this", 0.2) , ("house", 0.2) ,
                ("well", 2.3) , ("hear", 0.2) , ("this", 0.2) , ("Willy", 0.2) ,
                ("this", 1.8) , ("is", 0.2) , ("me", 0.2) ,
                ("You", 2.9) , ("wanna", 0.2), ("know", 0.2) , ("why", 0.2) , ("I", 0.2) , ("had", 0.2) , ("no", 0.2) , ("address", 0.2) ,
                ("for", 0.2) , ("three", 0.2) , ("months", 0.2) ,
                ("I\'ts", 1.54) , ("cuz", 0.2) , ("I", 0.2) , ("stole", 0.2) , ("a", 0.2) , ("suit", 0.2) , ("in", 0.2) , ("Kansas", 0.2) , ("City", 0.2) , ("and", 0.4) , ("I", 0.2) , ("was", 0.2) , ("in", 0.2) , ("jail", 0.2) ,
                ("I", 2.6) , ("stole", 0.2) , ("myself", 0.2) , ("out", 0.2) , ("of", 0.2) , ("every", 0.2) , ("good", 0.2) , ("job", 0.2) , ("since", 0.2) , ("high", 0.2) , ("school", 0.2) ,

                ("And", 0.4) , ("I", 0.2) , ("never", 0.2) , ("got", 0.2) , ("anywhere", 0.2) ,
                ("because", 1.8) , ("you", 0.2) , ("blew", 0.2) , ("me", 0.2) , ("so", 0.2) , ("full", 0.2) , ("of", 0.2) , ("hot", 0.2) , ("air", 0.2) ,
                ("I", 0.4) , ("could", 0.2) , ("never", 0.2) , ("stand", 0.2) , ("taking", 0.2) , ("orders", 0.2) , ("from", 0.2) , ("anybody", 0.2) ,

                ("That\'s", 1.4) , ("whose", 0.2) , ("fault", 0.2) , ("it", 0.2) , ("is", 0.2) ,

                ("It\'s", 1.72) , ("goddamn", 0.2) , ("time", 0.2) , ("you", 0.2) , ("heard", 0.2) , ("that", 0.2) , ("pap", 0.2)]


In [3]:
def text_manual(text_array, delay_lim, delay):

    total_time = 0
    text_out = ""

    for word, time_delay in text_array:
        total_time += time_delay
        if total_time > delay:
            break
        elif total_time > delay_lim:
            text_out += str(word) + " "

    return text_out #, total_time



def plot_line_graph(graph, data):
    graph.axhline(y=0.5, color='r', linestyle='--')

    # ang_data = interp1d(data.index, data['ang'],kind='cubic')
    # exc_data = interp1d(data.index, data['exc'],kind='cubic')
    # neu_data = interp1d(data.index, data['neu'],kind='cubic')
    # sad_data = interp1d(data.index, data['sad'],kind='cubic')

    ang_data = data['ang']
    exc_data = data['exc']
    neu_data = data['neu']
    sad_data = data['sad']

    graph.plot(ang_data, label="Anger", color='r')
    graph.plot(exc_data, label="Excited", color='y')
    graph.plot(neu_data, label="Neutral", color='g')
    graph.plot(sad_data, label="Sad", color='b')
    graph.legend(loc="upper left")

    return graph

def plot_bar_graph (graph, data):
    graph.axhline(y=0.5, color='r', linestyle='--')
    graph.bar(x=0, height=data[0], label="Anger", color='r')
    graph.bar(x=1, height=data[1], label="Excited", color='y')
    graph.bar(x=2, height=data[2], label="Neutral", color='g')
    graph.bar(x=3, height=data[3], label="Sad", color='b')
    graph.legend(loc="upper left")

    return graph

def plot_text(graph, text_array):
    char_size = 12
    empty_string_size = char_size
    total_input_string_size = 0

    word_index = 0.012
    text_total= ""

    #graph.cla()

    #graph.rcParams.update({'font.size': char_size})

    for text in text_array:
        text = text + " "
        text_size = len(text) * 0.012
        total_input_string_size += text_size
        text_total += text
        #plt.text(word_index, 0.5, text, bbox=dict(facecolor='red', alpha=0.5))
        word_index += text_size

    graph.text(0.1, 0.5, text_total, bbox=dict(facecolor='red', alpha=0.5))

    #plt.xlim([0, word_index + 0.1])

    return graph


In [4]:
def main(window_size = 0.5, window_n = 20, monolog_output = False):

    # pyaudio class instance
    p = pyaudio.PyAudio()

    # stream object to get data from microphone
    stream = p.open(
        format=FORMAT,
        channels=CHANNELS,
        rate=RATE,
        input=True,
        output=True,
        frames_per_buffer=int(44100 * TIME_SEC),
        input_device_index = 5
    )

    # set up model
    model = load_model("../Trained_models/2_layer_LSTM.pickle")
    cols = ['ang', 'exc', 'neu', 'sad']
    df_pred_wav = pd.DataFrame([np.zeros(4)], columns=cols)
    graph_window = 30

    if monolog_output:
        fig , ax = plt.subplots(3)
    else:
        fig , ax = plt.subplots(2)

    # basic formatting for the axes
    ax[0].set_title('Emotion Prediction')
    ax[0].set_xlabel('Time')
    ax[0].set_ylabel('Confidence')

    # ax[0]=  plot_line_graph(ax[0], df_pred_wav)
    # ax[1] = plot_bar_graph(ax[1], [0,0,0,0])

    if monolog_output:
        test_text = "this is a test of the text output"
        ax[2] = plot_text(ax[2], test_text.split())

    # show the plot
    plt.show(block=False)

    # for measuring frame rate
    frame_count = 0
    start_time = time.time()

    buffer = deque(maxlen=window_n)

    CHUNK = int(RATE * window_size)

    #fill buffer
    for i in range(window_n):
        buffer.append([])

    delay = 0
    time_elapsed = 0

    print("starting loop")
    while (True):

        start_time_collect = time.time()

        # binary data
        data_new = stream.read(CHUNK)

        time_collect = time.time() - start_time_collect

        time_elapsed += time_collect + window_size

        data_new = struct.unpack(str(2 * CHUNK) + 'B', data_new)

        buffer.append(data_new)

        #compile audio data across all windows
        data_int = []
        for i in range(window_n):
            data_prev = buffer[i]
            data_int = np.append(data_int, data_prev)

        # Generate features from data
        st_features = calculate_features(data_int, RATE, None)
        st_features, _ = pad_sequence_into_array(st_features, maxlen=100)

        # reshape input from (34, 100) to (1, 100, 34)
        st_features = np.array([st_features.T])

        predict_time_start = time.time()
        # predict on model
        wav_test_results = model.predict(st_features)
        predict_time_total = time.time() - predict_time_start

        graphing_time_start = time.time()
        df_pred_wav = df_pred_wav.append({cols[0]:wav_test_results[0][0],
                            cols[1]:wav_test_results[0][1],
                            cols[2]:wav_test_results[0][2],
                            cols[3]:wav_test_results[0][3]
        }, ignore_index=True)

        df_pred_wav_view = df_pred_wav.tail(graph_window)
        df_pred_wav_view.reset_index(drop=True, inplace=True)
        ax[0].cla()
        ax[1].cla()
        ax[0] = plot_line_graph(ax[0], df_pred_wav_view)

        last_res = wav_test_results[0]
        ax[1] = plot_bar_graph (ax[1], last_res)


        if monolog_output:
            ax[2].cla()
            delay_lim = time_elapsed - ((window_n - 1) * window_size)
            text = text_manual(TRANSCRIPTION, delay_lim, time_elapsed)

            ax[2]= plot_text(ax[2], text.split())

        graphing_time_total = time.time() - graphing_time_start

        # update figure canvas
        try:
            fig.canvas.draw()
            fig.canvas.flush_events()
            frame_count += 1

        except TclError:

            # calculate average frame rate
            frame_rate = frame_count / (time.time() - start_time)

            print('stream stopped')
            print('average frame rate = {:.0f} FPS'.format(frame_rate), end = '|')
            print('prediction_time = {:f} seconds'.format(predict_time_total), end = '|')
            print('graphing_time = {:f} seconds'.format(predict_time_total), end = '|')
            

In [5]:
main(window_size = 0.5, window_n = 20, monolog_output = False)



starting loop
stream stopped
average frame rate = 2 FPS|prediction_time = 0.033004 seconds|graphing_time = 0.033004 seconds|stream stopped
average frame rate = 2 FPS|prediction_time = 0.045001 seconds|graphing_time = 0.045001 seconds|stream stopped
average frame rate = 2 FPS|prediction_time = 0.032998 seconds|graphing_time = 0.032998 seconds|stream stopped
average frame rate = 2 FPS|prediction_time = 0.033001 seconds|graphing_time = 0.033001 seconds|stream stopped
average frame rate = 2 FPS|prediction_time = 0.032999 seconds|graphing_time = 0.032999 seconds|stream stopped
average frame rate = 2 FPS|prediction_time = 0.032001 seconds|graphing_time = 0.032001 seconds|stream stopped
average frame rate = 2 FPS|prediction_time = 0.031989 seconds|graphing_time = 0.031989 seconds|stream stopped
average frame rate = 2 FPS|prediction_time = 0.033003 seconds|graphing_time = 0.033003 seconds|stream stopped
average frame rate = 2 FPS|prediction_time = 0.039995 seconds|graphing_time = 0.039995 seco

KeyboardInterrupt: 