In [1]:
import pandas as pd

import os
import sys
import wave
import copy
import math
import tensorflow as tf
from keras.models import load_model


from features import *
from helper import *
import pyaudio
import struct
import numpy as np
import matplotlib.pyplot as plt
import time
from tkinter import TclError
from scipy.interpolate import interp1d

# use this backend to display in separate Tk window
%matplotlib tk
#%matplotlib inline

# constants

TIME_SEC = 2
RATE = 44100                 # samples per second
CHUNK = int(44100 * TIME_SEC)     # 1024 * 20 #int(44100 / TIME_SEC)             # samples per frame
FORMAT = pyaudio.paInt16     # audio format (bytes per sample?)
CHANNELS = 1                 # single channel for microphone


# p = pyaudio.PyAudio()
# for i in range(p.get_device_count()):
#     print(p.get_device_info_by_index(i))

Using TensorFlow backend.


In [66]:
def plot_line_graph(graph, data):
    graph.axhline(y=0.5, color='r', linestyle='--')
    print(data.index)
    # ang_data = interp1d(data.index, data['ang'],kind='cubic')
    # exc_data = interp1d(data.index, data['exc'],kind='cubic')
    # neu_data = interp1d(data.index, data['neu'],kind='cubic')
    # sad_data = interp1d(data.index, data['sad'],kind='cubic')

    ang_data = data['ang']
    exc_data = data['exc']
    neu_data = data['neu']
    sad_data = data['sad']

    graph.plot(ang_data, label="Anger", color='r')
    graph.plot(exc_data, label="Excited", color='y')
    graph.plot(neu_data, label="Neutral", color='g')
    graph.plot(sad_data, label="Sad", color='b')
    graph.legend(loc="upper left")

    return graph

def plot_bar_graph (graph, data):
    graph.axhline(y=0.5, color='r', linestyle='--')
    graph.bar(x=0, height=data[0], label="Anger", color='r')
    graph.bar(x=1, height=data[1], label="Excited", color='y')
    graph.bar(x=2, height=data[2], label="Neutral", color='g')
    graph.bar(x=3, height=data[3], label="Sad", color='b')
    graph.legend(loc="upper left")

    return graph

def plot_text(text_array):
    char_size = 12
    empty_string_size = char_size
    total_input_string_size = 0

    word_index = 0.012
    text_total= ""

    plt.cla()

    plt.rcParams.update({'font.size': char_size})

    for text in text_array:
        text = text + " "
        text_size = len(text) * 0.012
        total_input_string_size += text_size

        plt.text(word_index, 0.5, text, bbox=dict(facecolor='red', alpha=0.5))
        word_index += text_size

    #plt.text(0.1, 0.5, text_total, bbox=dict(facecolor='red', alpha=0.5))

    plt.xlim([0, word_index])

    return plt

In [4]:
# create matplotlib figure and axes
#fig, ax = plt.subplots(1, figsize=(15, 7))

# pyaudio class instance
p = pyaudio.PyAudio()

print("starting stram decleration")

# stream object to get data from microphone
stream = p.open(
    format=FORMAT,
    channels=CHANNELS,
    rate=RATE,
    input=True,
    output=True,
    frames_per_buffer=CHUNK,
    input_device_index = 3
)

starting stram decleration


In [5]:
# set up model
model = load_model("Trained_models/2_layer_LSTM.pickle")
cols = ['ang', 'exc', 'neu', 'sad']
df_pred_wav = pd.DataFrame([np.zeros(4)], columns=cols)
data_prev = []
graph_window = 30

In [68]:
# variable for plotting
x = np.arange(0, 2 * CHUNK, 2)
fig , ax = plt.subplots(3)

# basic formatting for the axes
ax[0].set_title('Emotion Prediction')
ax[0].set_xlabel('Time')
ax[0].set_ylabel('Confidence')

ax[0]=  plot_line_graph(ax[0], df_pred_wav)
ax[1] = plot_bar_graph(ax[1], [0,0,0,0])
test_text = "this is a test of the text output"
ax[2] = plot_text(test_text.split())

# show the plot
plt.show(block=False)

# for measuring frame rate
frame_count = 0
start_time = time.time()

RangeIndex(start=0, stop=1, step=1)


In [6]:
from collections import deque
WINDOW_SIZE = 0.5
WINDOW_N = 20
buffer = deque(maxlen=WINDOW_N)

CHUNK = int(RATE * WINDOW_SIZE)

#fill buffer
for i in range(WINDOW_N):
    buffer.append([])

print("starting loop")
while (True):

    # binary data
    data_new = stream.read(CHUNK)

    predict_time_start = time.time()
    #convert data to integers, make np array, then offset it by 127
    data_new = struct.unpack(str(2 * CHUNK) + 'B', data_new)

    buffer.append(data_new)

    data_int = []
    for i in range(WINDOW_N):
        data_prev = buffer[i]
        data_int = np.append(data_int, data_prev)
   #data_int = data_new

    # Generate features from data
    st_features = calculate_features(data_int, RATE, None)
    st_features, _ = pad_sequence_into_array(st_features, maxlen=100)

    # reshape input from (34, 100) to (1, 100, 34)
    st_features = np.array([st_features.T])

    # predict on model
    wav_test_results = model.predict(st_features)

    predict_time_total = time.time() - predict_time_start

    graphing_time_start = time.time()
    df_pred_wav = df_pred_wav.append({cols[0]:wav_test_results[0][0],
                        cols[1]:wav_test_results[0][1],
                        cols[2]:wav_test_results[0][2],
                        cols[3]:wav_test_results[0][3]
    }, ignore_index=True)

    df_pred_wav_view = df_pred_wav.tail(graph_window)
    df_pred_wav_view.reset_index(drop=True, inplace=True)
    ax[0].cla()
    ax[1].cla()

    ax[0] = plot_line_graph(ax[0], df_pred_wav_view)

    last_res = wav_test_results[0]
    ax[1] = plot_bar_graph (ax[1], last_res)

    graphing_time_total = time.time() - graphing_time_start
    # update figure canvas
    try:
        fig.canvas.draw()
        fig.canvas.flush_events()
        frame_count += 1

    except TclError:

        # calculate average frame rate
        frame_rate = frame_count / (time.time() - start_time)

        print('stream stopped')
        print('average frame rate = {:.0f} FPS'.format(frame_rate))
        print('prediction_time = {f} seconds'.format(predict_time_total))
    data_prev = data_new

starting loop
RangeIndex(start=0, stop=2, step=1)
RangeIndex(start=0, stop=3, step=1)
RangeIndex(start=0, stop=4, step=1)
RangeIndex(start=0, stop=5, step=1)
RangeIndex(start=0, stop=6, step=1)
RangeIndex(start=0, stop=7, step=1)
RangeIndex(start=0, stop=8, step=1)
RangeIndex(start=0, stop=9, step=1)
RangeIndex(start=0, stop=10, step=1)
stream stopped
average frame rate = 2 FPS


KeyError: 'f'