Skip to content

Commit

Permalink
-
Browse files Browse the repository at this point in the history
  • Loading branch information
previ committed Sep 6, 2015
1 parent 57d7c70 commit 6a6abc4
Show file tree
Hide file tree
Showing 16 changed files with 385 additions and 19 deletions.
157 changes: 157 additions & 0 deletions audio.py
@@ -0,0 +1,157 @@
import contextlib
import logging
import os
import subprocess
import time
import wave
from array import array
from struct import pack
from sys import byteorder

import pyaudio
from pocketsphinx.pocketsphinx import *
from sphinxbase.sphinxbase import *

# Number of frames requested per stream read / wave-file read; also passed to
# PyAudio as frames_per_buffer when opening input streams.
CHUNK_SIZE = 131072
# PyAudio sample format: signed 16-bit integers.
FORMAT = pyaudio.paInt16
# Sampling rate in Hz used for microphone capture and for written WAV files.
RATE = 44100

# Directory containing the PocketSphinx keyword-list (.txt) and dictionary
# (.dict) files, addressed by model name.
MODELDIR = "/home/pi/coderbot/psmodels/"
# Presumably the directory of the sound-effect files (relative to the working
# directory) -- TODO confirm against callers.
SOUNDDIR = "./sounds/"

class Audio:
    """Singleton facade over the robot's audio features.

    Provides speech synthesis / sound playback (``say``), microphone
    recording (``record``, ``record_to_file``), WAV playback (``play``) and
    PocketSphinx keyword spotting (``speech_recog``).
    """

    _instance = None

    @classmethod
    def get_instance(cls):
        """Return the shared instance, creating it on first use."""
        if cls._instance is None:
            cls._instance = cls()
        return cls._instance

    @contextlib.contextmanager
    def _open_stream(self, **kwargs):
        """Yield a PyAudio stream opened with `kwargs`; always release it."""
        p = pyaudio.PyAudio()
        try:
            stream = p.open(**kwargs)
            try:
                yield stream
            finally:
                # Stop before closing so pending output buffers are played
                # rather than discarded.
                stream.stop_stream()
                stream.close()
        finally:
            p.terminate()

    def say(self, what):
        """Play a sound file or speak a text.

        Args:
            what: a value starting with ``$`` names a file under ``SOUNDDIR``
                that is played with ``omxplayer``; any other non-empty value
                is spoken with ``espeak``. ``None`` and ``""`` are ignored.
        """
        if not what:
            return
        # Commands are run from argument lists, without a shell, so quotes or
        # shell metacharacters in `what` cannot inject extra commands.
        try:
            if what.startswith("$"):
                subprocess.call(['omxplayer', os.path.join(SOUNDDIR, what[1:])])
            else:
                with open(os.devnull, 'w') as devnull:
                    subprocess.call(['espeak', '-vit', '-p', '90', '-a', '200',
                                     '-s', '150', '-g', '10', what],
                                    stderr=devnull)
        except OSError:
            # Keep the best-effort behaviour of os.system(): a missing player
            # must not crash the caller.
            logging.exception("say: could not run the audio player")

    def normalize(self, snd_data):
        """Scale the samples so that the loudest one reaches amplitude 16384.

        Args:
            snd_data: sequence of signed 16-bit samples (e.g. ``array('h')``).

        Returns:
            A new ``array('h')`` with the scaled samples. Empty or all-zero
            input has no peak to scale by and is returned unscaled (copied).
        """
        MAXIMUM = 16384
        try:
            peak = max(abs(sample) for sample in snd_data)
        except ValueError:
            # max() of an empty sequence
            peak = 0
        if peak == 0:
            return array('h', snd_data)

        times = float(MAXIMUM) / peak
        return array('h', (int(sample * times) for sample in snd_data))

    def record(self, elapse):
        """Record `elapse` seconds of mono audio from input device 0.

        Args:
            elapse: recording duration in seconds.

        Returns:
            Tuple ``(sample_width, samples)``: the size in bytes of one
            ``FORMAT`` sample and the normalized samples as ``array('h')``.
        """
        samples = array('h')
        # Derive the amount to read from RATE so the recording lasts what was
        # asked for regardless of CHUNK_SIZE.
        frames_left = int(elapse * RATE)

        with self._open_stream(format=FORMAT, channels=1, input_device_index=0,
                               rate=RATE, input=True,
                               frames_per_buffer=CHUNK_SIZE) as stream:
            while frames_left > 0:
                count = min(CHUNK_SIZE, frames_left)
                # data is handled as little-endian signed shorts; swap on
                # big-endian hosts
                chunk = array('h', stream.read(count))
                if byteorder == 'big':
                    chunk.byteswap()
                samples.extend(chunk)
                frames_left -= count

        sample_width = pyaudio.get_sample_size(FORMAT)
        return sample_width, self.normalize(samples)

    def record_to_file(self, filename, elapse):
        """Record `elapse` seconds from the microphone into a mono WAV file.

        Args:
            filename: path of the WAV file to create (overwritten if present).
            elapse: recording duration in seconds.
        """
        sample_width, samples = self.record(elapse)
        frames = pack('<%dh' % len(samples), *samples)

        wf = wave.open(filename, 'wb')
        try:
            wf.setnchannels(1)
            wf.setsampwidth(sample_width)
            wf.setframerate(RATE)
            wf.writeframes(frames)
        finally:
            wf.close()

    def play(self, filename):
        """Play the WAV file `filename` on the default output device."""
        wf = wave.open(filename, 'rb')
        try:
            # open an output stream matching the parameters of the wave file
            with self._open_stream(
                    format=pyaudio.get_format_from_width(wf.getsampwidth()),
                    channels=wf.getnchannels(),
                    rate=wf.getframerate(),
                    output=True) as stream:
                data = wf.readframes(CHUNK_SIZE)
                # readframes() returns an empty (falsy) value at end of file
                while data:
                    # writing to the stream is what actually plays the sound
                    stream.write(data)
                    data = wf.readframes(CHUNK_SIZE)
        finally:
            wf.close()

    def speech_recog(self, model):
        """Listen on input device 0 and return the recognised keywords.

        Args:
            model: base name of the model files in ``MODELDIR``
                (``<model>.txt`` keyword list, ``<model>.dict`` dictionary).

        Returns:
            The concatenation of every non-empty hypothesis string reported
            while listening. Listening ends once 10 seconds pass without a
            new hypothesis.
        """
        # Create a decoder in keyword-spotting mode ('-kws'); a language model
        # (MODELDIR + model + '.lm' via '-lm') would be the alternative.
        config = Decoder.default_config()
        config.set_string('-hmm', '/usr/local/share/pocketsphinx/model/en-us/en-us')
        config.set_string('-kws', MODELDIR + model + '.txt')
        config.set_string('-dict', MODELDIR + model + '.dict')
        decoder = Decoder(config)

        # Substitute for a chunk lost to an input overflow: digital silence of
        # the same byte length as a real chunk.
        silence = b'\x00' * (CHUNK_SIZE * pyaudio.get_sample_size(FORMAT))
        recog_text = ''

        # NOTE(review): audio is captured at RATE; confirm this matches the
        # sample rate the acoustic model expects.
        with self._open_stream(format=FORMAT, channels=1, input_device_index=0,
                               rate=RATE, input=True,
                               frames_per_buffer=CHUNK_SIZE) as stream:
            stream.start_stream()
            decoder.start_utt()
            try:
                tstamp = time.time()
                while time.time() - tstamp < 10.0:
                    try:
                        buf = stream.read(CHUNK_SIZE)
                    except IOError as ex:
                        if pyaudio.paInputOverflowed not in ex.args:
                            raise
                        buf = silence
                        logging.info("white noise")

                    decoder.process_raw(buf, False, False)
                    # hyp() yields None while nothing has been recognised
                    hyp = decoder.hyp()
                    if hyp is not None and hyp.hypstr != '':
                        # NOTE(review): if the hypothesis persists until the
                        # utterance is restarted, it is appended again on each
                        # pass and keeps resetting the timeout -- confirm.
                        recog_text += hyp.hypstr
                        logging.info("text: " + hyp.hypstr)
                        tstamp = time.time()
            finally:
                decoder.end_utt()

        logging.info("recog text: " + recog_text)
        return recog_text

6 changes: 0 additions & 6 deletions coderbot.py
Expand Up @@ -133,12 +133,6 @@ def stop(self):
def is_moving(self):
    """Return the current value of the ``_is_moving`` attribute."""
    moving = self._is_moving
    return moving

def say(self, what):
if what and "$" in what:
os.system ('omxplayer sounds/' + what[1:])
elif what and len(what):
os.system ('espeak -vit -p 90 -a 200 -s 150 -g 10 "' + what + '" 2>>/dev/null')

def set_callback(self, gpio, callback, elapse):
self._cb_elapse[gpio] = elapse * 1000
self._cb[gpio] = callback
Expand Down
1 change: 1 addition & 0 deletions data/program_test_sound.data
@@ -0,0 +1 @@
{"dom_code": "<xml xmlns=\"http://www.w3.org/1999/xhtml\"><block type=\"coderbot_audio_record\" id=\"13\" x=\"1\" y=\"49\"><value name=\"FILENAME\"><block type=\"text\" id=\"34\"><field name=\"TEXT\">test.wav</field></block></value><value name=\"ELAPSE\"><block type=\"math_number\" id=\"53\"><field name=\"NUM\">10</field></block></value><next><block type=\"coderbot_audio_play\" id=\"18\"><value name=\"FILENAME\"><block type=\"text\" id=\"54\"><field name=\"TEXT\">test.wav</field></block></value></block></next></block></xml>", "code": "get_audio().record_to_file(filename='test.wav', elapse=10)\nget_audio().play('test.wav')\n", "name": "test_sound"}
1 change: 1 addition & 0 deletions data/program_test_speech_recog.data
@@ -0,0 +1 @@
{"dom_code": "<xml xmlns=\"http://www.w3.org/1999/xhtml\"><block type=\"text_print\" id=\"26\" x=\"-24\" y=\"170\"><value name=\"TEXT\"><block type=\"coderbot_audio_listen\" id=\"10\"><field name=\"MODEL\">model_simple</field></block></value></block></xml>", "code": "get_cam().set_text(get_audio().speech_recog(model=\"model_simple\"))\n", "name": "test_speech_recog"}
16 changes: 10 additions & 6 deletions main.py
Expand Up @@ -6,6 +6,7 @@
from coderbot import CoderBot, PIN_PUSHBUTTON
from camera import Camera
from motion import Motion
from audio import Audio
from program import ProgramEngine, Program
from config import Config

Expand All @@ -23,6 +24,7 @@
bot = None
cam = None
motion = None
audio = None

app = Flask(__name__,static_url_path="")
#app.config.from_pyfile('coderbot.cfg')
Expand Down Expand Up @@ -85,21 +87,21 @@ def handle_bot():
motion.stop()
elif cmd == "take_photo":
cam.photo_take()
bot.say(app.bot_config.get("sound_shutter"))
audio.say(app.bot_config.get("sound_shutter"))
elif cmd == "video_rec":
cam.video_rec()
bot.say(app.bot_config.get("sound_shutter"))
audio.say(app.bot_config.get("sound_shutter"))
elif cmd == "video_stop":
cam.video_stop()
bot.say(app.bot_config.get("sound_shutter"))
audio.say(app.bot_config.get("sound_shutter"))

elif cmd == "say":
logging.info("say: " + str(param1))
bot.say(param1)
audio.say(param1)

elif cmd == "halt":
logging.info("shutting down")
bot.say(app.bot_config.get("sound_stop"))
audio.say(app.bot_config.get("sound_stop"))
bot.halt()
elif cmd == "restart":
logging.info("restarting bot")
Expand Down Expand Up @@ -211,17 +213,19 @@ def run_server():
global bot
global cam
global motion
global audio
try:
app.bot_config = Config.read()
bot = CoderBot.get_instance(servo=(app.bot_config.get("move_motor_mode")=="servo"))
cam = Camera.get_instance()
motion = Motion.get_instance()
audio = Audio.get_instance()
except ValueError as e:
app.bot_config = {}
logging.error(e)
if app.bot_config.get('load_at_start') and len(app.bot_config.get('load_at_start')):
app.prog = app.prog_engine.load(app.bot_config.get('load_at_start'))

bot.set_callback(PIN_PUSHBUTTON, button_pushed, 100)
bot.say(app.bot_config.get("sound_start"))
audio.say(app.bot_config.get("sound_start"))
app.run(host="0.0.0.0", port=8080, debug=True, use_reloader=False, threaded=True)
4 changes: 4 additions & 0 deletions program.py
Expand Up @@ -8,6 +8,7 @@
import camera
import motion
import config
import audio

PROGRAM_PATH = "./data/"
PROGRAM_PREFIX = "program_"
Expand All @@ -22,6 +23,9 @@ def get_bot():
def get_motion():
    """Return the object given by ``motion.Motion.get_instance()``."""
    instance = motion.Motion.get_instance()
    return instance

def get_audio():
    """Return the object given by ``audio.Audio.get_instance()``."""
    instance = audio.Audio.get_instance()
    return instance

def get_prog_eng():
    """Return the object given by ``ProgramEngine.get_instance()``."""
    engine = ProgramEngine.get_instance()
    return engine

Expand Down
18 changes: 18 additions & 0 deletions psmodels/model_simple.dict
@@ -0,0 +1,18 @@
A AH
A(2) EY
BACKWARD B AE K W ER D
CODERBOT K OW D AH R B AA T
FORWARD F AO R W ER D
LEFT L EH F T
MOVE M UW V
OKAY OW K EY
PICTURE P IH K CH ER
RECORD R AH K AO R D
RECORD(2) R EH K ER D
RECORD(3) R IH K AO R D
RIGHT R AY T
SAY S EY
STOP S T AA P
TAKE T EY K
TURN T ER N
VIDEO V IH D IY OW
82 changes: 82 additions & 0 deletions psmodels/model_simple.lm
@@ -0,0 +1,82 @@
Language model created by QuickLM on Fri Jul 17 10:28:00 EDT 2015
Copyright (c) 1996-2010 Carnegie Mellon University and Alexander I. Rudnicky

The model is in standard ARPA format, designed by Doug Paul while he was at MITRE.

The code that was used to produce this language model is available in Open Source.
Please visit http://www.speech.cs.cmu.edu/tools/ for more information

The (fixed) discount mass is 0.5. The backoffs are computed using the ratio method.
This model based on a corpus of 9 sentences and 17 words

\data\
ngram 1=17
ngram 2=24
ngram 3=18

\1-grams:
-0.9031 </s> -0.3010
-0.9031 <s> -0.2430
-1.8573 A -0.2950
-1.8573 BACKWARD -0.2430
-1.8573 CODERBOT -0.2430
-1.8573 FORWARD -0.2430
-1.8573 LEFT -0.2430
-1.5563 MOVE -0.2888
-1.8573 OKAY -0.2950
-1.8573 PICTURE -0.2430
-1.8573 RECORD -0.2888
-1.8573 RIGHT -0.2430
-1.8573 SAY -0.2430
-1.8573 STOP -0.2888
-1.8573 TAKE -0.2950
-1.5563 TURN -0.2888
-1.5563 VIDEO -0.2430

\2-grams:
-0.9542 <s> MOVE 0.0000
-1.2553 <s> OKAY 0.0000
-1.2553 <s> RECORD 0.0000
-1.2553 <s> SAY 0.0000
-1.2553 <s> STOP 0.0000
-1.2553 <s> TAKE 0.0000
-0.9542 <s> TURN 0.0000
-0.3010 A PICTURE 0.0000
-0.3010 BACKWARD </s> -0.3010
-0.3010 CODERBOT </s> -0.3010
-0.3010 FORWARD </s> -0.3010
-0.3010 LEFT </s> -0.3010
-0.6021 MOVE BACKWARD 0.0000
-0.6021 MOVE FORWARD 0.0000
-0.3010 OKAY CODERBOT 0.0000
-0.3010 PICTURE </s> -0.3010
-0.3010 RECORD VIDEO 0.0000
-0.3010 RIGHT </s> -0.3010
-0.3010 SAY </s> -0.3010
-0.3010 STOP VIDEO 0.0000
-0.3010 TAKE A 0.0000
-0.6021 TURN LEFT 0.0000
-0.6021 TURN RIGHT 0.0000
-0.3010 VIDEO </s> -0.3010

\3-grams:
-0.6021 <s> MOVE BACKWARD
-0.6021 <s> MOVE FORWARD
-0.3010 <s> OKAY CODERBOT
-0.3010 <s> RECORD VIDEO
-0.3010 <s> SAY </s>
-0.3010 <s> STOP VIDEO
-0.3010 <s> TAKE A
-0.6021 <s> TURN LEFT
-0.6021 <s> TURN RIGHT
-0.3010 A PICTURE </s>
-0.3010 MOVE BACKWARD </s>
-0.3010 MOVE FORWARD </s>
-0.3010 OKAY CODERBOT </s>
-0.3010 RECORD VIDEO </s>
-0.3010 STOP VIDEO </s>
-0.3010 TAKE A PICTURE
-0.3010 TURN LEFT </s>
-0.3010 TURN RIGHT </s>

\end\
6 changes: 6 additions & 0 deletions psmodels/model_simple.txt
@@ -0,0 +1,6 @@
MOVE /1e-10/
FORWARD /1e-10/
BACKWARD /1e-10/
LEFT /1e-10/
RIGHT /1e-10/
STOP /1e-10/

0 comments on commit 6a6abc4

Please sign in to comment.