Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
16 changed files
with
385 additions
and
19 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,157 @@ | ||
import os | ||
import time | ||
from sys import byteorder | ||
from array import array | ||
from struct import pack | ||
|
||
import pyaudio | ||
import wave | ||
import logging | ||
|
||
from pocketsphinx.pocketsphinx import * | ||
from sphinxbase.sphinxbase import * | ||
|
||
# Audio capture settings used when opening PyAudio streams.
CHUNK_SIZE = 128 * 1024  # 131072 frames: buffer size and per-read() request
FORMAT = pyaudio.paInt16  # sample format handed to PyAudio.open()
RATE = 44100  # sampling rate handed to PyAudio.open()

# Prefix of the pocketsphinx keyword-list (.txt) and dictionary (.dict) files.
MODELDIR = "/home/pi/coderbot/psmodels/"
# Relative path of the sounds directory.
SOUNDDIR = "./sounds/"
|
||
class Audio:
    """Audio helper: speech output, recording, playback and keyword spotting.

    Use ``Audio.get_instance()`` to obtain the shared instance.
    """

    _instance = None

    @classmethod
    def get_instance(cls):
        """Return the shared instance, creating it on first use."""
        if cls._instance is None:
            cls._instance = cls()
        return cls._instance

    def say(self, what):
        """Speak ``what`` with espeak, or play a sound file.

        A value starting with ``$`` names a file inside the sounds
        directory that is played with omxplayer; any other non-empty value
        is spoken.  Empty or ``None`` input does nothing.  The call blocks
        until the external program exits.
        """
        # Local import so the file's import block does not have to change.
        import subprocess

        if not what:
            return
        try:
            if what.startswith("$"):
                # Argument lists bypass the shell, so spaces, quotes or
                # shell metacharacters in the name cannot alter the command.
                subprocess.call(['omxplayer', SOUNDDIR + what[1:]])
            else:
                with open(os.devnull, 'w') as devnull:
                    subprocess.call(
                        ['espeak', '-vit', '-p', '90', '-a', '200',
                         '-s', '150', '-g', '10', what],
                        stderr=devnull)
        except OSError as ex:
            # os.system() never raised when the program was missing; keep
            # the call best-effort instead of crashing the caller.
            logging.warning("cannot run audio command: %s", ex)

    def normalize(self, snd_data):
        """Return a copy of ``snd_data`` scaled so its peak is 16384.

        ``snd_data`` is a sequence of signed 16-bit samples.  Empty or
        completely silent input is returned unscaled (there is no peak to
        scale by), as a new ``array('h')``.
        """
        MAXIMUM = 16384
        if not snd_data:
            return array('h')
        peak = max(abs(sample) for sample in snd_data)
        if peak == 0:
            return array('h', snd_data)
        times = float(MAXIMUM) / peak
        return array('h', (int(sample * times) for sample in snd_data))

    def record(self, elapse):
        """Record ``elapse`` seconds of mono audio from input device 0.

        Returns ``(sample_width, samples)`` where ``sample_width`` is the
        size of one sample in bytes and ``samples`` is a normalized
        ``array('h')``.
        """
        p = pyaudio.PyAudio()
        try:
            sample_width = p.get_sample_size(FORMAT)
            stream = p.open(format=FORMAT, channels=1, input_device_index=0,
                            rate=RATE, input=True,
                            frames_per_buffer=CHUNK_SIZE)
            try:
                samples = array('h')
                # Count frames rather than loop iterations so the recording
                # length follows CHUNK_SIZE and RATE instead of constants
                # that no longer match them.
                frames_left = int(elapse * RATE)
                while frames_left > 0:
                    wanted = min(CHUNK_SIZE, frames_left)
                    # little endian, signed short
                    chunk = array('h', stream.read(wanted))
                    if byteorder == 'big':
                        chunk.byteswap()
                    samples.extend(chunk)
                    frames_left -= wanted
            finally:
                stream.stop_stream()
                stream.close()
        finally:
            p.terminate()

        return sample_width, self.normalize(samples)

    def record_to_file(self, filename, elapse):
        """Record ``elapse`` seconds and write them to ``filename`` as WAV."""
        sample_width, data = self.record(elapse)
        frames = pack('<' + ('h' * len(data)), *data)

        wf = wave.open(filename, 'wb')
        try:
            wf.setnchannels(1)
            wf.setsampwidth(sample_width)
            wf.setframerate(RATE)
            wf.writeframes(frames)
        finally:
            wf.close()

    def play(self, filename):
        """Play the WAV file ``filename`` and block until it has finished."""
        wf = wave.open(filename, 'rb')
        try:
            p = pyaudio.PyAudio()
            try:
                # Open an output stream matching the file's own format.
                stream = p.open(
                    format=p.get_format_from_width(wf.getsampwidth()),
                    channels=wf.getnchannels(),
                    rate=wf.getframerate(),
                    output=True)
                try:
                    # readframes() returns an empty (byte) string at end of
                    # file; a truthiness test works for str and bytes alike.
                    data = wf.readframes(CHUNK_SIZE)
                    while data:
                        # Writing to the stream is what plays the sound.
                        stream.write(data)
                        data = wf.readframes(CHUNK_SIZE)
                finally:
                    stream.stop_stream()
                    stream.close()
            finally:
                p.terminate()
        finally:
            wf.close()

    def speech_recog(self, model):
        """Listen on input device 0 and return the recognized keywords.

        ``model`` is the base name of the keyword list (``.txt``) and
        dictionary (``.dict``) files under ``MODELDIR``.  Listening stops
        once 10 seconds pass without a non-empty hypothesis; every
        non-empty hypothesis is appended to the returned string and
        restarts that timer.
        """
        # Create a decoder for the requested keyword model.
        config = Decoder.default_config()
        config.set_string('-hmm', '/usr/local/share/pocketsphinx/model/en-us/en-us')
        config.set_string('-kws', MODELDIR + model + '.txt')
        config.set_string('-dict', MODELDIR + model + '.dict')
        decoder = Decoder(config)

        recog_text = ''
        p = pyaudio.PyAudio()
        try:
            # One full chunk of silence, substituted when a read overflows.
            silence = b'\x00' * (CHUNK_SIZE * p.get_sample_size(FORMAT))
            stream = p.open(format=FORMAT, channels=1, input_device_index=0,
                            rate=RATE, input=True,
                            frames_per_buffer=CHUNK_SIZE)
            try:
                stream.start_stream()
                decoder.start_utt()
                tstamp = time.time()

                while time.time() - tstamp < 10.0:
                    try:
                        buf = stream.read(CHUNK_SIZE)
                    except IOError as ex:
                        # The error code's position in args is not relied
                        # on, and exceptions cannot be indexed on Python 3.
                        if pyaudio.paInputOverflowed not in ex.args:
                            raise
                        buf = silence
                        logging.info("white noise")

                    decoder.process_raw(buf, False, False)
                    # hyp() yields no object until something is recognized.
                    hyp = decoder.hyp()
                    text = getattr(hyp, 'hypstr', '')
                    if text:
                        recog_text += text
                        print("text: " + text)
                        tstamp = time.time()
                decoder.end_utt()
            finally:
                stream.stop_stream()
                stream.close()
        finally:
            p.terminate()

        logging.info("recog text: " + recog_text)
        return recog_text
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
{"dom_code": "<xml xmlns=\"http://www.w3.org/1999/xhtml\"><block type=\"coderbot_audio_record\" id=\"13\" x=\"1\" y=\"49\"><value name=\"FILENAME\"><block type=\"text\" id=\"34\"><field name=\"TEXT\">test.wav</field></block></value><value name=\"ELAPSE\"><block type=\"math_number\" id=\"53\"><field name=\"NUM\">10</field></block></value><next><block type=\"coderbot_audio_play\" id=\"18\"><value name=\"FILENAME\"><block type=\"text\" id=\"54\"><field name=\"TEXT\">test.wav</field></block></value></block></next></block></xml>", "code": "get_audio().record_to_file(filename='test.wav', elapse=10)\nget_audio().play('test.wav')\n", "name": "test_sound"} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
{"dom_code": "<xml xmlns=\"http://www.w3.org/1999/xhtml\"><block type=\"text_print\" id=\"26\" x=\"-24\" y=\"170\"><value name=\"TEXT\"><block type=\"coderbot_audio_listen\" id=\"10\"><field name=\"MODEL\">model_simple</field></block></value></block></xml>", "code": "get_cam().set_text(get_audio().speech_recog(model=\"model_simple\"))\n", "name": "test_speech_recog"} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
A AH | ||
A(2) EY | ||
BACKWARD B AE K W ER D | ||
CODERBOT K OW D AH R B AA T | ||
FORWARD F AO R W ER D | ||
LEFT L EH F T | ||
MOVE M UW V | ||
OKAY OW K EY | ||
PICTURE P IH K CH ER | ||
RECORD R AH K AO R D | ||
RECORD(2) R EH K ER D | ||
RECORD(3) R IH K AO R D | ||
RIGHT R AY T | ||
SAY S EY | ||
STOP S T AA P | ||
TAKE T EY K | ||
TURN T ER N | ||
VIDEO V IH D IY OW |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,82 @@ | ||
Language model created by QuickLM on Fri Jul 17 10:28:00 EDT 2015 | ||
Copyright (c) 1996-2010 Carnegie Mellon University and Alexander I. Rudnicky | ||
|
||
The model is in standard ARPA format, designed by Doug Paul while he was at MITRE. | ||
|
||
The code that was used to produce this language model is available in Open Source. | ||
Please visit http://www.speech.cs.cmu.edu/tools/ for more information | ||
|
||
The (fixed) discount mass is 0.5. The backoffs are computed using the ratio method. | ||
This model based on a corpus of 9 sentences and 17 words | ||
|
||
\data\ | ||
ngram 1=17 | ||
ngram 2=24 | ||
ngram 3=18 | ||
|
||
\1-grams: | ||
-0.9031 </s> -0.3010 | ||
-0.9031 <s> -0.2430 | ||
-1.8573 A -0.2950 | ||
-1.8573 BACKWARD -0.2430 | ||
-1.8573 CODERBOT -0.2430 | ||
-1.8573 FORWARD -0.2430 | ||
-1.8573 LEFT -0.2430 | ||
-1.5563 MOVE -0.2888 | ||
-1.8573 OKAY -0.2950 | ||
-1.8573 PICTURE -0.2430 | ||
-1.8573 RECORD -0.2888 | ||
-1.8573 RIGHT -0.2430 | ||
-1.8573 SAY -0.2430 | ||
-1.8573 STOP -0.2888 | ||
-1.8573 TAKE -0.2950 | ||
-1.5563 TURN -0.2888 | ||
-1.5563 VIDEO -0.2430 | ||
|
||
\2-grams: | ||
-0.9542 <s> MOVE 0.0000 | ||
-1.2553 <s> OKAY 0.0000 | ||
-1.2553 <s> RECORD 0.0000 | ||
-1.2553 <s> SAY 0.0000 | ||
-1.2553 <s> STOP 0.0000 | ||
-1.2553 <s> TAKE 0.0000 | ||
-0.9542 <s> TURN 0.0000 | ||
-0.3010 A PICTURE 0.0000 | ||
-0.3010 BACKWARD </s> -0.3010 | ||
-0.3010 CODERBOT </s> -0.3010 | ||
-0.3010 FORWARD </s> -0.3010 | ||
-0.3010 LEFT </s> -0.3010 | ||
-0.6021 MOVE BACKWARD 0.0000 | ||
-0.6021 MOVE FORWARD 0.0000 | ||
-0.3010 OKAY CODERBOT 0.0000 | ||
-0.3010 PICTURE </s> -0.3010 | ||
-0.3010 RECORD VIDEO 0.0000 | ||
-0.3010 RIGHT </s> -0.3010 | ||
-0.3010 SAY </s> -0.3010 | ||
-0.3010 STOP VIDEO 0.0000 | ||
-0.3010 TAKE A 0.0000 | ||
-0.6021 TURN LEFT 0.0000 | ||
-0.6021 TURN RIGHT 0.0000 | ||
-0.3010 VIDEO </s> -0.3010 | ||
|
||
\3-grams: | ||
-0.6021 <s> MOVE BACKWARD | ||
-0.6021 <s> MOVE FORWARD | ||
-0.3010 <s> OKAY CODERBOT | ||
-0.3010 <s> RECORD VIDEO | ||
-0.3010 <s> SAY </s> | ||
-0.3010 <s> STOP VIDEO | ||
-0.3010 <s> TAKE A | ||
-0.6021 <s> TURN LEFT | ||
-0.6021 <s> TURN RIGHT | ||
-0.3010 A PICTURE </s> | ||
-0.3010 MOVE BACKWARD </s> | ||
-0.3010 MOVE FORWARD </s> | ||
-0.3010 OKAY CODERBOT </s> | ||
-0.3010 RECORD VIDEO </s> | ||
-0.3010 STOP VIDEO </s> | ||
-0.3010 TAKE A PICTURE | ||
-0.3010 TURN LEFT </s> | ||
-0.3010 TURN RIGHT </s> | ||
|
||
\end\ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
MOVE /1e-10/ | ||
FORWARD /1e-10/ | ||
BACKWARD /1e-10/ | ||
LEFT /1e-10/ | ||
RIGHT /1e-10/ | ||
STOP /1e-10/ |
Oops, something went wrong.