Permalink
Browse files

Made Speex decoding and FLAC encoding happen on the fly

  • Loading branch information...
Eichhoernchen committed Jan 25, 2012
1 parent e3d8f6b commit 50884585a9809eda415aa0938bf9d826aa5162c7
Showing with 19 additions and 32 deletions.
  1. +1 −2 flac.py
  2. +0 −11 reencode.py
  3. +17 −18 siriServer.py
  4. +1 −1 speex.py
View
@@ -22,7 +22,7 @@ def writeCallBack(encoder, buffer, bytes, samples, current_frame, client_data):
class Encoder:
- def initialize(self, sample_rate, channels, bps, numSamples):
+ def initialize(self, sample_rate, channels, bps):
libflac.FLAC__stream_encoder_new.restype = c_void_p
libflac.FLAC__stream_encoder_set_verify.argtypes = [c_void_p, c_bool]
libflac.FLAC__stream_encoder_set_verify.restype = c_bool
@@ -70,7 +70,6 @@ def initialize(self, sample_rate, channels, bps, numSamples):
ok &= libflac.FLAC__stream_encoder_set_channels(self.encoder, channels)
ok &= libflac.FLAC__stream_encoder_set_bits_per_sample(self.encoder, bps);
ok &= libflac.FLAC__stream_encoder_set_sample_rate(self.encoder, sample_rate);
- ok &= libflac.FLAC__stream_encoder_set_total_samples_estimate(self.encoder, numSamples);
self.output = ""
View
@@ -1,11 +0,0 @@
-from speex import *
-import sys
-import math
-
-def decodeToPCM(appleSpeex, rate, quality):
- dec = Decoder()
- dec.initialize(mode=SPEEX_MODEID_WB)
- raw = dec.decode(appleSpeex)
- dec.destroy()
- return raw
-
View
@@ -1,7 +1,7 @@
import socket, ssl, sys, zlib, binascii, time, select, struct, biplist
from email.utils import formatdate
import uuid
-import reencode
+import speex
import flac
import json
import asyncore
@@ -140,31 +140,30 @@ def process_compressed_data(self):
if object['class'] == 'DestroyAssistant':
self.send_plist({"class": "AssistantDestroyed", "properties": {"assistantId": object['properties']['assistantId']}, "aceId":str(uuid.uuid4()), "refId":object['aceId'], "group":"com.apple.ace.system"})
- if object['class'] == 'StartSpeechRequest':
- self.speech[object['aceId']] = []
-
- if object['class'] == 'StartSpeechDictation':
- self.speech[object['aceId']] = []
+ if object['class'] == 'StartSpeechRequest' or object['class'] == 'StartSpeechDictation':
+ decoder = speex.Decoder()
+ decoder.initialize(mode=speex.SPEEX_MODEID_WB)
+ encoder = flac.Encoder()
+ encoder.initialize(16000, 1, 16) #16kHz sample rate, 1 channel, 16 bits per sample
+ self.speech[object['aceId']] = (decoder, encoder)
if object['class'] == 'SpeechPacket':
- self.speech[object['refId']] += object['properties']['packets']
+ (decoder, encoder) = self.speech[object['refId']]
+ pcm = decoder.decode(object['properties']['packets'])
+ encoder.encode(pcm)
if object['class'] == 'CancelRequest':
# we should test if this still exists..
del self.speech[object['refId']]
if object['class'] == 'FinishSpeech':
- #this should be done async
- pcm = reencode.decodeToPCM(self.speech[object['refId']], 16000, 8)
+ (decoder, encoder) = self.speech[object['refId']]
+ decoder.destroy()
+ encoder.finish()
+ flacBin = encoder.getBinary()
+ encoder.destroy()
del self.speech[object['refId']]
- enc = flac.Encoder()
- numSamples = int(len(pcm)/2)
- print "Having audio of {0}ms".format(int(len(pcm)/2/16))
- enc.initialize(16000, 1, 16, numSamples)
- enc.encode(pcm)
- enc.finish()
- flacBin = enc.getBinary()
- enc.destroy()
+ #this should be done async
http_request = "POST /speech-api/v1/recognize?xjerr=1&client=chromium&pfilter=2&lang=de-DE&maxresults=6 HTTP/1.0\r\nHost: www.google.com\r\nContent-Type: audio/x-flac; rate=16000\r\nContent-Length: %d\r\n\r\n" % len(flacBin)
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
@@ -182,7 +181,7 @@ def process_compressed_data(self):
print u"Best matching result: \"{0}\" with a confidence of {1}%".format(best_match, round(float(best_match_confidence)*100,2))
# construct a SpeechRecognized
- token = speechObjects.Token(best_match, 0, int(len(pcm)/2/16), 1000.0, True, True)
+ token = speechObjects.Token(best_match, 0, 0, 1000.0, True, True)
interpretation = speechObjects.Interpretation([token])
phrase = speechObjects.Phrase(lowConfidence=False, interpretations=[interpretation])
recognition = speechObjects.Recognition([phrase])
View
@@ -104,7 +104,7 @@ def decode(self, data):
self.buffer = create_string_buffer(1024)
decoded_frame = (c_int16*self.frame_size.value)()
- out = ""
+ out = ""
for i in range(0,len(data)):
self.buffer = data[i]
libspeex.speex_bits_read_from(byref(self.bits), self.buffer, len(data[i]))

0 comments on commit 5088458

Please sign in to comment.