Skip to content

Commit

Permalink
feat(speech): Add speech StreamingRecognize samples (#3753)
Browse files Browse the repository at this point in the history
* feature: add support to v2 stt apis

* fix readme

* refactor variables

* added support for v2 apis, removed old code

* updated license for new files

* applied suggested fixes

* fix tags for region issue

* applied the suggested changes

* applied the suggested changes

* add support for location

---------

Co-authored-by: Kodanda Rama <kodrama@google.com>
Co-authored-by: Marc Dougherty <muncus@users.noreply.github.com>
  • Loading branch information
3 people committed Jun 4, 2024
1 parent 450027d commit 743ff06
Show file tree
Hide file tree
Showing 4 changed files with 339 additions and 0 deletions.
128 changes: 128 additions & 0 deletions speech/snippets/transcribe_streaming_v2.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// This file contains a sample that streams a local audio file to the
// Google Cloud Speech-to-Text v2 API and outputs the transcript.

package snippets

// [START speech_transcribe_streaming_v2]
import (
"context"
"fmt"
"io"
"log"
"os"
"path/filepath"
"strings"

speech "cloud.google.com/go/speech/apiv2"
"cloud.google.com/go/speech/apiv2/speechpb"
)

// transcribeStreamingV2 streams the audio file at path to the Speech-to-Text
// v2 StreamingRecognize API and writes every interim and final alternative it
// receives to w.
//
// projectID names the Google Cloud project whose implicit "_" recognizer in
// the "global" location is used. The returned error is the first failure seen
// while opening the file, creating the client or stream, sending the
// configuration or audio, or receiving results.
func transcribeStreamingV2(w io.Writer, projectID string, path string) error {
	const location = "global"

	audioFile, err := filepath.Abs(path)
	if err != nil {
		return fmt.Errorf("resolving path %q: %w", path, err)
	}
	f, err := os.Open(audioFile)
	if err != nil {
		return err
	}
	defer f.Close()

	// Cancelling on return tears the stream down, so the sender goroutine
	// below cannot outlive this function when receiving fails early.
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	client, err := speech.NewClient(ctx)
	if err != nil {
		return fmt.Errorf("speech.NewClient: %w", err)
	}
	defer func() {
		// A close failure must not mask the transcription result, so it is
		// only logged.
		if err := client.Close(); err != nil {
			log.Printf("Could not close speech client: %v", err)
		}
	}()

	stream, err := client.StreamingRecognize(ctx)
	if err != nil {
		return fmt.Errorf("StreamingRecognize: %w", err)
	}

	recognizer := fmt.Sprintf("projects/%s/locations/%s/recognizers/_", projectID, location)

	// Send the initial configuration message.
	err = stream.Send(&speechpb.StreamingRecognizeRequest{
		Recognizer: recognizer,
		StreamingRequest: &speechpb.StreamingRecognizeRequest_StreamingConfig{
			StreamingConfig: &speechpb.StreamingRecognitionConfig{
				Config: &speechpb.RecognitionConfig{
					// Auto decoding config is selected here; to state the
					// file's encoding yourself, use an explicit decoding
					// config instead.
					DecodingConfig: &speechpb.RecognitionConfig_AutoDecodingConfig{},
					Model:          "long",
					LanguageCodes:  []string{"en-US"},
					Features: &speechpb.RecognitionFeatures{
						MaxAlternatives: 2,
					},
				},
				StreamingFeatures: &speechpb.StreamingRecognitionFeatures{InterimResults: true},
			},
		},
	})
	if err != nil {
		return fmt.Errorf("could not send config: %w", err)
	}

	// sendAudio pipes the file to the stream in 1 KiB chunks and half-closes
	// the stream once the file is exhausted.
	sendAudio := func() error {
		buf := make([]byte, 1024)
		for {
			n, err := f.Read(buf)
			if n > 0 {
				if err := stream.Send(&speechpb.StreamingRecognizeRequest{
					Recognizer: recognizer,
					StreamingRequest: &speechpb.StreamingRecognizeRequest_Audio{
						Audio: buf[:n],
					},
				}); err != nil {
					return fmt.Errorf("could not send audio: %w", err)
				}
			}
			if err == io.EOF {
				// Nothing else to pipe, close the stream.
				if err := stream.CloseSend(); err != nil {
					return fmt.Errorf("could not close stream: %w", err)
				}
				return nil
			}
			if err != nil {
				// Retrying a failed read would spin forever on a persistent
				// error. Half-close (best effort) so the receive loop can
				// finish, and report the read error itself.
				_ = stream.CloseSend()
				return fmt.Errorf("could not read from %s: %w", audioFile, err)
			}
		}
	}

	// Buffered so the goroutine can always deliver its result and exit, even
	// if this function has already returned.
	sendErr := make(chan error, 1)
	go func() { sendErr <- sendAudio() }()

	for {
		resp, err := stream.Recv()
		if err == io.EOF {
			break
		}
		if err != nil {
			return fmt.Errorf("cannot stream results: %w", err)
		}
		for i, result := range resp.Results {
			fmt.Fprintf(w, "%s\n", strings.Repeat("-", 20))
			fmt.Fprintf(w, "Result %d\n", i+1)
			for j, alternative := range result.Alternatives {
				fmt.Fprintf(w, "Alternative %d is_final: %t : %s\n", j+1, result.IsFinal, alternative.Transcript)
			}
		}
	}

	// The stream ended cleanly; surface any failure from the sending side.
	return <-sendErr
}

// [END speech_transcribe_streaming_v2]
137 changes: 137 additions & 0 deletions speech/snippets/transcribe_streaming_v2_explicit_decoding.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// This file contains a sample that streams a local raw audio file to the
// Google Cloud Speech-to-Text v2 API with an explicit decoding configuration
// and outputs the transcript.

package snippets

// [START speech_transcribe_streaming_v2_explicit_decoding]
import (
"context"
"fmt"
"io"
"log"
"os"
"path/filepath"
"strings"

speech "cloud.google.com/go/speech/apiv2"
"cloud.google.com/go/speech/apiv2/speechpb"
)

// transcribeStreamingSpecificDecodingV2 streams the audio file at path to the
// Speech-to-Text v2 StreamingRecognize API with an explicit decoding
// configuration (LINEAR16, 16000 Hz, one channel) and writes every interim
// and final alternative it receives to w.
//
// projectID names the Google Cloud project whose implicit "_" recognizer in
// the "global" location is used. The returned error is the first failure seen
// while opening the file, creating the client or stream, sending the
// configuration or audio, or receiving results.
func transcribeStreamingSpecificDecodingV2(w io.Writer, projectID string, path string) error {
	const location = "global"

	audioFile, err := filepath.Abs(path)
	if err != nil {
		return fmt.Errorf("resolving path %q: %w", path, err)
	}
	f, err := os.Open(audioFile)
	if err != nil {
		return err
	}
	defer f.Close()

	// Cancelling on return tears the stream down, so the sender goroutine
	// below cannot outlive this function when receiving fails early.
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	client, err := speech.NewClient(ctx)
	if err != nil {
		return fmt.Errorf("speech.NewClient: %w", err)
	}
	defer func() {
		// A close failure must not mask the transcription result, so it is
		// only logged.
		if err := client.Close(); err != nil {
			log.Printf("Could not close speech client: %v", err)
		}
	}()

	stream, err := client.StreamingRecognize(ctx)
	if err != nil {
		return fmt.Errorf("StreamingRecognize: %w", err)
	}

	recognizer := fmt.Sprintf("projects/%s/locations/%s/recognizers/_", projectID, location)

	// Send the initial configuration message.
	err = stream.Send(&speechpb.StreamingRecognizeRequest{
		Recognizer: recognizer,
		StreamingRequest: &speechpb.StreamingRecognizeRequest_StreamingConfig{
			StreamingConfig: &speechpb.StreamingRecognitionConfig{
				Config: &speechpb.RecognitionConfig{
					// The encoding, sample rate and channel count are stated
					// explicitly; they must describe the audio being sent.
					DecodingConfig: &speechpb.RecognitionConfig_ExplicitDecodingConfig{
						ExplicitDecodingConfig: &speechpb.ExplicitDecodingConfig{
							Encoding:          speechpb.ExplicitDecodingConfig_LINEAR16,
							SampleRateHertz:   16000,
							AudioChannelCount: 1,
						},
					},
					Model:         "long",
					LanguageCodes: []string{"en-US"},
					Features: &speechpb.RecognitionFeatures{
						MaxAlternatives: 2,
					},
				},
				StreamingFeatures: &speechpb.StreamingRecognitionFeatures{InterimResults: true},
			},
		},
	})
	if err != nil {
		return fmt.Errorf("could not send config: %w", err)
	}

	// sendAudio pipes the file to the stream in 1 KiB chunks and half-closes
	// the stream once the file is exhausted.
	sendAudio := func() error {
		buf := make([]byte, 1024)
		for {
			n, err := f.Read(buf)
			if n > 0 {
				if err := stream.Send(&speechpb.StreamingRecognizeRequest{
					Recognizer: recognizer,
					StreamingRequest: &speechpb.StreamingRecognizeRequest_Audio{
						Audio: buf[:n],
					},
				}); err != nil {
					return fmt.Errorf("could not send audio: %w", err)
				}
			}
			if err == io.EOF {
				// Nothing else to pipe, close the stream.
				if err := stream.CloseSend(); err != nil {
					return fmt.Errorf("could not close stream: %w", err)
				}
				return nil
			}
			if err != nil {
				// Retrying a failed read would spin forever on a persistent
				// error. Half-close (best effort) so the receive loop can
				// finish, and report the read error itself.
				_ = stream.CloseSend()
				return fmt.Errorf("could not read from %s: %w", audioFile, err)
			}
		}
	}

	// Buffered so the goroutine can always deliver its result and exit, even
	// if this function has already returned.
	sendErr := make(chan error, 1)
	go func() { sendErr <- sendAudio() }()

	for {
		resp, err := stream.Recv()
		if err == io.EOF {
			break
		}
		if err != nil {
			return fmt.Errorf("cannot stream results: %w", err)
		}
		for i, result := range resp.Results {
			fmt.Fprintf(w, "%s\n", strings.Repeat("-", 20))
			fmt.Fprintf(w, "Result %d\n", i+1)
			for j, alternative := range result.Alternatives {
				fmt.Fprintf(w, "Alternative %d is_final: %t : %s\n", j+1, result.IsFinal, alternative.Transcript)
			}
		}
	}

	// The stream ended cleanly; surface any failure from the sending side.
	return <-sendErr
}

// [END speech_transcribe_streaming_v2_explicit_decoding]
38 changes: 38 additions & 0 deletions speech/snippets/transcribe_streaming_v2_explicit_decoding_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package snippets

import (
"bytes"
"os"
"strings"
"testing"

"github.com/GoogleCloudPlatform/golang-samples/internal/testutil"
)

// recognitionAudioFileRawLINEAR16 is the relative path of the raw audio
// fixture passed to transcribeStreamingSpecificDecodingV2 by the test in this
// file.
// NOTE(review): presumably headerless 16 kHz mono LINEAR16 audio, matching the
// sample's explicit decoding config — confirm against the fixture.
var recognitionAudioFileRawLINEAR16 = "../testdata/audio.raw"

// TestTranscribeStreamingV2SpecificDecoding runs the explicit-decoding
// streaming sample against the raw audio fixture and checks that the output
// written to the buffer contains the expected phrase.
func TestTranscribeStreamingV2SpecificDecoding(t *testing.T) {
	testutil.SystemTest(t)

	// want is shared by the check and the failure message so the two cannot
	// drift apart.
	const want = "Brooklyn Bridge"

	projectID := os.Getenv("GOLANG_SAMPLES_PROJECT_ID")
	var buf bytes.Buffer
	if err := transcribeStreamingSpecificDecodingV2(&buf, projectID, recognitionAudioFileRawLINEAR16); err != nil {
		t.Fatalf("error in transcribe rawfile %v", err)
	}
	if got := buf.String(); !strings.Contains(got, want) {
		t.Errorf("transcribe_streaming_v2_explicit_decoding got %q, want substring %q", got, want)
	}
}
36 changes: 36 additions & 0 deletions speech/snippets/transcribe_streaming_v2_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package snippets

import (
"bytes"
"os"
"strings"
"testing"

"github.com/GoogleCloudPlatform/golang-samples/internal/testutil"
)

// TestTranscribeStreamingV2 runs the auto-decoding streaming sample against
// the shared audio fixture and checks that the output written to the buffer
// contains the expected phrase.
func TestTranscribeStreamingV2(t *testing.T) {
	testutil.SystemTest(t)

	// want is shared by the check and the failure message so the two cannot
	// drift apart.
	const want = "Chromecast"

	projectID := os.Getenv("GOLANG_SAMPLES_PROJECT_ID")
	var buf bytes.Buffer
	if err := transcribeStreamingV2(&buf, projectID, recognitionAudioFile); err != nil {
		t.Fatalf("error in transcribe %v", err)
	}
	if got := buf.String(); !strings.Contains(got, want) {
		t.Errorf("transcribe_streaming_v2 got %q, want substring %q", got, want)
	}
}

0 comments on commit 743ff06

Please sign in to comment.