Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(speech): Add speech StreamingRecognize samples #3753

Merged
merged 12 commits into from
Jun 4, 2024
134 changes: 134 additions & 0 deletions speech/snippets/transcribe_streaming_v2.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// This sample streams a local audio file to the Google Cloud Speech-to-Text
// v2 API and writes the transcript, letting the service detect the encoding.

package snippets

// [START speech_transcribe_streaming_v2]
import (
"context"
"fmt"
"io"
"log"
"os"
"path/filepath"
"strings"

speech "cloud.google.com/go/speech/apiv2"
"cloud.google.com/go/speech/apiv2/speechpb"
)

// projectID is a package-level project identifier.
// NOTE(review): the functions visible in this file take their own projectID
// parameter, which shadows this variable — confirm whether anything else in
// the package still reads it.
var projectID string
shubhamkr619 marked this conversation as resolved.
Show resolved Hide resolved

// location is the location segment used when building recognizer resource
// names of the form projects/<project>/locations/<location>/recognizers/_.
const location = "global"
shubhamkr619 marked this conversation as resolved.
Show resolved Hide resolved

func transcribeStreamingV2(w io.Writer, path string, projectID string) error {
shubhamkr619 marked this conversation as resolved.
Show resolved Hide resolved

audioFile, err := filepath.Abs(path)
shubhamkr619 marked this conversation as resolved.
Show resolved Hide resolved
if err != nil {
log.Println("Failed to load file: ", path)
return err
}

ctx := context.Background()

client, err := speech.NewClient(ctx)
if err != nil {
log.Println(err)
return err
}
stream, err := client.StreamingRecognize(ctx)
if err != nil {
log.Println(err)
return err
}
// Send the initial configuration message.
if err := stream.Send(&speechpb.StreamingRecognizeRequest{
shubhamkr619 marked this conversation as resolved.
Show resolved Hide resolved
Recognizer: fmt.Sprintf("projects/%s/locations/%s/recognizers/_", projectID, location),
StreamingRequest: &speechpb.StreamingRecognizeRequest_StreamingConfig{
StreamingConfig: &speechpb.StreamingRecognitionConfig{
Config: &speechpb.RecognitionConfig{
// In case of specific file encoding , so specify the decoding config.
DecodingConfig: &speechpb.RecognitionConfig_AutoDecodingConfig{},
Model: "long",
LanguageCodes: []string{"en-US"},
Features: &speechpb.RecognitionFeatures{
MaxAlternatives: 2,
},
},
StreamingFeatures: &speechpb.StreamingRecognitionFeatures{InterimResults: true},
},
},
}); err != nil {
log.Println(err)
return err
}

f, err := os.Open(audioFile)
if err != nil {
log.Println(err)
return err
}
defer f.Close()

go func() error {
shubhamkr619 marked this conversation as resolved.
Show resolved Hide resolved
buf := make([]byte, 1024)
for {
n, err := f.Read(buf)
if n > 0 {
if err := stream.Send(&speechpb.StreamingRecognizeRequest{
Recognizer: fmt.Sprintf("projects/%s/locations/%s/recognizers/_", projectID, location),
StreamingRequest: &speechpb.StreamingRecognizeRequest_Audio{
Audio: buf[:n],
},
}); err != nil {
return fmt.Errorf("could not send audio: %v", err)
}
}
if err == io.EOF {
// Nothing else to pipe, close the stream.
if err := stream.CloseSend(); err != nil {
return fmt.Errorf("could not close stream: %w", err)
}
return nil
}
if err != nil {
log.Printf("Could not read from %s: %v", audioFile, err)
continue
}
}
}()

for {
resp, err := stream.Recv()
if err == io.EOF {
break
}
if err != nil {
return fmt.Errorf("cannot stream results: %v", err)
}
for i, result := range resp.Results {
fmt.Fprintf(w, "%s\n", strings.Repeat("-", 20))
fmt.Fprintf(w, "Result %d\n", i+1)
for j, alternative := range result.Alternatives {
fmt.Fprintf(w, "Alternative %d is_final: %t : %s\n", j+1, result.IsFinal, alternative.Transcript)
}
}
}
return nil
}

// [END speech_transcribe_streaming_v2]
138 changes: 138 additions & 0 deletions speech/snippets/transcribe_streaming_v2_explicit_decoding.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// This sample streams a local raw audio file to the Google Cloud
// Speech-to-Text v2 API using an explicit decoding config and writes the transcript.

package snippets

// [START speech_transcribe_streaming_v2_explicit_decoding]
import (
"context"
"fmt"
"io"
"log"
"os"
"path/filepath"
"strings"

speech "cloud.google.com/go/speech/apiv2"
"cloud.google.com/go/speech/apiv2/speechpb"
)

// transcribeStreamingSpecificDecodingV2 streams the audio file at path to the
// Speech-to-Text v2 StreamingRecognize API and writes every interim and final
// alternative to w.
//
// The request declares the audio format explicitly as LINEAR16, 16000 Hz,
// one channel, instead of relying on auto-detection. projectID selects the
// project whose default recognizer ("_") in the package-level location is
// used. It returns the first error hit while preparing the stream, sending
// audio, or receiving results.
func transcribeStreamingSpecificDecodingV2(w io.Writer, path string, projectID string) error {
	audioFile, err := filepath.Abs(path)
	if err != nil {
		return fmt.Errorf("resolving audio path %q: %w", path, err)
	}

	ctx := context.Background()

	client, err := speech.NewClient(ctx)
	if err != nil {
		return fmt.Errorf("creating speech client: %w", err)
	}
	defer func() {
		// The result of the call is already decided at this point, so a
		// failure to close is only reported, not returned.
		if cerr := client.Close(); cerr != nil {
			log.Printf("closing speech client: %v", cerr)
		}
	}()

	// Cancelling the stream context on return tears the stream down, which
	// unblocks the sender goroutine if we bail out early.
	streamCtx, cancel := context.WithCancel(ctx)
	defer cancel()

	stream, err := client.StreamingRecognize(streamCtx)
	if err != nil {
		return fmt.Errorf("opening recognize stream: %w", err)
	}

	recognizer := fmt.Sprintf("projects/%s/locations/%s/recognizers/_", projectID, location)

	// Send the initial configuration message.
	if err := stream.Send(&speechpb.StreamingRecognizeRequest{
		Recognizer: recognizer,
		StreamingRequest: &speechpb.StreamingRecognizeRequest_StreamingConfig{
			StreamingConfig: &speechpb.StreamingRecognitionConfig{
				Config: &speechpb.RecognitionConfig{
					// The audio format is stated explicitly rather than
					// auto-detected.
					DecodingConfig: &speechpb.RecognitionConfig_ExplicitDecodingConfig{
						ExplicitDecodingConfig: &speechpb.ExplicitDecodingConfig{
							Encoding:          speechpb.ExplicitDecodingConfig_LINEAR16,
							SampleRateHertz:   16000,
							AudioChannelCount: 1,
						},
					},
					Model:         "long",
					LanguageCodes: []string{"en-US"},
					Features: &speechpb.RecognitionFeatures{
						MaxAlternatives: 2,
					},
				},
				StreamingFeatures: &speechpb.StreamingRecognitionFeatures{InterimResults: true},
			},
		},
	}); err != nil {
		return fmt.Errorf("sending streaming config: %w", err)
	}

	f, err := os.Open(audioFile)
	if err != nil {
		return fmt.Errorf("opening audio file: %w", err)
	}
	defer f.Close()

	// sendAudio pipes the file to the stream in small chunks and half-closes
	// the stream once the whole file has been sent.
	sendAudio := func() error {
		buf := make([]byte, 1024)
		for {
			n, readErr := f.Read(buf)
			if n > 0 {
				if err := stream.Send(&speechpb.StreamingRecognizeRequest{
					Recognizer: recognizer,
					StreamingRequest: &speechpb.StreamingRecognizeRequest_Audio{
						Audio: buf[:n],
					},
				}); err != nil {
					return fmt.Errorf("could not send audio: %w", err)
				}
			}
			if readErr == io.EOF {
				// Nothing else to pipe, close the stream.
				if err := stream.CloseSend(); err != nil {
					return fmt.Errorf("could not close stream: %w", err)
				}
				return nil
			}
			if readErr != nil {
				// Retrying a failed read could spin forever; give up instead.
				return fmt.Errorf("could not read from %s: %w", audioFile, readErr)
			}
		}
	}

	// The channel is buffered so the sender never blocks on reporting, even
	// when the receive loop has already returned.
	sendErr := make(chan error, 1)
	go func() {
		err := sendAudio()
		sendErr <- err
		if err != nil {
			// Publish the error first, then abort the stream so a pending
			// Recv below returns instead of waiting for more audio.
			cancel()
		}
	}()

	for {
		resp, err := stream.Recv()
		if err == io.EOF {
			break
		}
		if err != nil {
			select {
			case sErr := <-sendErr:
				if sErr != nil {
					return fmt.Errorf("cannot stream results: %w (sender: %v)", err, sErr)
				}
			default:
			}
			return fmt.Errorf("cannot stream results: %w", err)
		}
		for i, result := range resp.Results {
			fmt.Fprintf(w, "%s\n", strings.Repeat("-", 20))
			fmt.Fprintf(w, "Result %d\n", i+1)
			for j, alternative := range result.Alternatives {
				fmt.Fprintf(w, "Alternative %d is_final: %t : %s\n", j+1, result.IsFinal, alternative.Transcript)
			}
		}
	}

	// The server has ended the stream, so the sender has finished or is about
	// to fail its next Send; wait for it and surface its outcome.
	return <-sendErr
}

// [END speech_transcribe_streaming_v2_explicit_decoding]
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package snippets

import (
"bytes"
"os"
"strings"
"testing"

"github.com/GoogleCloudPlatform/golang-samples/internal/testutil"
)

// recognitionAudioFileRawLINEAR16 is the relative path of the raw audio
// fixture passed to the explicit-decoding sample in the test below.
// NOTE(review): the name suggests headerless LINEAR16 audio — confirm the
// fixture matches the sample's decoding config.
var recognitionAudioFileRawLINEAR16 = "../testdata/audio.raw"

// TestTranscribeStreamingV2SpecificDecoding runs the explicit-decoding
// streaming sample against the raw audio fixture and checks that the
// transcript written to the buffer contains the expected phrase.
func TestTranscribeStreamingV2SpecificDecoding(t *testing.T) {
	testutil.SystemTest(t)
	projectID := os.Getenv("GOLANG_SAMPLES_PROJECT_ID")

	// want is the phrase the transcript must contain; it is also what the
	// failure message reports, so the two cannot drift apart.
	const want = "Brooklyn Bridge"

	var buf bytes.Buffer
	if err := transcribeStreamingSpecificDecodingV2(&buf, recognitionAudioFileRawLINEAR16, projectID); err != nil {
		t.Fatalf("error in transcribe rawfile %v", err)
	}
	if got := buf.String(); !strings.Contains(got, want) {
		t.Errorf("transcribe_streaming_v2_explicit_decoding got %q, expected %q", got, want)
	}
}
36 changes: 36 additions & 0 deletions speech/snippets/transcribe_streaming_v2_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package snippets

import (
"bytes"
"os"
"strings"
"testing"

"github.com/GoogleCloudPlatform/golang-samples/internal/testutil"
)

func TestTranscribeStreamingV2(t *testing.T) {
testutil.SystemTest(t)
shubhamkr619 marked this conversation as resolved.
Show resolved Hide resolved
projectID := os.Getenv("GOLANG_SAMPLES_PROJECT_ID")
var buf bytes.Buffer
if err := transcribeStreamingV2(&buf, recognitionAudioFile, projectID); err != nil {
t.Fatalf("error in transcribe %v", err)
}
if got := buf.String(); !strings.Contains(got, "Chromecast") {
t.Errorf("transcribe_streaming_v2 got %q, expected %q", got, "Speaker")
}
}
Loading