added support for v2 apis, removed old code

GoogleCloudPlatform · Jan 25, 2024 · 2d6510d · 2d6510d
1 parent 7adefda
commit 2d6510d
Show file tree

Hide file tree

Showing 6 changed files with 145 additions and 212 deletions.
diff --git a/speech/livecaption_from_file_v2/README.md b/speech/livecaption_from_file_v2/README.md
diff --git a/speech/livecaption_v2/README.md b/speech/livecaption_v2/README.md
diff --git a/speech/snippets/transcribe_streaming_decoder_test.go b/speech/snippets/transcribe_streaming_decoder_test.go
@@ -0,0 +1,38 @@
+// Copyright 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package snippets
+
+import (
+	"bytes"
+	"os"
+	"strings"
+	"testing"
+
+	"github.com/GoogleCloudPlatform/golang-samples/internal/testutil"
+)
+
+// recognitionAudioFileRawLINEAR16 is the relative path of the raw audio
+// fixture passed to the specific-decoding streaming sample.
+var recognitionAudioFileRawLINEAR16 = "../testdata/audio.raw"
+
+// TestTrascribeStreamingV2SpecificDecoding runs
+// transcribe_streaming_specific_decoding_v2 against the raw audio fixture and
+// checks that the text written to the buffer contains "Brooklyn Bridge".
+func TestTrascribeStreamingV2SpecificDecoding(t *testing.T) {
+	testutil.SystemTest(t)
+	projectID := os.Getenv("GOLANG_SAMPLES_PROJECT_ID")
+	// Keep the expected phrase in one place so the check and the failure
+	// message cannot drift apart.
+	const want = "Brooklyn Bridge"
+	var buf bytes.Buffer
+	if err := transcribe_streaming_specific_decoding_v2(&buf, recognitionAudioFileRawLINEAR16, projectID); err != nil {
+		t.Fatalf("transcribe_streaming_specific_decoding_v2: %v", err)
+	}
+	if got := buf.String(); !strings.Contains(got, want) {
+		t.Errorf("transcribe_streaming_specific_decoding_v2 got %q, want substring %q", got, want)
+	}
+}
diff --git a/speech/snippets/transcribe_streaming_test.go b/speech/snippets/transcribe_streaming_test.go
@@ -0,0 +1,36 @@
+// Copyright 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package snippets
+
+import (
+	"bytes"
+	"os"
+	"strings"
+	"testing"
+
+	"github.com/GoogleCloudPlatform/golang-samples/internal/testutil"
+)
+
+// TestTrascribeStreamingV2 runs transcribe_streaming_v2 against
+// recognitionAudioFile and checks that the text written to the buffer
+// contains "Chromecast".
+func TestTrascribeStreamingV2(t *testing.T) {
+	testutil.SystemTest(t)
+	projectID := os.Getenv("GOLANG_SAMPLES_PROJECT_ID")
+	// Keep the expected phrase in one place so the check and the failure
+	// message cannot drift apart.
+	const want = "Chromecast"
+	var buf bytes.Buffer
+	if err := transcribe_streaming_v2(&buf, recognitionAudioFile, projectID); err != nil {
+		t.Fatalf("transcribe_streaming_v2: %v", err)
+	}
+	if got := buf.String(); !strings.Contains(got, want) {
+		t.Errorf("transcribe_streaming_v2 got %q, want substring %q", got, want)
+	}
+}
diff --git a/speech/livecaption_v2/livecaption.go → speech/snippets/transcribe_streaming_v2.go b/speech/livecaption_v2/livecaption.go → speech/snippets/transcribe_streaming_v2.go
@@ -12,23 +12,20 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-// Command livecaption pipes the stdin audio data to
+// This sample streams a local audio file to the
 // Google Speech API and outputs the transcript.
-//
-// As an example, gst-launch can be used to capture the mic input:
-//
-//	$ gst-launch-1.0 -v pulsesrc ! audioconvert ! audioresample ! audio/x-raw,channels=1,rate=16000 ! filesink location=/dev/stdout | livecaption <project_id>
 
-package main
+package snippets
 
-// [START speech_transcribe_streaming_mic]
+// [START speech_transcribe_streaming]
 import (
 	"context"
-	"flag"
 	"fmt"
 	"io"
 	"log"
 	"os"
+	"path/filepath"
+	"strings"
 
 	speech "cloud.google.com/go/speech/apiv2"
 	"cloud.google.com/go/speech/apiv2/speechpb"
@@ -38,49 +35,36 @@ var projectID string
 
 const location = "global"
 
-func main() {
-	ctx := context.Background()
+func transcribe_streaming_v2(w io.Writer, path string, projectID string) error {
 
-	flag.Usage = func() {
-		fmt.Fprintf(os.Stderr, "Usage: %s <Project_id>\n", os.Args[0])
-		fmt.Fprintf(os.Stderr, "<projectID> must be a project_id to a valid gcp projectID with speech api enabled.\n")
-
-	}
-	flag.Parse()
-	if len(flag.Args()) != 1 {
-		log.Fatal("Please pass the project_id as a command line argument. Should be a valid project_id with stt api enabled.")
+	audioFile, err := filepath.Abs(path)
+	if err != nil {
+		log.Println("Failed to load file: ", path)
+		return err
 	}
-	projectID = flag.Arg(0)
 
-	if projectID == "" {
-		log.Fatalf("Project is is required parameter: %s", projectID)
-	}
+	ctx := context.Background()
 
 	client, err := speech.NewClient(ctx)
 	if err != nil {
-		log.Fatal(err)
+		log.Println(err)
+		return err
 	}
 	stream, err := client.StreamingRecognize(ctx)
 	if err != nil {
-		log.Fatal(err)
+		log.Println(err)
+		return err
 	}
-
+	// Send the initial configuration message.
 	if err := stream.Send(&speechpb.StreamingRecognizeRequest{
 		Recognizer: fmt.Sprintf("projects/%s/locations/%s/recognizers/_", projectID, location),
 		StreamingRequest: &speechpb.StreamingRecognizeRequest_StreamingConfig{
 			StreamingConfig: &speechpb.StreamingRecognitionConfig{
 				Config: &speechpb.RecognitionConfig{
 					// In case of specific file encoding , so specify the decoding config.
-					//DecodingConfig: &speechpb.RecognitionConfig_AutoDecodingConfig{},
-					DecodingConfig: &speechpb.RecognitionConfig_ExplicitDecodingConfig{
-						ExplicitDecodingConfig: &speechpb.ExplicitDecodingConfig{
-							Encoding:          speechpb.ExplicitDecodingConfig_LINEAR16,
-							SampleRateHertz:   16000,
-							AudioChannelCount: 1,
-						},
-					},
-					Model:         "long",
-					LanguageCodes: []string{"en-US"},
+					DecodingConfig: &speechpb.RecognitionConfig_AutoDecodingConfig{},
+					Model:          "long",
+					LanguageCodes:  []string{"en-US"},
 					Features: &speechpb.RecognitionFeatures{
 						MaxAlternatives: 2,
 					},
@@ -89,36 +73,40 @@ func main() {
 			},
 		},
 	}); err != nil {
-		log.Fatal(err)
+		log.Println(err)
+		return err
 	}
 
-	go func() {
-		// Pipe stdin to the API.
-		buf := make([]byte, 1024)
+	f, err := os.Open(audioFile)
+	if err != nil {
+		log.Println(err)
+		return err
+	}
+	defer f.Close()
 
+	go func() error {
+		buf := make([]byte, 1024)
 		for {
-
-			n, err := os.Stdin.Read(buf)
-
+			n, err := f.Read(buf)
 			if n > 0 {
 				if err := stream.Send(&speechpb.StreamingRecognizeRequest{
 					Recognizer: fmt.Sprintf("projects/%s/locations/%s/recognizers/_", projectID, location),
 					StreamingRequest: &speechpb.StreamingRecognizeRequest_Audio{
 						Audio: buf[:n],
 					},
 				}); err != nil {
-					log.Printf("Could not send audio: %v", err)
+					return fmt.Errorf("could not send audio: %v", err)
 				}
 			}
 			if err == io.EOF {
 				// Nothing else to pipe, close the stream.
 				if err := stream.CloseSend(); err != nil {
-					log.Fatalf("Could not close stream: %v", err)
+					return fmt.Errorf("could not close stream: %w", err)
 				}
-				return
+				return nil
 			}
 			if err != nil {
-				log.Printf("Could not read from stdin: %v", err)
+				log.Printf("Could not read from %s: %v", audioFile, err)
 				continue
 			}
 		}
@@ -127,25 +115,20 @@ func main() {
 	for {
 		resp, err := stream.Recv()
 		if err == io.EOF {
-			log.Printf("EOF break")
 			break
 		}
 		if err != nil {
-			log.Fatalf("Could not recognize: %v", err)
-		} else {
-			// It seems like the new response api does not have a field called Error
-			for _, result := range resp.Results {
-				//fmt.Printf("Result: %+v\n", result)
-				if len(result.Alternatives) > 0 {
-					if result.IsFinal == true {
-						log.Println("result", result.Alternatives[0].Transcript, result.IsFinal)
-					}
-
-				}
+			return fmt.Errorf("cannot stream results: %v", err)
+		}
+		for i, result := range resp.Results {
+			fmt.Fprintf(w, "%s\n", strings.Repeat("-", 20))
+			fmt.Fprintf(w, "Result %d\n", i+1)
+			for j, alternative := range result.Alternatives {
+				fmt.Fprintf(w, "Alternative %d is_final: %t : %s\n", j+1, result.IsFinal, alternative.Transcript)
 			}
 		}
-
 	}
+	return nil
 }
 
-// [END speech_transcribe_streaming_mic]
+// [END speech_transcribe_streaming]