Skip to content

Commit

Permalink
added support for v2 apis, removed old code
Browse files Browse the repository at this point in the history
  • Loading branch information
shubhamkr619 committed Jan 25, 2024
1 parent 7adefda commit 2d6510d
Show file tree
Hide file tree
Showing 6 changed files with 145 additions and 212 deletions.
32 changes: 0 additions & 32 deletions speech/livecaption_from_file_v2/README.md

This file was deleted.

87 changes: 0 additions & 87 deletions speech/livecaption_v2/README.md

This file was deleted.

38 changes: 38 additions & 0 deletions speech/snippets/transcribe_streaming_decoder_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package snippets

import (
"bytes"
"os"
"strings"
"testing"

"github.com/GoogleCloudPlatform/golang-samples/internal/testutil"
)

// recognitionAudioFileRawLINEAR16 is the relative path of the audio fixture
// handed to the explicit-decoding streaming test in this file.
// NOTE(review): presumably headerless 16-bit linear PCM, per the name and the
// ".raw" extension — confirm against the fixture and the sample's decoding config.
var recognitionAudioFileRawLINEAR16 = "../testdata/audio.raw"

// TestTrascribeStreamingV2SpecificDecoding calls
// transcribe_streaming_specific_decoding_v2 with the audio file at
// recognitionAudioFileRawLINEAR16 and the project ID read from the
// GOLANG_SAMPLES_PROJECT_ID environment variable, then checks that the output
// written to the buffer contains the expected phrase.
func TestTrascribeStreamingV2SpecificDecoding(t *testing.T) {
	// NOTE(review): presumably skips the test when system-test prerequisites
	// are missing — confirm against internal/testutil.
	testutil.SystemTest(t)
	projectID := os.Getenv("GOLANG_SAMPLES_PROJECT_ID")

	// want is used for both the check and the failure message so that the
	// reported expectation can never drift from what is actually asserted.
	const want = "Brooklyn Bridge"

	var buf bytes.Buffer
	if err := transcribe_streaming_specific_decoding_v2(&buf, recognitionAudioFileRawLINEAR16, projectID); err != nil {
		t.Fatalf("transcribe_streaming_specific_decoding_v2: %v", err)
	}
	if got := buf.String(); !strings.Contains(got, want) {
		t.Errorf("transcribe_streaming_specific_decoding_v2 got %q, want substring %q", got, want)
	}
}
36 changes: 36 additions & 0 deletions speech/snippets/transcribe_streaming_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package snippets

import (
"bytes"
"os"
"strings"
"testing"

"github.com/GoogleCloudPlatform/golang-samples/internal/testutil"
)

// TestTrascribeStreamingV2 calls transcribe_streaming_v2 with the audio file at
// recognitionAudioFile and the project ID read from the
// GOLANG_SAMPLES_PROJECT_ID environment variable, then checks that the output
// written to the buffer contains the expected phrase.
func TestTrascribeStreamingV2(t *testing.T) {
	// NOTE(review): presumably skips the test when system-test prerequisites
	// are missing — confirm against internal/testutil.
	testutil.SystemTest(t)
	projectID := os.Getenv("GOLANG_SAMPLES_PROJECT_ID")

	// want is used for both the check and the failure message so that the
	// reported expectation can never drift from what is actually asserted.
	const want = "Chromecast"

	var buf bytes.Buffer
	if err := transcribe_streaming_v2(&buf, recognitionAudioFile, projectID); err != nil {
		t.Fatalf("transcribe_streaming_v2: %v", err)
	}
	if got := buf.String(); !strings.Contains(got, want) {
		t.Errorf("transcribe_streaming_v2 got %q, want substring %q", got, want)
	}
}
Original file line number Diff line number Diff line change
Expand Up @@ -12,23 +12,20 @@
// See the License for the specific language governing permissions and
// limitations under the License.

// Command livecaption pipes the stdin audio data to
// Command livecaption_from_file streams a local audio file to
// Google Speech API and outputs the transcript.
//
// As an example, gst-launch can be used to capture the mic input:
//
// $ gst-launch-1.0 -v pulsesrc ! audioconvert ! audioresample ! audio/x-raw,channels=1,rate=16000 ! filesink location=/dev/stdout | livecaption <project_id>

package main
package snippets

// [START speech_transcribe_streaming_mic]
// [START speech_transcribe_streaming]
import (
"context"
"flag"
"fmt"
"io"
"log"
"os"
"path/filepath"
"strings"

speech "cloud.google.com/go/speech/apiv2"
"cloud.google.com/go/speech/apiv2/speechpb"
Expand All @@ -38,49 +35,36 @@ var projectID string

const location = "global"

func main() {
ctx := context.Background()
func transcribe_streaming_v2(w io.Writer, path string, projectID string) error {

flag.Usage = func() {
fmt.Fprintf(os.Stderr, "Usage: %s <Project_id>\n", os.Args[0])
fmt.Fprintf(os.Stderr, "<projectID> must be a project_id to a valid gcp projectID with speech api enabled.\n")

}
flag.Parse()
if len(flag.Args()) != 1 {
log.Fatal("Please pass the project_id as a command line argument. Should be a valid project_id with stt api enabled.")
audioFile, err := filepath.Abs(path)
if err != nil {
log.Println("Failed to load file: ", path)
return err
}
projectID = flag.Arg(0)

if projectID == "" {
log.Fatalf("Project is is required parameter: %s", projectID)
}
ctx := context.Background()

client, err := speech.NewClient(ctx)
if err != nil {
log.Fatal(err)
log.Println(err)
return err
}
stream, err := client.StreamingRecognize(ctx)
if err != nil {
log.Fatal(err)
log.Println(err)
return err
}

// Send the initial configuration message.
if err := stream.Send(&speechpb.StreamingRecognizeRequest{
Recognizer: fmt.Sprintf("projects/%s/locations/%s/recognizers/_", projectID, location),
StreamingRequest: &speechpb.StreamingRecognizeRequest_StreamingConfig{
StreamingConfig: &speechpb.StreamingRecognitionConfig{
Config: &speechpb.RecognitionConfig{
// In case of specific file encoding , so specify the decoding config.
//DecodingConfig: &speechpb.RecognitionConfig_AutoDecodingConfig{},
DecodingConfig: &speechpb.RecognitionConfig_ExplicitDecodingConfig{
ExplicitDecodingConfig: &speechpb.ExplicitDecodingConfig{
Encoding: speechpb.ExplicitDecodingConfig_LINEAR16,
SampleRateHertz: 16000,
AudioChannelCount: 1,
},
},
Model: "long",
LanguageCodes: []string{"en-US"},
DecodingConfig: &speechpb.RecognitionConfig_AutoDecodingConfig{},
Model: "long",
LanguageCodes: []string{"en-US"},
Features: &speechpb.RecognitionFeatures{
MaxAlternatives: 2,
},
Expand All @@ -89,36 +73,40 @@ func main() {
},
},
}); err != nil {
log.Fatal(err)
log.Println(err)
return err
}

go func() {
// Pipe stdin to the API.
buf := make([]byte, 1024)
f, err := os.Open(audioFile)
if err != nil {
log.Println(err)
return err
}
defer f.Close()

go func() error {
buf := make([]byte, 1024)
for {

n, err := os.Stdin.Read(buf)

n, err := f.Read(buf)
if n > 0 {
if err := stream.Send(&speechpb.StreamingRecognizeRequest{
Recognizer: fmt.Sprintf("projects/%s/locations/%s/recognizers/_", projectID, location),
StreamingRequest: &speechpb.StreamingRecognizeRequest_Audio{
Audio: buf[:n],
},
}); err != nil {
log.Printf("Could not send audio: %v", err)
return fmt.Errorf("could not send audio: %v", err)
}
}
if err == io.EOF {
// Nothing else to pipe, close the stream.
if err := stream.CloseSend(); err != nil {
log.Fatalf("Could not close stream: %v", err)
return fmt.Errorf("could not close stream: %w", err)
}
return
return nil
}
if err != nil {
log.Printf("Could not read from stdin: %v", err)
log.Printf("Could not read from %s: %v", audioFile, err)
continue
}
}
Expand All @@ -127,25 +115,20 @@ func main() {
for {
resp, err := stream.Recv()
if err == io.EOF {
log.Printf("EOF break")
break
}
if err != nil {
log.Fatalf("Could not recognize: %v", err)
} else {
// It seems like the new response api does not have a field called Error
for _, result := range resp.Results {
//fmt.Printf("Result: %+v\n", result)
if len(result.Alternatives) > 0 {
if result.IsFinal == true {
log.Println("result", result.Alternatives[0].Transcript, result.IsFinal)
}

}
return fmt.Errorf("cannot stream results: %v", err)
}
for i, result := range resp.Results {
fmt.Fprintf(w, "%s\n", strings.Repeat("-", 20))
fmt.Fprintf(w, "Result %d\n", i+1)
for j, alternative := range result.Alternatives {
fmt.Fprintf(w, "Alternative %d is_final: %t : %s\n", j+1, result.IsFinal, alternative.Transcript)
}
}

}
return nil
}

// [END speech_transcribe_streaming_mic]
// [END speech_transcribe_streaming]

0 comments on commit 2d6510d

Please sign in to comment.