Multi-channel GA sample (#295)

GoogleCloudPlatform · Feb 20, 2019 · eaacbe8 · eaacbe8
1 parent a131760
commit eaacbe8
Show file tree

Hide file tree

Showing 2 changed files with 120 additions and 0 deletions.
diff --git a/speech/recognize.js b/speech/recognize.js
@@ -635,6 +635,86 @@ async function syncRecognizeWithEnhancedModel(
   // [END speech_transcribe_enhanced_model]
 }
 
+async function syncRecognizeWithMultiChannel(fileName) {
+  // [START speech_transcribe_multichannel]
+  // Node.js file system module, used to load the audio file from disk
+  const fs = require('fs');
+
+  // Imports the Google Cloud client library
+  const speech = require('@google-cloud/speech').v1;
+
+  // Creates a client
+  const client = new speech.SpeechClient();
+
+  /**
+   * TODO(developer): Uncomment the following lines before running the sample.
+   */
+  // const fileName = 'Local path to audio file, e.g. /path/to/audio.raw';
+
+  // The request carries the file's bytes inline as base64 and declares two
+  // audio channels with separate recognition enabled per channel.
+  const request = {
+    config: {
+      encoding: 'LINEAR16',
+      languageCode: 'en-US',
+      audioChannelCount: 2,
+      enableSeparateRecognitionPerChannel: true,
+    },
+    audio: {
+      content: fs.readFileSync(fileName).toString('base64'),
+    },
+  };
+
+  // The first element of the resolved value is the recognition response.
+  const [response] = await client.recognize(request);
+
+  // Build one output line per result: the result's channel tag followed by
+  // the transcript of its first alternative.
+  const lines = [];
+  for (const result of response.results) {
+    const {channelTag, alternatives} = result;
+    lines.push(` Channel Tag: ${channelTag} ${alternatives[0].transcript}`);
+  }
+  const transcription = lines.join('\n');
+  console.log(`Transcription: \n${transcription}`);
+  // [END speech_transcribe_multichannel]
+}
+
+async function syncRecognizeWithMultiChannelGCS(gcsUri) {
+  // [START speech_transcribe_multichannel_gcs]
+  // Imports the Google Cloud client library
+  const speech = require('@google-cloud/speech').v1;
+
+  // Creates a client
+  const client = new speech.SpeechClient();
+
+  /**
+   * TODO(developer): Uncomment the following lines before running the sample.
+   */
+  // const gcsUri = 'gs://my-bucket/audio.raw';
+
+  // The request points at the audio by URI instead of sending its bytes, and
+  // declares two audio channels with separate recognition enabled per channel.
+  const request = {
+    config: {
+      encoding: 'LINEAR16',
+      languageCode: 'en-US',
+      audioChannelCount: 2,
+      enableSeparateRecognitionPerChannel: true,
+    },
+    audio: {
+      uri: gcsUri,
+    },
+  };
+
+  // The first element of the resolved value is the recognition response.
+  const [response] = await client.recognize(request);
+
+  // Build one output line per result: the result's channel tag followed by
+  // the transcript of its first alternative.
+  const lines = [];
+  for (const result of response.results) {
+    const {channelTag, alternatives} = result;
+    lines.push(` Channel Tag: ${channelTag} ${alternatives[0].transcript}`);
+  }
+  const transcription = lines.join('\n');
+  console.log(`Transcription: \n${transcription}`);
+  // [END speech_transcribe_multichannel_gcs]
+}
+
 require(`yargs`) // eslint-disable-line
   .demand(1)
   .command(
@@ -782,6 +862,30 @@ require(`yargs`) // eslint-disable-line
         opts.languageCode
       )
   )
+  .command(
+    `sync-multi-channel <filename>`,
+    `Differentiates input by audio channel in local audio file.`,
+    {},
+    // syncRecognizeWithMultiChannel declares a single parameter (the file
+    // path) and hard-codes its recognition config, so the shared encoding,
+    // sampleRateHertz and languageCode options are not forwarded.
+    opts => syncRecognizeWithMultiChannel(opts.filename)
+  )
+  .command(
+    `sync-multi-channel-gcs <gcsUri>`,
+    `Differentiates input by audio channel in an audio file located in a Google Cloud Storage bucket.`,
+    {},
+    // syncRecognizeWithMultiChannelGCS declares a single parameter (the
+    // object URI) and hard-codes its recognition config, so the shared
+    // encoding, sampleRateHertz and languageCode options are not forwarded.
+    opts => syncRecognizeWithMultiChannelGCS(opts.gcsUri)
+  )
   .options({
     encoding: {
       alias: 'e',
@@ -817,6 +921,7 @@ require(`yargs`) // eslint-disable-line
   )
   .example(`node $0 sync-auto-punctuation ./resources/commercial_mono.wav`)
   .example(`node $0 sync-enhanced-model ./resources/commercial_mono.wav`)
+  .example(`node $0 sync-multi-channel ./resources/commercial_stereo.wav`)
   .wrap(120)
   .recommendCommands()
   .epilogue(`For more information, see https://cloud.google.com/speech/docs`)

diff --git a/speech/system-test/recognize.test.js b/speech/system-test/recognize.test.js
@@ -29,9 +29,11 @@ const resourcePath = path.join(__dirname, '..', 'resources');
 const filename = `audio.raw`;
 const filename1 = `Google_Gnome.wav`;
 const filename2 = `commercial_mono.wav`;
+const filename3 = `commercial_stereo.wav`;
 const filepath = path.join(resourcePath, filename);
 const filepath1 = path.join(resourcePath, filename1);
 const filepath2 = path.join(resourcePath, filename2);
+const filepath3 = path.join(resourcePath, filename3);
 const text = 'how old is the Brooklyn Bridge';
 const text1 = 'the weather outside is sunny';
 const text2 = `Terrific. It's on the way.`;
@@ -43,6 +45,7 @@ describe('Recognize', () => {
     const [bucket] = await storage.createBucket(bucketName);
     await bucket.upload(filepath);
     await bucket.upload(filepath1);
+    await bucket.upload(filepath3);
   });
 
   after(async () => {
@@ -119,4 +122,16 @@ describe('Recognize', () => {
     const output = await exec(`${cmd} sync-enhanced-model ${filepath2}`);
     assert.match(output, new RegExp(text3));
   });
+
+  // Runs the sync-multi-channel command on the local stereo resource file and
+  // checks that the captured output contains a line tagged for channel 2.
+  it('should run multi channel transcription on a local file', async () => {
+    const command = `${cmd} sync-multi-channel ${filepath3}`;
+    const cliOutput = await exec(command);
+    assert.match(cliOutput, /Channel Tag: 2/);
+  });
+
+  // Runs the sync-multi-channel-gcs command against the stereo file's object
+  // URI in the test bucket and checks for a line tagged for channel 2.
+  it('should run multi channel transcription on GCS file', async () => {
+    const gcsUri = `gs://${bucketName}/${filename3}`;
+    const cliOutput = await exec(`${cmd} sync-multi-channel-gcs ${gcsUri}`);
+    assert.match(cliOutput, /Channel Tag: 2/);
+  });
 });