Azure-Samples · trrwilson · Jul 11, 2020 · Jul 11, 2020 · Jul 23, 2020 · Jul 24, 2020
@@ -3,8 +3,9 @@
 
 namespace UWPVoiceAssistantSample
 {
-    using Newtonsoft.Json.Linq;
-
+    using Newtonsoft.Json.Linq;
+    using System.Globalization;
+
     /// <summary>
     /// Class determines the activity received from the Bot and deserializes the response.
     /// </summary>
@@ -18,31 +19,31 @@ public ActivityWrapper(string activityJson)
         {
             var activityObj = JObject.Parse(activityJson);
 
-            switch (activityObj["type"]?.ToString().ToLower())
+            switch (activityObj["type"]?.ToString().ToUpperInvariant())
             {
-                case "trace":
+                case "TRACE":
                     this.Type = ActivityType.Trace;
                     break;
-                case "message":
+                case "MESSAGE":
                     this.Type = ActivityType.Message;
                     break;
-                case "event":
+                case "EVENT":
                     this.Type = ActivityType.Event;
                     break;
                 default:
                     this.Type = ActivityType.Unrecognized;
                     break;
             }
 
-            switch (activityObj["inputHint"]?.ToString().ToLower())
+            switch (activityObj["inputHint"]?.ToString().ToUpperInvariant())
             {
-                case "ignoringinput":
+                case "IGNORINGINPUT":
                     this.InputHint = InputHintType.IgnoringInput;
                     break;
-                case "acceptinginput":
+                case "ACCEPTINGINPUT":
                     this.InputHint = InputHintType.AcceptingInput;
                     break;
-                case "expectinginput":
+                case "EXPECTINGINPUT":
                     this.InputHint = InputHintType.ExpectingInput;
                     break;
                 default:

@@ -25,6 +25,7 @@ public sealed partial class App : Application, IDisposable
     {
         private readonly ILogProvider logger;
         private readonly IDialogManager dialogManager;
+        private readonly IKeywordRegistration keywordRegistration;
         private readonly IAgentSessionManager agentSessionManager;
         private BackgroundTaskDeferral deferral;
         private bool alreadyDisposed = false;
@@ -45,21 +46,18 @@ public App()
             this.Suspending += this.OnSuspending;
             MVARegistrationHelpers.UnlockLimitedAccessFeature();
 
-            var keywordRegistration = new KeywordRegistration(
-                new Version(1, 0, 0, 0));
-
+            this.keywordRegistration = new KeywordRegistration();
             this.agentSessionManager = new AgentSessionManager();
-
             this.dialogManager = new DialogManager<List<byte>>(
                 new DirectLineSpeechDialogBackend(),
-                keywordRegistration,
+                this.keywordRegistration,
                 new AgentAudioInputProvider(),
                 this.agentSessionManager,
                 new MediaPlayerDialogAudioOutputAdapter());
 
             var serviceCollection = new ServiceCollection();
             serviceCollection.AddSingleton(this.dialogManager);
-            serviceCollection.AddSingleton<IKeywordRegistration>(keywordRegistration);
+            serviceCollection.AddSingleton(this.keywordRegistration);
             serviceCollection.AddSingleton(this.agentSessionManager);
             this.Services = serviceCollection.BuildServiceProvider();
 

@@ -33,7 +33,7 @@ public enum WaveHeaderLengthOption
     /// <summary>
     /// An encapsulation of extra data and operations needed to apply a WAVEFORMAT header to an existing stream.
     /// </summary>
-    public class WaveHeader
+    public static class WaveHeader
     {
         /// <summary>
         /// Writes a standard WAVEFORMAT header (RIFF) to the provided stream that matches the provided PCM encoding

@@ -30,16 +30,16 @@ public class AgentAudioInputProvider
         protected AudioEncodingProperties outputEncoding;
         protected IAgentSessionWrapper agentSession;
         protected AudioGraph inputGraph;
-        protected AudioDeviceInputNode inputNode;
         protected AudioFrameOutputNode outputNode;
         protected bool graphRunning;
         protected bool disposed;
         protected SemaphoreSlim debugAudioOutputFileSemaphore;
         protected Stream debugAudioOutputFileStream;
+        private readonly ILogProvider logger;
+        private AudioDeviceInputNode inputNode;
         private bool dataAvailableInitialized = false;
         private int bytesToSkip;
         private int bytesAlreadySkipped;
-        private ILogProvider logger;
 
         /// <summary>
         /// Initializes a new instance of the <see cref="AgentAudioInputProvider"/> class.
@@ -65,7 +65,7 @@ public AgentAudioInputProvider()
         /// a keyword in an audio stream. Some amount of silence/audio prior to the keyword is necessary for
         /// normal operation.
         /// </summary>
-        public static TimeSpan InitialKeywordTrimDuration { get; } = new TimeSpan(0, 0, 0, 0, 2000);
+        public static TimeSpan InitialKeywordTrimDuration { get; } = new TimeSpan(0, 0, 0, 0, 2250);
 
         /// <summary>
         /// Gets or sets a value indicating whether debug audio output to local file capture
@@ -154,6 +154,9 @@ public async Task StopAsync()
                 await this.FinishDebugAudioDumpIfNeededAsync();
 
                 this.inputGraph.Stop();
+                this.inputNode.Stop();
+                this.inputNode.Dispose();
+                this.inputGraph.Dispose();
 
                 this.logger.Log(LogMessageLevel.AudioLogs, "Audio Graph Stopped");
                 this.graphRunning = false;

@@ -0,0 +1,56 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
+
+namespace UWPVoiceAssistantSample.AudioInput
+{
+    using Microsoft.CognitiveServices.Speech;
+    using UWPVoiceAssistantSample.AudioCommon;
+    using Windows.Media.MediaProperties;
+
+    /// <summary>
+    /// Describes a data source that a PullAudioInputSink can read from: an optional underlying source object plus
+    /// the padding behavior and audio format that accompany it.
+    /// </summary>
+    public class PullAudioDataSource
+    {
+        private PullAudioDataSource(object baseSource, bool padWithZeroes = true)
+        {
+            this.PadWithZeroes = padWithZeroes;
+            this.BaseSource = baseSource;
+        }
+
+        /// <summary>
+        /// Gets the predefined PullAudioDataSource that represents empty input (all zeroes). It has no base source.
+        /// </summary>
+        public static PullAudioDataSource EmptyInput { get; } = new PullAudioDataSource(baseSource: null);
+
+        /// <summary>
+        /// Gets the predefined PullAudioDataSource indicating that data is pushed manually into the consuming sink.
+        /// It has no base source and is distinguished from <see cref="EmptyInput"/> by instance identity.
+        /// </summary>
+        public static PullAudioDataSource PushedData { get; } = new PullAudioDataSource(baseSource: null);
+
+        /// <summary>
+        /// Gets the object backing this data source, or null when the source has no backing object.
+        /// </summary>
+        public object BaseSource { get; }
+
+        /// <summary>
+        /// Gets a value indicating whether reads that come up short should be filled out with zeroes.
+        /// </summary>
+        public bool PadWithZeroes { get; }
+
+        /// <summary>
+        /// Gets or sets the encoding of the audio provided by this source. Defaults to the Direct Line Speech
+        /// default input encoding.
+        /// </summary>
+        public AudioEncodingProperties AudioFormat { get; set; } = DirectLineSpeechAudio.DefaultInput.Encoding;
+
+        /// <summary>
+        /// Builds a PullAudioDataSource whose base source is the AudioDataStream derived from the provided
+        /// KeywordRecognitionResult.
+        /// </summary>
+        /// <param name="result"> The KeywordRecognitionResult from which to derive the input audio. </param>
+        /// <param name="padWithZeroes"> Whether incomplete reads should be padded with zeroes. </param>
+        /// <returns> A PullAudioDataSource for this KeywordRecognitionResult. </returns>
+        public static PullAudioDataSource FromKeywordResult(KeywordRecognitionResult result, bool padWithZeroes = true)
+        {
+            var keywordAudio = AudioDataStream.FromResult(result);
+            return new PullAudioDataSource(keywordAudio, padWithZeroes);
+        }
+    }
+}
@@ -0,0 +1,198 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
+
+namespace UWPVoiceAssistantSample.AudioInput
+{
+    using System;
+    using System.Collections.Generic;
+    using System.Diagnostics.Contracts;
+    using System.Threading;
+    using Microsoft.CognitiveServices.Speech;
+    using Microsoft.CognitiveServices.Speech.Audio;
+    using Windows.Media.MediaProperties;
+
+    /// <summary>
+    /// Helper class that encapsulates state management for a reusable PullAudioInputStream object that may use a variety
+    /// of underlying sources. Reads are served from the currently selected <see cref="PullAudioDataSource"/> and the
+    /// duration of audio handed out since the last reset is tracked so that a bookmark event can be raised.
+    /// </summary>
+    public class PullAudioInputSink : PullAudioInputStreamCallback
+    {
+        private readonly List<byte> pushDataBuffer = new List<byte>();
+        private readonly object dataSourceLock = new object();
+        private PullAudioDataSource dataSource;
+
+        /// <summary>
+        /// Raised upon the first read that crosses the current BookmarkPosition, as counted since last reset. The
+        /// argument is the value of <see cref="AudioReadSinceReset"/> after that read.
+        /// </summary>
+        public event Action<TimeSpan> BookmarkReached;
+
+        /// <summary>
+        /// Gets the duration of audio pulled from this sink since its last reset operation.
+        /// </summary>
+        public TimeSpan AudioReadSinceReset { get; private set; } = TimeSpan.Zero;
+
+        /// <summary>
+        /// Gets or sets the next position when BookmarkReached will be fired if a read causes AudioReadSinceReset to
+        /// cross the position.
+        /// </summary>
+        public TimeSpan BookmarkPosition { get; set; } = TimeSpan.Zero;
+
+        /// <summary>
+        /// Gets or sets a friendly label to associate with this input sink.
+        /// </summary>
+        public string Label { get; set; }
+
+        /// <summary>
+        /// Gets or sets the active data source to be used for subsequent reads from this input sink. When a different
+        /// source is assigned, a previous source backed by an AudioDataStream has that stream detached and disposed.
+        /// </summary>
+        public PullAudioDataSource DataSource
+        {
+            get => this.dataSource;
+            set
+            {
+                PullAudioDataSource previous;
+
+                // Swap under the same lock the read paths hold while using the source, so the old stream is only
+                // released once no locked read can still be working with it.
+                lock (this.dataSourceLock)
+                {
+                    previous = this.dataSource;
+                    this.dataSource = value;
+                }
+
+                // Re-assigning the current source must not dispose the stream that remains active.
+                if (!object.ReferenceEquals(previous, value) && previous?.BaseSource is AudioDataStream resultStream)
+                {
+                    resultStream.DetachInput();
+                    resultStream.Dispose();
+                }
+            }
+        }
+
+        /// <summary>
+        /// Adds the provided data to the buffer to be consumed when an appropriate source type is set.
+        /// </summary>
+        /// <param name="bytes"> The bytes to enqueue to the buffer. </param>
+        public void PushData(IEnumerable<byte> bytes)
+        {
+            lock (this.dataSourceLock)
+            {
+                this.pushDataBuffer.AddRange(bytes);
+            }
+        }
+
+        /// <summary>
+        /// Resets the source and data consumption count of this input sink: the data source is cleared, any pushed
+        /// data still buffered is discarded, and AudioReadSinceReset returns to zero.
+        /// </summary>
+        public void Reset()
+        {
+            this.DataSource = null;
+            lock (this.dataSourceLock)
+            {
+                this.pushDataBuffer.Clear();
+            }
+
+            this.AudioReadSinceReset = TimeSpan.Zero;
+        }
+
+        /// <summary>
+        /// Implemented for PullAudioInputStreamCallback. Fills the provided buffer from the currently selected data
+        /// source and optionally pads incomplete base reads with zeroes to prevent a stream termination.
+        /// </summary>
+        /// <param name="dataBuffer"> The buffer to fill with data. </param>
+        /// <param name="size"> The size of the buffer. Not consulted; the length of dataBuffer is used. </param>
+        /// <returns> The final number of bytes populated in dataBuffer. </returns>
+        /// <exception cref="ArgumentNullException"> Thrown when dataBuffer is null. </exception>
+        /// <exception cref="ArgumentException"> Thrown when the data source wraps an unsupported object. </exception>
+        public override int Read(byte[] dataBuffer, uint size)
+        {
+            if (dataBuffer == null)
+            {
+                throw new ArgumentNullException(nameof(dataBuffer));
+            }
+
+            // Take one snapshot so the dispatch decision is made against a single, consistent source.
+            var sourceAtStart = this.DataSource;
+            var baseSource = sourceAtStart?.BaseSource;
+
+            var bytesRead =
+                sourceAtStart == PullAudioDataSource.PushedData ? this.ReadFromBuffer(dataBuffer)
+                : baseSource is AudioDataStream ? this.ReadFromKeyword(dataBuffer)
+                : baseSource is null ? 0
+                : throw new ArgumentException("Unsupported PullAudioDataSource");
+
+            // The read above may have waited, and the source may have been replaced or cleared meanwhile. Padding and
+            // accounting follow whatever source is active now, read exactly once so a concurrent change cannot
+            // slip in between the null check and the property access.
+            var sourceAfterRead = this.DataSource;
+
+            if (sourceAfterRead != null && sourceAfterRead.PadWithZeroes)
+            {
+                Array.Fill<byte>(dataBuffer, 0, bytesRead, dataBuffer.Length - bytesRead);
+                bytesRead = dataBuffer.Length;
+            }
+
+            // A zero bitrate cannot be converted to a duration, so such reads are left out of the running total.
+            if (sourceAfterRead?.AudioFormat is AudioEncodingProperties audioFormat && audioFormat.Bitrate > 0)
+            {
+                var priorTotalReadDuration = this.AudioReadSinceReset;
+
+                // Integer tick arithmetic (bits read / bits per second) avoids single-precision rounding.
+                var readTicks = 8L * bytesRead * TimeSpan.TicksPerSecond / audioFormat.Bitrate;
+                this.AudioReadSinceReset = priorTotalReadDuration + TimeSpan.FromTicks(readTicks);
+
+                if (priorTotalReadDuration < this.BookmarkPosition
+                    && this.AudioReadSinceReset >= this.BookmarkPosition)
+                {
+                    this.BookmarkReached?.Invoke(this.AudioReadSinceReset);
+                }
+            }
+
+            return bytesRead;
+        }
+
+        /// <summary>
+        /// Reads from the AudioDataStream behind the current data source. Waits, polling every 50ms, while that stream
+        /// reports partial data and cannot yet fill the buffer, then reads as many bytes as the stream can provide.
+        /// </summary>
+        /// <param name="buffer"> The buffer to fill. </param>
+        /// <returns> The number of bytes read, or 0 if the source is no longer an AudioDataStream. </returns>
+        private int ReadFromKeyword(byte[] buffer)
+        {
+            while (true)
+            {
+                lock (this.dataSourceLock)
+                {
+                    // Stop waiting once the source changed, the stream left the partial-data state, or a full
+                    // buffer is available.
+                    if (!(this.DataSource?.BaseSource is AudioDataStream pendingSource)
+                        || pendingSource.GetStatus() != StreamStatus.PartialData
+                        || pendingSource.CanReadData((uint)buffer.Length))
+                    {
+                        break;
+                    }
+                }
+
+                // Sleep outside the lock so PushData and source changes are not blocked while waiting.
+                Thread.Sleep(50);
+            }
+
+            lock (this.dataSourceLock)
+            {
+                if (!(this.DataSource?.BaseSource is AudioDataStream keywordAudioSource))
+                {
+                    return 0;
+                }
+
+                // Find the largest read the stream can satisfy. The lower bound keeps the count from going
+                // negative, which would wrap to a huge value when cast to uint.
+                var bytesToRead = buffer.Length;
+
+                while (bytesToRead > 0 && !keywordAudioSource.CanReadData((uint)bytesToRead))
+                {
+                    bytesToRead--;
+                }
+
+                if (bytesToRead == 0)
+                {
+                    return 0;
+                }
+
+                // ReadData is handed a buffer of exactly the readable size; a temporary one is used for short reads.
+                var bufferToUse = bytesToRead == buffer.Length ? buffer : new byte[bytesToRead];
+                var result = (int)keywordAudioSource.ReadData(bufferToUse);
+
+                if (bufferToUse != buffer)
+                {
+                    Array.Copy(bufferToUse, buffer, result);
+                }
+
+                return result;
+            }
+        }
+
+        /// <summary>
+        /// Reads from the pushed-data buffer. Waits, polling every 50ms, until enough data has been pushed to fill the
+        /// buffer or the data source is no longer PushedData, then dequeues what is available.
+        /// </summary>
+        /// <param name="readBuffer"> The buffer to fill. </param>
+        /// <returns> The number of bytes copied into readBuffer. </returns>
+        private int ReadFromBuffer(byte[] readBuffer)
+        {
+            while (true)
+            {
+                lock (this.dataSourceLock)
+                {
+                    var sourceChanged = this.DataSource != PullAudioDataSource.PushedData;
+
+                    if (sourceChanged || this.pushDataBuffer.Count >= readBuffer.Length)
+                    {
+                        var bytesAvailable = Math.Min(readBuffer.Length, this.pushDataBuffer.Count);
+                        this.pushDataBuffer.CopyTo(0, readBuffer, 0, bytesAvailable);
+                        this.pushDataBuffer.RemoveRange(0, bytesAvailable);
+                        return bytesAvailable;
+                    }
+                }
+
+                // Sleep outside the lock so PushData can add to the buffer while waiting.
+                Thread.Sleep(50);
+            }
+        }
+    }
+}
@@ -21,6 +21,7 @@ public class DirectLineSpeechAudioOutputStream
         /// Initializes a new instance of the <see cref="DirectLineSpeechAudioOutputStream"/> class.
         /// </summary>
         /// <param name="audioSource"> The PullAudioOutputStream that should be read from. </param>
+        /// <param name="format"> The format information to associate with this audio source. </param>
         public DirectLineSpeechAudioOutputStream(PullAudioOutputStream audioSource, DialogAudio format)
             : base(format)
         {