Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Speech Recognition v2 - StartListening/StopListening #1382

Merged
merged 33 commits into from
Sep 23, 2023
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
5172070
Speech Recognition v2
VladislavAntonyuk Sep 4, 2023
7048308
Merge branch 'main' into speech-recognition-v2
VladislavAntonyuk Sep 6, 2023
58ad063
Fix tizen
VladislavAntonyuk Sep 6, 2023
912357e
Merge branch 'speech-recognition-v2' of https://github.com/CommunityT…
VladislavAntonyuk Sep 6, 2023
ce22dca
Fix tizen
VladislavAntonyuk Sep 6, 2023
c14a5e8
Fix PR comments
VladislavAntonyuk Sep 8, 2023
6552e04
Merge branch 'main' into speech-recognition-v2
VladislavAntonyuk Sep 8, 2023
82dc58d
Add tests
VladislavAntonyuk Sep 8, 2023
34ec3c1
Merge branch 'speech-recognition-v2' of https://github.com/CommunityT…
VladislavAntonyuk Sep 8, 2023
8412930
Fix tests
VladislavAntonyuk Sep 8, 2023
b0275d3
Merge branch 'main' into speech-recognition-v2
brminnick Sep 9, 2023
d0c85f5
Rename `ISpeechToText.State` -> `ISpeechToText.CurrentState`
brminnick Sep 9, 2023
f576493
Update Layout
brminnick Sep 9, 2023
4aaf087
Add `ISpeechToText.StateChanged`
brminnick Sep 9, 2023
61d1895
Add Missing CancellationToken
brminnick Sep 9, 2023
085b53c
Update Sample App
brminnick Sep 9, 2023
85352d7
Update SpeechToTextPage.xaml
brminnick Sep 9, 2023
6e00d06
`dotnet format`
brminnick Sep 9, 2023
2f50edb
Add tests, update CurrentState
VladislavAntonyuk Sep 10, 2023
4103ba5
Merge branch 'main' into speech-recognition-v2
brminnick Sep 15, 2023
efcd439
Add Missing XML
brminnick Sep 16, 2023
b0b8338
`dotnet format`
brminnick Sep 16, 2023
60c59cb
Update Formatting
brminnick Sep 16, 2023
e9d03a0
Add Missing Cancellation Usage, Update `SpeechToTextImplementation.ge…
brminnick Sep 16, 2023
5212804
Add `ResetSpeechRecognitionTaskCompletionSource()`
brminnick Sep 16, 2023
7eda8ff
Update SpeechToTextImplementation.tizen.cs
brminnick Sep 16, 2023
25906ad
`dotnet format`
brminnick Sep 16, 2023
35e23a5
Merge branch 'main' into speech-recognition-v2
brminnick Sep 18, 2023
0bcfd57
Dispose of `CancellationTokenRegistration`
brminnick Sep 19, 2023
6c6fed1
Merge branch 'speech-recognition-v2' of https://github.com/CommunityT…
brminnick Sep 19, 2023
b02fe3b
Add StateChanged impl on Tizen
JoonghyunCho Sep 22, 2023
09ca1e9
Merge branch 'main' into speech-recognition-v2
brminnick Sep 22, 2023
40a17d1
Merge branch 'main' into speech-recognition-v2
brminnick Sep 23, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -14,34 +14,28 @@
</ContentPage.Resources>

<ScrollView>
<Grid
RowDefinitions="64, 24, 64, 24, auto, 60, 60, 60"
<VerticalStackLayout
Padding="30,0">

<Label
Grid.Row="0"
Text="SpeechToText allows the user to convert speech to text in real time"/>

<Label
Grid.Row="1"
Text="Locale"
FontAttributes="Bold"/>

<Picker
Grid.Row="2"
ItemsSource="{Binding Locales}"
SelectedItem="{Binding CurrentLocale}"
ItemDisplayBinding="{Binding ., Converter={StaticResource PickerLocaleDisplayConverter}}"
Margin="0,0,0,20">
</Picker>

<Label
Grid.Row="3"
Text="Language Output"
FontAttributes="Bold"/>

<Label
Grid.Row="4"
Text="{Binding RecognitionText}"
FontSize="18"
HorizontalOptions="Center"
Expand All @@ -50,26 +44,34 @@
Margin="0,0,0,20" />

<Button
Grid.Row="5"
Text="Play"
Command="{Binding PlayCommand}"
HorizontalOptions="Center"
Margin="0,0,0,20"/>

<Button
Grid.Row="6"
Text="Listen"
Command="{Binding ListenCommand}"
HorizontalOptions="Center"
Margin="0,0,0,20"/>

<Button
Grid.Row="7"
Text="Stop Listening"
Command="{Binding ListenCancelCommand}"
HorizontalOptions="Center"/>

</Grid>
<Button
Text="Start Listening"
Command="{Binding StartListenCommand}"
HorizontalOptions="Center"
Margin="0,0,0,20"/>

<Button
Text="Stop Listening"
Command="{Binding StopListenCommand}"
HorizontalOptions="Center"/>

</VerticalStackLayout>
</ScrollView>

</pages:BasePage>
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ public partial class SpeechToTextViewModel : BaseViewModel

[ObservableProperty]
string? recognitionText = "Welcome to .NET MAUI Community Toolkit!";

public SpeechToTextViewModel(ITextToSpeech textToSpeech, ISpeechToText speechToText)
{
this.textToSpeech = textToSpeech;
Expand Down Expand Up @@ -99,6 +99,48 @@ async Task Listen(CancellationToken cancellationToken)
RecognitionText = string.Empty;
}
}

/// <summary>
/// Requests speech permissions, starts continuous recognition for the selected locale
/// and attaches this view model to the recognition events.
/// </summary>
/// <param name="cancellationToken">Token passed to the permission request and to <c>StartListeningAsync</c>.</param>
[RelayCommand]
async Task StartListen(CancellationToken cancellationToken)
{
	var isGranted = await speechToText.RequestPermissions(cancellationToken);
	if (!isGranted)
	{
		await Toast.Make("Permission not granted").Show(CancellationToken.None);
		return;
	}

	const string beginSpeakingPrompt = "Begin speaking...";

	RecognitionText = beginSpeakingPrompt;

	await speechToText.StartListeningAsync(CultureInfo.GetCultureInfo(CurrentLocale?.Language ?? defaultLanguage), cancellationToken);

	// Detach before attaching: if this command runs again without StopListen in between,
	// a plain "+=" would register each handler twice and every partial result would be
	// appended to RecognitionText twice. Removing a handler that is not attached is a no-op.
	speechToText.RecognitionResultUpdated -= SpeechToTextOnRecognitionResultUpdated;
	speechToText.RecognitionResultCompleted -= SpeechToTextOnRecognitionResultCompleted;
	speechToText.RecognitionResultUpdated += SpeechToTextOnRecognitionResultUpdated;
	speechToText.RecognitionResultCompleted += SpeechToTextOnRecognitionResultCompleted;

	// Clear the prompt only if nothing has replaced it in the meantime.
	if (RecognitionText is beginSpeakingPrompt)
	{
		RecognitionText = string.Empty;
	}
}

/// <summary>
/// Detaches this view model from the recognition events and then asks the
/// speech-to-text service to stop listening.
/// </summary>
/// <param name="cancellationToken">Token passed to <c>StopListeningAsync</c>.</param>
[RelayCommand]
async Task StopListen(CancellationToken cancellationToken)
{
	// Handlers are removed before the stop request is issued.
	speechToText.RecognitionResultCompleted -= SpeechToTextOnRecognitionResultCompleted;
	speechToText.RecognitionResultUpdated -= SpeechToTextOnRecognitionResultUpdated;

	await speechToText.StopListeningAsync(cancellationToken);
}

/// <summary>
/// Appends the reported recognition result to <c>RecognitionText</c>.
/// </summary>
void SpeechToTextOnRecognitionResultUpdated(object? sender, OnSpeechToTextRecognitionResultUpdated e)
	=> RecognitionText += e.RecognitionResult;

/// <summary>
/// Replaces <c>RecognitionText</c> with the reported recognition result.
/// </summary>
void SpeechToTextOnRecognitionResultCompleted(object? sender, OnSpeechToTextRecognitionResultCompleted e)
	=> RecognitionText = e.RecognitionResult;

void HandleLocalesCollectionChanged(object? sender, NotifyCollectionChangedEventArgs e)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,54 @@ public interface ISpeechToText : IAsyncDisposable
/// <returns>Final recognition result</returns>
Task<SpeechToTextResult> ListenAsync(CultureInfo culture, IProgress<string>? recognitionResult, CancellationToken cancellationToken);

/// <summary>
/// Starts converting speech to text in real time.
/// Results are reported through <see cref="RecognitionResultUpdated"/> and <see cref="RecognitionResultCompleted"/>.
/// </summary>
/// <param name="culture">Culture of the language that will be spoken</param>
/// <param name="cancellationToken"><see cref="CancellationToken"/></param>
/// <returns>A <see cref="Task"/> representing the start operation</returns>
Task StartListeningAsync(CultureInfo culture, CancellationToken cancellationToken);

/// <summary>
/// Stops listening for speech.
/// </summary>
/// <param name="cancellationToken"><see cref="CancellationToken"/></param>
/// <returns>A <see cref="Task"/> representing the stop operation</returns>
Task StopListeningAsync(CancellationToken cancellationToken);

/// <summary>
/// Raised while listening each time a partial recognition result is available.
/// </summary>
event EventHandler<OnSpeechToTextRecognitionResultUpdated> RecognitionResultUpdated;

/// <summary>
/// Raised when the final recognition result is available.
/// </summary>
event EventHandler<OnSpeechToTextRecognitionResultCompleted> RecognitionResultCompleted;
VladislavAntonyuk marked this conversation as resolved.
Show resolved Hide resolved

/// <summary>
/// Requests the permissions needed for speech to text.
/// </summary>
/// <param name="cancellationToken"><see cref="CancellationToken"/></param>
/// <returns><see langword="true"/> if the permissions were granted; otherwise <see langword="false"/></returns>
Task<bool> RequestPermissions(CancellationToken cancellationToken);

/// <summary>
/// Gets the current listening state (see <see cref="SpeechToTextState"/>).
/// </summary>
SpeechToTextState State { get; }
}

/// <summary>
/// Describes whether speech recognition is currently listening.
/// </summary>
public enum SpeechToTextState
{
	/// <summary>
	/// Speech is actively being listened to.
	/// </summary>
	/// <remarks>
	/// This member has the value 0, so it is also what <c>default(SpeechToTextState)</c> yields.
	/// </remarks>
	Listening = 0,

	/// <summary>
	/// Listening has been stopped.
	/// </summary>
	Stopped = 1
}
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,11 @@ public sealed partial class SpeechToTextImplementation
SFSpeechRecognitionTask? recognitionTask;
SFSpeechAudioBufferRecognitionRequest? liveSpeechRequest;

/// <inheritdoc/>
public SpeechToTextState State
{
	get
	{
		// No recognition task, or a task in any state other than Running, counts as stopped.
		var isRunning = recognitionTask is { State: SFSpeechRecognitionTaskState.Running };
		return isRunning ? SpeechToTextState.Listening : SpeechToTextState.Stopped;
	}
}

/// <inheritdoc />
public ValueTask DisposeAsync()
{
Expand Down Expand Up @@ -48,4 +53,10 @@ void StopRecording()
liveSpeechRequest?.EndAudio();
recognitionTask?.Cancel();
}

/// <summary>
/// Stops recognition by calling <c>StopRecording</c> and completes synchronously.
/// </summary>
/// <param name="cancellationToken">Not used by this implementation.</param>
/// <returns>An already completed <see cref="Task"/></returns>
Task InternalStopListeningAsync(CancellationToken cancellationToken)
{
StopRecording();
return Task.CompletedTask;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,12 @@ public sealed partial class SpeechToTextImplementation
{
SpeechRecognizer? speechRecognizer;
SpeechRecognitionListener? listener;
TaskCompletionSource<string>? speechRecognitionListenerTaskCompletionSource;
IProgress<string>? recognitionProgress;
CultureInfo? cultureInfo;

/// <inheritdoc/>
// Explicitly initialized: without an initializer the property would start as
// default(SpeechToTextState), which is Listening (the first enum member), and the
// service would report that it is listening before recognition was ever started.
public SpeechToTextState State { get; private set; } = SpeechToTextState.Stopped;

/// <inheritdoc />
public ValueTask DisposeAsync()
Expand All @@ -26,8 +32,9 @@ public ValueTask DisposeAsync()
}

[MemberNotNull(nameof(speechRecognizer), nameof(listener))]
async Task<string> InternalListenAsync(CultureInfo culture, IProgress<string>? recognitionResult, CancellationToken cancellationToken)
Task InternalStartListeningAsync(CultureInfo culture, CancellationToken cancellationToken)
{
cultureInfo = culture;
var isSpeechRecognitionAvailable = IsSpeechRecognitionAvailable();
if (!isSpeechRecognitionAvailable)
{
Expand All @@ -40,7 +47,6 @@ async Task<string> InternalListenAsync(CultureInfo culture, IProgress<string>? r
throw new FeatureNotSupportedException("Speech recognizer is not available on this device");
}

var speechRecognitionListenerTaskCompletionSource = new TaskCompletionSource<string>();

listener = new SpeechRecognitionListener
{
Expand All @@ -49,7 +55,22 @@ async Task<string> InternalListenAsync(CultureInfo culture, IProgress<string>? r
Results = HandleListenerResults
};
speechRecognizer.SetRecognitionListener(listener);
speechRecognizer.StartListening(CreateSpeechIntent(culture));
speechRecognizer.StartListening(CreateSpeechIntent(cultureInfo));
State = SpeechToTextState.Listening;
return Task.CompletedTask;
}

/// <summary>
/// Stops recognition by calling <c>StopRecording</c> and completes synchronously.
/// </summary>
/// <param name="cancellationToken">Not used by this implementation.</param>
/// <returns>An already completed <see cref="Task"/></returns>
Task InternalStopListeningAsync(CancellationToken cancellationToken)
{
StopRecording();
return Task.CompletedTask;
}

async Task<string> InternalListenAsync(CultureInfo culture, IProgress<string>? recognitionResult, CancellationToken cancellationToken)
{
recognitionProgress = recognitionResult;
speechRecognitionListenerTaskCompletionSource = new();
await InternalStartListeningAsync(culture, CancellationToken.None);

await using (cancellationToken.Register(() =>
{
Expand All @@ -59,15 +80,23 @@ async Task<string> InternalListenAsync(CultureInfo culture, IProgress<string>? r
{
return await speechRecognitionListenerTaskCompletionSource.Task;
}
}

void HandleListenerError(SpeechRecognizerError error)
=> speechRecognitionListenerTaskCompletionSource.TrySetException(new Exception("Failure in speech engine - " + error));
void HandleListenerError(SpeechRecognizerError error)
{
speechRecognitionListenerTaskCompletionSource?.TrySetException(new Exception("Failure in speech engine - " + error));
}

void HandleListenerPartialResults(string sentence)
=> recognitionResult?.Report(sentence);
void HandleListenerPartialResults(string sentence)
{
OnRecognitionResultUpdated(sentence);
recognitionProgress?.Report(sentence);
}

void HandleListenerResults(string result)
=> speechRecognitionListenerTaskCompletionSource.TrySetResult(result);
void HandleListenerResults(string result)
{
OnRecognitionResultCompleted(result);
speechRecognitionListenerTaskCompletionSource?.TrySetResult(result);
}

static Intent CreateSpeechIntent(CultureInfo culture)
Expand All @@ -90,6 +119,7 @@ void StopRecording()
{
speechRecognizer?.StopListening();
speechRecognizer?.Destroy();
State = SpeechToTextState.Stopped;
}

class SpeechRecognitionListener : Java.Lang.Object, IRecognitionListener
Expand Down
Original file line number Diff line number Diff line change
@@ -1,17 +1,20 @@
using System.Diagnostics.CodeAnalysis;
using System.Globalization;
using AVFoundation;
using Microsoft.Maui.ApplicationModel;
using Speech;

namespace CommunityToolkit.Maui.Media;

/// <inheritdoc />
public sealed partial class SpeechToTextImplementation
{
IProgress<string>? recognitionProgress;
TaskCompletionSource<string>? getRecognitionTaskCompletionSource;

[MemberNotNull(nameof(audioEngine), nameof(recognitionTask), nameof(liveSpeechRequest))]
async Task<string> InternalListenAsync(CultureInfo culture, IProgress<string>? recognitionResult, CancellationToken cancellationToken)
Task InternalStartListeningAsync(CultureInfo culture, CancellationToken cancellationToken)
{
getRecognitionTaskCompletionSource = new TaskCompletionSource<string>();
speechRecognizer = new SFSpeechRecognizer(NSLocale.FromLocaleIdentifier(culture.Name));

if (!speechRecognizer.Available)
Expand Down Expand Up @@ -43,7 +46,6 @@ async Task<string> InternalListenAsync(CultureInfo culture, IProgress<string>? r
}

var currentIndex = 0;
var getRecognitionTaskCompletionSource = new TaskCompletionSource<string>();

recognitionTask = speechRecognizer.GetRecognitionTask(liveSpeechRequest, (result, err) =>
{
Expand All @@ -58,6 +60,7 @@ async Task<string> InternalListenAsync(CultureInfo culture, IProgress<string>? r
{
currentIndex = 0;
StopRecording();
OnRecognitionResultCompleted(result.BestTranscription.FormattedString);
getRecognitionTaskCompletionSource.TrySetResult(result.BestTranscription.FormattedString);
}
else
Expand All @@ -66,12 +69,20 @@ async Task<string> InternalListenAsync(CultureInfo culture, IProgress<string>? r
{
var s = result.BestTranscription.Segments[i].Substring;
currentIndex++;
recognitionResult?.Report(s);
recognitionProgress?.Report(s);
OnRecognitionResultUpdated(s);
}
}
}
});
return Task.CompletedTask;
brminnick marked this conversation as resolved.
Show resolved Hide resolved
}

async Task<string> InternalListenAsync(CultureInfo culture, IProgress<string>? recognitionResult, CancellationToken cancellationToken)
{
recognitionProgress = recognitionResult;
getRecognitionTaskCompletionSource ??= new TaskCompletionSource<string>();
await StartListeningAsync(culture, CancellationToken.None);
VladislavAntonyuk marked this conversation as resolved.
Show resolved Hide resolved
await using (cancellationToken.Register(() =>
{
StopRecording();
Expand Down