Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add voice switcher for TTS #77

Open
wants to merge 10 commits into
base: develop
Choose a base branch
from
16 changes: 11 additions & 5 deletions src/Translumo.Processing/TranslationProcessingService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -52,10 +52,10 @@ public class TranslationProcessingService : IProcessingService, IDisposable
private long _lastTranslatedTextTicks;

private const float MIN_SCORE_THRESHOLD = 2.1f;

public TranslationProcessingService(ICapturerFactory capturerFactory, IChatTextMediator chatTextMediator, OcrEnginesFactory ocrEnginesFactory,
TranslatorFactory translationFactory, TtsFactory ttsFactory, TtsConfiguration ttsConfiguration,
TextDetectionProvider textProvider, TranslationConfiguration translationConfiguration, OcrGeneralConfiguration ocrConfiguration,
TextDetectionProvider textProvider, TranslationConfiguration translationConfiguration, OcrGeneralConfiguration ocrConfiguration,
TextResultCacheService textResultCacheService, TextProcessingConfiguration textConfiguration, ILogger<TranslationProcessingService> logger)
{
_logger = logger;
Expand Down Expand Up @@ -236,7 +236,7 @@ void CapturerEnsureInitialized()
continue;
}

if (_textResultCacheService.IsCached(bestDetected.Text, bestDetected.ValidityScore, sequentialText,
if (_textResultCacheService.IsCached(bestDetected.Text, bestDetected.ValidityScore, sequentialText,
bestDetected.Language.Asian, out iterationId))
{
sequentialText = false;
Expand All @@ -257,7 +257,7 @@ void CapturerEnsureInitialized()
}

_logger.LogError(ex, $"Screen capture failed (code: {ex.ErrorCode})");

_capturer.Dispose();
_capturer = null;
CapturerEnsureInitialized();
Expand Down Expand Up @@ -407,9 +407,15 @@ private void TtsConfigurationOnPropertyChanged(object sender, PropertyChangedEve
if (e.PropertyName == nameof(_ttsConfiguration.TtsLanguage)
|| e.PropertyName == nameof(_ttsConfiguration.TtsSystem))
{
_ttsEngine.Dispose();
_ttsEngine?.Dispose();
_ttsEngine = null;
igubanov marked this conversation as resolved.
Show resolved Hide resolved
_ttsEngine = _ttsFactory.CreateTtsEngine(_ttsConfiguration);
}
else if (e.PropertyName == nameof(_ttsConfiguration.CurrentVoice)
&& _ttsEngine != null && _ttsConfiguration.CurrentVoice != null)
{
_ttsEngine.SetVoice(_ttsConfiguration.CurrentVoice);
}
}

private void OcrGeneralConfigurationOnPropertyChanged(object sender, PropertyChangedEventArgs e)
Expand Down
6 changes: 5 additions & 1 deletion src/Translumo.TTS/Engines/ITTSEngine.cs
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
namespace Translumo.TTS.Engines;

public interface ITTSEngine: IDisposable
/// <summary>
/// Contract shared by the text-to-speech engines (the None, Windows and Silero engines implement it).
/// </summary>
public interface ITTSEngine : IDisposable
{
/// <summary>Asks the engine to speak <paramref name="text"/>; the None engine implements this as a no-op.</summary>
void SpeechText(string text);

/// <summary>
/// Returns the names of the voices the engine offers: the model's speakers (Silero),
/// the installed voice names for the language (Windows), or the single placeholder "None".
/// </summary>
string[] GetVoices();

/// <summary>
/// Selects the active voice by name; the Silero and Windows engines match the name
/// case-insensitively against their own voice list.
/// </summary>
void SetVoice(string voice);
}
6 changes: 6 additions & 0 deletions src/Translumo.TTS/Engines/NoneTTSEngine.cs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,12 @@ public void Dispose()
{
}

/// <summary>Returns a one-element array holding the placeholder voice name "None".</summary>
public string[] GetVoices()
{
    return new string[] { "None" };
}

/// <summary>No-op: the body is empty, so the requested voice is ignored.</summary>
public void SetVoice(string voice)
{
}

/// <summary>No-op: the body is empty, so nothing is spoken for <paramref name="text"/>.</summary>
public void SpeechText(string text)
{
}
Expand Down
8 changes: 7 additions & 1 deletion src/Translumo.TTS/Engines/SileroTTSEngine.cs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ public class SileroTTSEngine : ITTSEngine
{
private dynamic _ipython;
private dynamic _model;
private string[] _voices;
private string _voice;
private readonly string _modelPath;
private readonly PythonEngineWrapper _pythonEngine;
Expand Down Expand Up @@ -51,7 +52,8 @@ private void Init()
_pyObjects.Add(_ipython);
});

_voice = ((string[])_model.speakers).First();
_voices = (string[])_model.speakers;
_voice = _voices.First();
}

public void SpeechText(string text)
Expand Down Expand Up @@ -146,5 +148,9 @@ private string GetModelFullPath(string langCode)
_ => null
};

/// <summary>Returns the voice list held in <c>_voices</c>, which is filled from the model's speakers during initialization.</summary>
public string[] GetVoices()
{
    return _voices;
}

/// <summary>
/// Selects the speaker used by this engine, matching <paramref name="voice"/>
/// case-insensitively against the voices loaded from the model.
/// </summary>
/// <param name="voice">
/// Requested voice name. It may belong to a different engine (for example, a value still
/// stored in the configuration after the TTS system was switched).
/// </param>
/// <remarks>
/// An unknown or null name is ignored and the current voice is kept. The previous
/// <c>First(...)</c> call threw <see cref="InvalidOperationException"/> in that case.
/// </remarks>
public void SetVoice(string voice)
{
    // FirstOrDefault yields null when no entry matches, instead of throwing like First.
    var match = _voices?.FirstOrDefault(x => x.Equals(voice, StringComparison.OrdinalIgnoreCase));
    if (match != null)
    {
        _voice = match;
    }
}
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is a bug: an exception is thrown when a TTS engine with a voice is selected and the user then switches to another TTS engine. TtsConfiguration.CurrentVoice needs to be set after (or before) this update.


// Pairs a model file URL with a warm-up text.
// NOTE(review): meanings are inferred from the member names; the usage is outside this view - confirm.
private sealed record ModelDescription(string FileUrl, string WarmUpText);
}
97 changes: 93 additions & 4 deletions src/Translumo.TTS/Engines/WindowsTTSEngine.cs
Original file line number Diff line number Diff line change
@@ -1,20 +1,25 @@
using System.Globalization;
using System.Collections;
using System.Collections.ObjectModel;
using System.Globalization;
using System.Reflection;
using System.Speech.Synthesis;

namespace Translumo.TTS.Engines;

public class WindowsTTSEngine : ITTSEngine
{
private readonly VoiceInfo _voiceInfo;
private VoiceInfo _voiceInfo;
private readonly SpeechSynthesizer _synthesizer;
private readonly ReadOnlyDictionary<string, VoiceInfo> _voices;

/// <summary>
/// Creates a speech synthesizer bound to the default audio device and collects the
/// voices installed for the given language, including the injected OneCore voices.
/// </summary>
/// <param name="languageCode">Culture name passed to <see cref="CultureInfo"/> to filter the installed voices.</param>
/// <remarks>
/// When no voice is installed for the language, <c>_voiceInfo</c> is left null instead of
/// throwing, as the earlier <c>FirstOrDefault()</c>-based initialization did.
/// NOTE(review): SpeechText is not visible here; confirm it tolerates a null voice.
/// </remarks>
public WindowsTTSEngine(string languageCode)
{
    _synthesizer = new SpeechSynthesizer();
    try
    {
        _synthesizer.SetOutputToDefaultAudioDevice();
        _synthesizer.Rate = 1;

        // Must run before GetInstalledVoices so the OneCore voices are part of the result.
        SpeechApiReflectionHelper.InjectOneCoreVoices(_synthesizer);

        var voicesByName = new Dictionary<string, VoiceInfo>();
        foreach (var installed in _synthesizer.GetInstalledVoices(new CultureInfo(languageCode)))
        {
            // TryAdd keeps the first voice registered under a name; ToDictionary would
            // throw ArgumentException if two voices shared the same name.
            voicesByName.TryAdd(installed.VoiceInfo.Name, installed.VoiceInfo);
        }

        _voices = voicesByName.AsReadOnly();

        // FirstOrDefault instead of First: an empty voice list must not crash construction.
        _voiceInfo = _voices.Values.FirstOrDefault();
    }
    catch
    {
        // Do not leak the synthesizer when initialization fails part-way.
        _synthesizer.Dispose();
        throw;
    }
}

public void SpeechText(string text)
Expand All @@ -36,4 +41,88 @@ public void Dispose()
{
_synthesizer.Dispose();
}

/// <summary>Returns the voice names, i.e. the keys of the <c>_voices</c> dictionary, as a new array.</summary>
public string[] GetVoices()
{
    string[] names = _voices.Keys.ToArray();
    return names;
}

/// <summary>
/// Selects the voice used by this engine, matching <paramref name="voice"/>
/// case-insensitively against the names of the collected voices.
/// </summary>
/// <param name="voice">
/// Requested voice name. It may belong to a different engine (for example, a value still
/// stored in the configuration after the TTS system or language was switched).
/// </param>
/// <remarks>
/// An unknown or null name is ignored and the current voice is kept. The previous
/// <c>First(...)</c> call threw <see cref="InvalidOperationException"/> in that case.
/// </remarks>
public void SetVoice(string voice)
{
    // FirstOrDefault on the key/value pairs yields a default pair (null Value) when nothing matches.
    var match = _voices.FirstOrDefault(x => x.Key.Equals(voice, StringComparison.OrdinalIgnoreCase)).Value;
    if (match != null)
    {
        _voiceInfo = match;
    }
}

// By default SpeechSynthesizer does not list all available voices; this helper adds the
// voices registered under the Speech_OneCore registry key to the synthesizer's installed-voice list.
// It relies on non-public members of the System.Speech assembly, accessed through reflection.
// https://stackoverflow.com/a/71198211
private static class SpeechApiReflectionHelper
{
// Name of the non-public SpeechSynthesizer property that exposes the internal voice synthesizer.
private const string PROP_VOICE_SYNTHESIZER = "VoiceSynthesizer";
// Name of the non-public field on the voice synthesizer that holds the installed-voice list.
private const string FIELD_INSTALLED_VOICES = "_installedVoices";

// Registry category passed to ObjectTokenCategory.Create to enumerate the OneCore voices.
private const string ONE_CORE_VOICES_REGISTRY = @"HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Speech_OneCore\Voices";

// Types resolved by name from the assembly that defines SpeechSynthesizer.
private static readonly Type _objectTokenCategoryType = typeof(SpeechSynthesizer).Assembly
.GetType("System.Speech.Internal.ObjectTokens.ObjectTokenCategory")!;

private static readonly Type _voiceInfoType = typeof(SpeechSynthesizer).Assembly
.GetType("System.Speech.Synthesis.VoiceInfo")!;

private static readonly Type _installedVoiceType = typeof(SpeechSynthesizer).Assembly
.GetType("System.Speech.Synthesis.InstalledVoice")!;


/// <summary>
/// Appends one InstalledVoice entry per OneCore voice token to the installed-voice list of
/// <paramref name="synthesizer"/>.
/// </summary>
/// <param name="synthesizer">Synthesizer whose internal installed-voice list is extended.</param>
/// <exception cref="NotSupportedException">
/// Thrown when an expected non-public member is missing or returns an unexpected value,
/// or when a VoiceInfo / InstalledVoice instance cannot be created.
/// </exception>
public static void InjectOneCoreVoices(SpeechSynthesizer synthesizer)
{
var voiceSynthesizer = GetProperty(synthesizer, PROP_VOICE_SYNTHESIZER);
if (voiceSynthesizer == null)
throw new NotSupportedException($"Property not found: {PROP_VOICE_SYNTHESIZER}");

// The list is mutated in place below, so it must be the synthesizer's own IList instance.
var installedVoices = GetField(voiceSynthesizer, FIELD_INSTALLED_VOICES) as IList;
if (installedVoices == null)
throw new NotSupportedException($"Field not found or null: {FIELD_INSTALLED_VOICES}");

// Static, non-public ObjectTokenCategory.Create(registryPath); the result must be disposable.
if (_objectTokenCategoryType
.GetMethod("Create", BindingFlags.Static | BindingFlags.NonPublic)?
.Invoke(null, new object?[] { ONE_CORE_VOICES_REGISTRY }) is not IDisposable otc)
throw new NotSupportedException($"Failed to call Create on {_objectTokenCategoryType} instance");

using (otc)
{
// Instance, non-public FindMatchingTokens(null, null); the result must be a list of tokens.
if (_objectTokenCategoryType
.GetMethod("FindMatchingTokens", BindingFlags.Instance | BindingFlags.NonPublic)?
.Invoke(otc, new object?[] { null, null }) is not IList tokens)
throw new NotSupportedException($"Failed to list matching tokens");

foreach (var token in tokens)
{
// Skip null tokens and tokens whose non-public Attributes property is null.
if (token == null || GetProperty(token, "Attributes") == null)
continue;

// VoiceInfo is created through a non-public constructor that takes the token.
var voiceInfo =
typeof(SpeechSynthesizer).Assembly
.CreateInstance(_voiceInfoType.FullName!, true,
BindingFlags.Instance | BindingFlags.NonPublic, null,
new object[] { token }, null, null);

if (voiceInfo == null)
throw new NotSupportedException($"Failed to instantiate {_voiceInfoType}");

// InstalledVoice is created through a non-public constructor taking (voiceSynthesizer, voiceInfo).
var installedVoice =
typeof(SpeechSynthesizer).Assembly
.CreateInstance(_installedVoiceType.FullName!, true,
BindingFlags.Instance | BindingFlags.NonPublic, null,
new object[] { voiceSynthesizer, voiceInfo }, null, null);

if (installedVoice == null)
throw new NotSupportedException($"Failed to instantiate {_installedVoiceType}");

installedVoices.Add(installedVoice);
}
}
}

// Reads a non-public instance property by name; returns null when the property does not exist.
private static object? GetProperty(object target, string propName)
{
return target.GetType().GetProperty(propName, BindingFlags.Instance | BindingFlags.NonPublic)?.GetValue(target);
}

// Reads a non-public instance field by name; returns null when the field does not exist.
private static object? GetField(object target, string propName)
{
return target.GetType().GetField(propName, BindingFlags.Instance | BindingFlags.NonPublic)?.GetValue(target);
}
}
}
6 changes: 6 additions & 0 deletions src/Translumo.TTS/IObserverAvailableVoices.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
namespace Translumo.TTS;

/// <summary>
/// Receiver of the voice list of a TTS engine; TtsFactory.CreateTtsEngine calls it with the
/// result of the new engine's GetVoices().
/// </summary>
public interface IObserverAvailableVoices
{
/// <summary>Receives the voice names currently offered by the engine.</summary>
/// <param name="currentVoices">Voice names reported by the engine.</param>
void UpdateVoice(IList<string> currentVoices);
}
14 changes: 11 additions & 3 deletions src/Translumo.TTS/TtsConfiguration.cs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
using Translumo.Infrastructure.Language;
using System.Collections.ObjectModel;
using Translumo.Infrastructure.Language;
using Translumo.Utils;
using Windows.Security.EnterpriseData;

namespace Translumo.TTS;

Expand All @@ -12,12 +12,14 @@ public class TtsConfiguration : BindableBase
{
TtsLanguage = Languages.English,
TtsSystem = TTSEngines.None,
InstalledWinTtsLanguages = new List<Languages>()
InstalledWinTtsLanguages = new List<Languages>(),
_currentVoice = string.Empty,
};

private TTSEngines _ttsSystem;
private Languages _ttsLanguage;
private List<Languages> _installedWinTtsLanguages;
private string _currentVoice;

public TTSEngines TtsSystem
{
Expand All @@ -37,6 +39,12 @@ public Languages TtsLanguage
}
}

/// <summary>
/// Name of the selected voice. Assignments are routed through <c>SetProperty</c>
/// with the <c>_currentVoice</c> backing field.
/// </summary>
public string CurrentVoice
{
    get
    {
        return _currentVoice;
    }
    set
    {
        SetProperty(ref _currentVoice, value);
    }
}

public List<Languages> InstalledWinTtsLanguages
{
get => _installedWinTtsLanguages;
Expand Down
15 changes: 12 additions & 3 deletions src/Translumo.TTS/TtsFactory.cs
Original file line number Diff line number Diff line change
Expand Up @@ -9,24 +9,33 @@ public class TtsFactory
{
private readonly LanguageService _languageService;
private readonly PythonEngineWrapper _pythonEngine;
private readonly IObserverAvailableVoices _observerAvailableVoices;
private readonly ILogger _logger;

public TtsFactory(LanguageService languageService, PythonEngineWrapper pythonEngine, ILogger<TtsFactory> logger)
/// <summary>Stores the collaborators used when creating TTS engines.</summary>
/// <param name="languageService">Service used to resolve language descriptors.</param>
/// <param name="pythonEngine">Python engine wrapper handed to the Silero engine.</param>
/// <param name="observerAvailableVoices">Observer that receives the voice list of each created engine.</param>
/// <param name="logger">Logger for this factory.</param>
public TtsFactory(LanguageService languageService, PythonEngineWrapper pythonEngine, IObserverAvailableVoices observerAvailableVoices, ILogger<TtsFactory> logger)
{
    (_languageService, _pythonEngine, _observerAvailableVoices, _logger) =
        (languageService, pythonEngine, observerAvailableVoices, logger);
}

public ITTSEngine CreateTtsEngine(TtsConfiguration ttsConfiguration) =>
ttsConfiguration.TtsSystem switch
/// <summary>
/// Creates the engine selected by <c>ttsConfiguration.TtsSystem</c> and passes its voice
/// list to the available-voices observer.
/// </summary>
/// <param name="ttsConfiguration">Configuration providing the TTS system and language.</param>
/// <returns>The newly created engine.</returns>
/// <exception cref="NotSupportedException">Thrown for a TTS system value that has no engine.</exception>
public ITTSEngine CreateTtsEngine(TtsConfiguration ttsConfiguration)
{
    ITTSEngine engine;
    switch (ttsConfiguration.TtsSystem)
    {
        case TTSEngines.None:
            engine = new NoneTTSEngine();
            break;
        case TTSEngines.WindowsTTS:
            engine = new WindowsTTSEngine(GetLangCode(ttsConfiguration));
            break;
        case TTSEngines.SileroTTS:
            engine = new SileroTTSEngine(_pythonEngine, GetLangCode(ttsConfiguration));
            break;
        default:
            throw new NotSupportedException();
    }

    // Publish the new engine's voices before handing the engine back to the caller.
    _observerAvailableVoices.UpdateVoice(engine.GetVoices());

    return engine;
}

/// <summary>Returns the code of the language descriptor resolved for the configured TTS language.</summary>
private string GetLangCode(TtsConfiguration ttsConfiguration)
{
    var descriptor = _languageService.GetLanguageDescriptor(ttsConfiguration.TtsLanguage);
    return descriptor.Code;
}
}
Expand Down
1 change: 1 addition & 0 deletions src/Translumo/App.xaml.cs
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ private void ConfigureServices(ServiceCollection services)
services.AddScoped<HotkeysSettingsViewModel>();
services.AddScoped<LanguagesSettingsViewModel>();
services.AddScoped<OcrSettingsViewModel>();
services.AddScoped<IObserverAvailableVoices, LanguagesSettingsViewModel>();

var chatWindowConfiguration = ChatWindowConfiguration.Default;
services.AddSingleton<OcrGeneralConfiguration>(OcrGeneralConfiguration.Default);
Expand Down
Loading
Loading