Skip to content

Commit

Permalink
com.rest.elevenlabs 3.2.0 (#52)
Browse files Browse the repository at this point in the history
- updated com.utilities.rest -> 2.4.0
- fixed text to speech request encoding if text is encoded to something other than UTF-8
  • Loading branch information
StephenHodgson committed Dec 14, 2023
1 parent 75f4390 commit 19515c4
Show file tree
Hide file tree
Showing 6 changed files with 16 additions and 10 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ public async Task<VoiceClip> DownloadHistoryAudioAsync(HistoryItem historyItem,
}

await Awaiters.UnityMainThread;
var audioClip = await Rest.DownloadAudioClipAsync($"file://{cachedPath}", AudioType.UNKNOWN, cancellationToken: cancellationToken);
var audioClip = await Rest.DownloadAudioClipAsync($"file://{cachedPath}", AudioType.UNKNOWN, debug: EnableDebug, cancellationToken: cancellationToken);
return new VoiceClip(historyItem.Id, historyItem.Text, voice, audioClip, cachedPath);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,6 @@ public async Task<VoiceClip> TextToSpeechAsync(string text, Voice voice, VoiceSe
{
ValidateInputs(text, voice);

var downloadDirectory = await GetCacheDirectoryAsync(voice);
var defaultVoiceSettings = voiceSettings ?? voice.Settings ?? await client.VoicesEndpoint.GetDefaultVoiceSettingsAsync(cancellationToken);
var request = new TextToSpeechRequest(text, model, defaultVoiceSettings);
var payload = JsonConvert.SerializeObject(request, ElevenLabsClient.JsonSerializationOptions);
Expand Down Expand Up @@ -95,6 +94,7 @@ public async Task<VoiceClip> TextToSpeechAsync(string text, Voice voice, VoiceSe
AudioType.OGGVORBIS => "ogg",
_ => throw new ArgumentOutOfRangeException($"Unsupported {nameof(AudioType)}: {audioType}")
};
var downloadDirectory = await GetCacheDirectoryAsync(voice);
var cachedPath = $"{downloadDirectory}/{clipId}.{extension}";

if (!File.Exists(cachedPath))
Expand All @@ -115,15 +115,15 @@ public async Task<VoiceClip> TextToSpeechAsync(string text, Voice voice, VoiceSe
_ => throw new ArgumentOutOfRangeException(nameof(outputFormat), outputFormat, null)
};
var oggBytes = await OggEncoder.ConvertToBytesAsync(pcmData, frequency, 1, cancellationToken: cancellationToken).ConfigureAwait(false);
await File.WriteAllBytesAsync(cachedPath, oggBytes, cancellationToken: cancellationToken).ConfigureAwait(false);
await File.WriteAllBytesAsync(cachedPath, oggBytes, cancellationToken).ConfigureAwait(false);
break;
default:
throw new ArgumentOutOfRangeException($"Unsupported {nameof(AudioType)}: {audioType}");
}
}

await Awaiters.UnityMainThread;
var audioClip = await Rest.DownloadAudioClipAsync($"file://{cachedPath}", audioType, cancellationToken: cancellationToken);
var audioClip = await Rest.DownloadAudioClipAsync($"file://{cachedPath}", audioType, debug: EnableDebug, cancellationToken: cancellationToken);
return new VoiceClip(clipId, text, voice, audioClip, cachedPath);
}

Expand Down Expand Up @@ -182,7 +182,6 @@ public async Task<VoiceClip> StreamTextToSpeechAsync(string text, Voice voice, A
OutputFormat.PCM_44100 => 44100,
_ => throw new ArgumentOutOfRangeException(nameof(outputFormat), outputFormat, null)
};
var downloadDirectory = await GetCacheDirectoryAsync(voice);
var defaultVoiceSettings = voiceSettings ?? voice.Settings ?? await client.VoicesEndpoint.GetDefaultVoiceSettingsAsync(cancellationToken);
var request = new TextToSpeechRequest(text, model, defaultVoiceSettings);
var payload = JsonConvert.SerializeObject(request, ElevenLabsClient.JsonSerializationOptions);
Expand All @@ -208,10 +207,11 @@ public async Task<VoiceClip> StreamTextToSpeechAsync(string text, Voice voice, A
var pcmData = PCMEncoder.Decode(response.Data, PCMFormatSize.SixteenBit);
var fullClip = AudioClip.Create(clipId, pcmData.Length, 1, frequency, false);
var oggBytes = await OggEncoder.ConvertToBytesAsync(pcmData, frequency, 1, cancellationToken: cancellationToken).ConfigureAwait(false);
var downloadDirectory = await GetCacheDirectoryAsync(voice);
var cachedPath = $"{downloadDirectory}/{clipId}.ogg";
await File.WriteAllBytesAsync(cachedPath, oggBytes, cancellationToken: cancellationToken).ConfigureAwait(false);
await Awaiters.UnityMainThread;
await Rest.DownloadAudioClipAsync($"file://{cachedPath}", AudioType.OGGVORBIS, cancellationToken: cancellationToken);
await Rest.DownloadAudioClipAsync($"file://{cachedPath}", AudioType.OGGVORBIS, debug: EnableDebug, cancellationToken: cancellationToken);
return new VoiceClip(clipId, text, voice, fullClip, cachedPath);

void StreamCallback(Response partialResponse)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
using ElevenLabs.Voices;
using Newtonsoft.Json;
using System;
using System.Text;
using UnityEngine.Scripting;

namespace ElevenLabs.TextToSpeech
Expand All @@ -22,6 +23,11 @@ internal sealed class TextToSpeechRequest
throw new ArgumentNullException(nameof(text));
}

if (!Encoding.GetEncoding(text).Equals(Encoding.UTF8))
{
text = Encoding.UTF8.GetString(Encoding.Default.GetBytes(text));
}

Text = text;
Model = model ?? Models.Model.MonoLingualV1;
VoiceSettings = voiceSettings ?? throw new ArgumentNullException(nameof(voiceSettings));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ public async Task<GeneratedVoiceOptions> GetVoiceGenerationOptionsAsync(Cancella
}

await File.WriteAllBytesAsync(cachedPath, response.Data, cancellationToken).ConfigureAwait(true);
var audioClip = await Rest.DownloadAudioClipAsync($"file://{cachedPath}", AudioType.MPEG, cancellationToken: cancellationToken);
var audioClip = await Rest.DownloadAudioClipAsync($"file://{cachedPath}", AudioType.MPEG, debug: EnableDebug, cancellationToken: cancellationToken);
return new Tuple<string, AudioClip>(generatedVoiceId, audioClip);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -339,7 +339,7 @@ public async Task<VoiceClip> DownloadVoiceSampleAudioAsync(Voice voice, Sample s
}
}

var audioClip = await Rest.DownloadAudioClipAsync($"file://{cachedPath}", audioType, cancellationToken: cancellationToken);
var audioClip = await Rest.DownloadAudioClipAsync($"file://{cachedPath}", audioType, debug: EnableDebug, cancellationToken: cancellationToken);
return new VoiceClip(sample.Id, string.Empty, voice, audioClip, cachedPath);
}

Expand Down
4 changes: 2 additions & 2 deletions ElevenLabs/Packages/com.rest.elevenlabs/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"displayName": "ElevenLabs",
"description": "A non-official Eleven Labs voice synthesis RESTful client.",
"keywords": [],
"version": "3.1.2",
"version": "3.2.0",
"unity": "2021.3",
"documentationUrl": "https://github.com/RageAgainstThePixel/com.rest.elevenlabs#documentation",
"changelogUrl": "https://github.com/RageAgainstThePixel/com.rest.elevenlabs/releases",
Expand All @@ -17,7 +17,7 @@
"url": "https://github.com/StephenHodgson"
},
"dependencies": {
"com.utilities.rest": "2.3.1",
"com.utilities.rest": "2.4.0",
"com.utilities.encoder.ogg": "3.0.12"
},
"samples": [
Expand Down

0 comments on commit 19515c4

Please sign in to comment.