Skip to content

Commit

Permalink
Use the UTF.Unknown project to determine encoding when parsing inboun…
Browse files Browse the repository at this point in the history
…d email webhook rather than blindly falling back to UTF-8

Resolves #521
  • Loading branch information
Jericho committed May 5, 2024
1 parent 7664204 commit 4e2ada1
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 2 deletions.
1 change: 1 addition & 0 deletions Source/StrongGrid/StrongGrid.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
<PackageReference Include="Pathoschild.Http.FluentClient" Version="4.3.0" />
<PackageReference Include="StyleCop.Analyzers" Version="1.2.0-beta.435" PrivateAssets="All" />
<PackageReference Include="System.Text.Json" Version="8.0.3" />
<PackageReference Include="UTF.Unknown" Version="2.5.1" />
</ItemGroup>

<ItemGroup Condition=" $(TargetFramework.StartsWith('net4')) ">
Expand Down
26 changes: 24 additions & 2 deletions Source/StrongGrid/Utilities/SendGridMultipartFormDataParser.cs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
using System.Text.Json;
using System.Threading;
using System.Threading.Tasks;
using UtfUnknown;

namespace StrongGrid.Utilities
{
Expand Down Expand Up @@ -62,6 +63,23 @@ private static SendGridMultipartFormDataParser ConvertToSendGridParser(Multipart
{
// Get the encoding specified by SendGrid for this parameter
encodings.TryGetValue(parameter.Name, out Encoding encoding);

// If necessary, determine the encoding by looking at the content
if (encoding == null && (parameter.Data?.Any() ?? false))
{
// Concatenate the lines of data.
// Normally you would append a NewLine after each line but it's not necessary for this particular instance.
// Besides, we don't know (yet) what encoding to use to convert the NewLine characters into bytes.
var parameterData = parameter.Data
.SelectMany(d => d)
.ToArray();

// Try to detect the encoding based on the content
var result = CharsetDetector.DetectFromBytes(parameterData);
encoding = result?.Detected?.Encoding;
}

// When all else fails, fall back to UTF-8
encoding ??= Encoding.UTF8;

sendGridParser._parameters.Add(new ParameterPart(parameter.Name, parameter.ToString(encoding)));
Expand All @@ -79,7 +97,7 @@ private static Encoding GetEncodingFromName(string encodingName)
{
return Encoding.GetEncoding(encodingName);
}
catch (ArgumentException)
catch
{
// ArgumentException is thrown when an "unusual" code page was used to encode a section of the email
// For example: {"to":"UTF-8","subject":"UTF-8","from":"UTF-8","text":"iso-8859-10"}
Expand All @@ -88,7 +106,11 @@ private static Encoding GetEncodingFromName(string encodingName)
// perfect because UTF-8 may or may not be able to handle all the encoded characters, but it's better
// than simply erroring out.
// See https://github.com/Jericho/StrongGrid/issues/341 for discussion.
return Encoding.UTF8;

// April 2024: return a null value instead of defaulting to UTF8 when the encoding name is invalid.
// This will allow us to subsequently use the actual content to attempt to determine which encoding
// was used to encode it.
return null;
}
}
}
Expand Down

0 comments on commit 4e2ada1

Please sign in to comment.