Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions Src/IronPython.Modules/_codecs.cs
Original file line number Diff line number Diff line change
Expand Up @@ -45,11 +45,11 @@ public static void register_error(CodeContext/*!*/ context, [NotNull]string name

/// <summary>
/// Decodes the bytes exposed by <paramref name="input"/> using the "ascii" codec.
/// </summary>
/// <param name="context">The calling code context, forwarded to <c>DoDecode</c>.</param>
/// <param name="input">Object whose buffer supplies the bytes to decode.</param>
/// <param name="errors">Name of the error handler to use; <c>null</c> is passed through to <c>DoDecode</c> unchanged.</param>
/// <returns>The result of <c>DoDecode</c>, converted to a <see cref="PythonTuple"/>.</returns>
public static PythonTuple ascii_decode(CodeContext context, [NotNull]IBufferProtocol input, string? errors = null) {
    // The buffer is disposed when the method returns, after decoding has completed.
    using var buffer = input.GetBuffer();

    // Decode with the framework's ASCII encoding; the previous early return through
    // PythonAsciiEncoding.Instance made this statement unreachable.
    return DoDecode(context, "ascii", Encoding.ASCII, buffer, errors).ToPythonTuple();
}

/// <summary>
/// Encodes <paramref name="input"/> using the "ascii" codec.
/// </summary>
/// <param name="context">The calling code context, forwarded to <c>DoEncode</c>.</param>
/// <param name="input">The string to encode.</param>
/// <param name="errors">Name of the error handler to use; <c>null</c> is passed through to <c>DoEncode</c> unchanged.</param>
/// <returns>The result of <c>DoEncode</c>, converted to a <see cref="PythonTuple"/>.</returns>
public static PythonTuple ascii_encode(CodeContext context, [NotNull]string input, string? errors = null)
    // Single expression body: a second "=>" clause is not valid C#, so only the
    // Encoding.ASCII variant is kept.
    => DoEncode(context, "ascii", Encoding.ASCII, input, errors).ToPythonTuple();

#endregion

Expand Down
7 changes: 4 additions & 3 deletions Src/IronPython/Runtime/Operations/MarshalOps.cs
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ public static object GetObject (IEnumerator<byte> bytes) {
// True: 'T'
// False: 'F'
// Float: 'f', str len, float in str
// string: 't', int len, bytes (ascii) - obsolete, Python 2 legacy, never used for writing
// string: 'u', int len, bytes (unicode)
// string: 'R' <id> - refer to interned string
// StopIteration: 'S'
Expand Down Expand Up @@ -485,7 +485,7 @@ private int ReadInt32 () {
private double ReadFloatStr () {
MoveNext ();

string str = DecodeString (PythonAsciiEncoding.Instance, ReadBytes (_myBytes.Current));
string str = DecodeString (Encoding.ASCII, ReadBytes (_myBytes.Current));

double res = 0;
if (double.TryParse (str, out res)) {
Expand Down Expand Up @@ -536,7 +536,8 @@ private object ReadBinaryFloat () {
}

/// <summary>
/// Reads a length-prefixed legacy string: an Int32 byte count followed by that many bytes.
/// </summary>
/// <returns>The decoded string, which is also appended to <c>_strings</c>.</returns>
private object ReadAsciiString () {
    // Legacy IronPython 2 behavior, accepts Latin-1
    // (declared once; a second "string res" declaration in the same scope does not compile).
    string res = DecodeString (StringOps.Latin1Encoding, ReadBytes (ReadInt32 ()));

    // Store the string under the next free index of _strings.
    // NOTE(review): presumably this is the table consulted by 'R' <id> back-references — confirm.
    _strings[_strings.Count] = res;
    return res;
}
Expand Down
61 changes: 32 additions & 29 deletions Src/IronPython/Runtime/Operations/StringOps.cs
Original file line number Diff line number Diff line change
Expand Up @@ -1682,12 +1682,27 @@ private static bool IsSign(char ch) {
return ch == '+' || ch == '-';
}

internal static string GetEncodingName(Encoding encoding, bool normalize = true) {
internal static string GetEncodingName(Encoding encoding, bool normalize = true, string defaultName = "unknown") {
string? name = null;

// if we have a valid code page try and get a reasonable name. The
// web names / mail displays tend to match CPython's terse names
if (encoding.CodePage != 0) {
switch (encoding.CodePage) {

// recognize a few common cases
case 1200: name = (defaultName == "utf-16" && BitConverter.IsLittleEndian) ? defaultName : "utf-16-le"; break;
case 1201: name = (defaultName == "utf-16" && !BitConverter.IsLittleEndian) ? defaultName : "utf-16-be"; break;

case 12000: name = (defaultName == "utf-32" && BitConverter.IsLittleEndian) ? defaultName : "utf-32-le"; break;
case 12001: name = (defaultName == "utf-32" && !BitConverter.IsLittleEndian) ? defaultName : "utf-32-be"; break;

case 20127: name = "ascii"; break;
case 28591: name = "latin-1"; break;

case 65000: name = "utf-7"; break;
case 65001: name = "utf-8"; break;
}
#if !NETCOREAPP && !NETSTANDARD
if (encoding.IsBrowserDisplay) {
name = encoding.WebName;
Expand All @@ -1699,30 +1714,18 @@ internal static string GetEncodingName(Encoding encoding, bool normalize = true)
#endif

if (name == null) {
switch (encoding.CodePage) {

// recognize a few common cases
case 1200: name = "utf-16LE"; break;
case 1201: name = "utf-16BE"; break;

case 12000: name = "utf-32LE"; break;
case 12001: name = "utf-32BE"; break;

case 20127: name = "us-ascii"; break;
case 28591: name = "iso-8859-1"; break;

case 65000: name = "utf-7"; break;
case 65001: name = "utf-8"; break;

// otherwise use a code page number which also matches CPython
default: name = "cp" + encoding.CodePage; break;
}
// otherwise use a code page number which also matches CPython
name = "cp" + encoding.CodePage;
}
}

if (name == null) {
// otherwise just finally fall back to the human readable name
name = encoding.EncodingName;
try {
name = encoding.EncodingName; // may throw on .NET Core for some encodings
} catch (NotSupportedException) {
name = defaultName;
}
}

return normalize ? NormalizeEncodingName(name) : name;
Expand Down Expand Up @@ -1802,9 +1805,9 @@ Encoding setFallback(Encoding enc, DecoderFallback fb) {
case "strict": e = setFallback(e, new ExceptionFallback(e is UTF8Encoding)); break;
case "replace": e = setFallback(e, ReplacementFallback); break;
case "ignore": e = setFallback(e, new DecoderReplacementFallback(string.Empty)); break;
case "surrogateescape": e = pe = new PythonSurrogateEscapeEncoding(e, encoding); break;
case "surrogatepass": e = pe = new PythonSurrogatePassEncoding(e, encoding); break;
default: e = pe = new PythonErrorHandlerEncoding(context, e, encoding, errors); break;
case "surrogateescape": e = pe = new PythonSurrogateEscapeEncoding(e); break;
case "surrogatepass": e = pe = new PythonSurrogatePassEncoding(e); break;
default: e = pe = new PythonErrorHandlerEncoding(context, e, errors); break;
}

string decoded = string.Empty;
Expand All @@ -1821,7 +1824,7 @@ Encoding setFallback(Encoding enc, DecoderFallback fb) {
}
} catch (DecoderFallbackException ex) {
// augmenting the caught exception instead of creating UnicodeDecodeError to preserve the stack trace
if (!ex.Data.Contains("encoding")) ex.Data["encoding"] = encoding;
if (!ex.Data.Contains("encoding")) ex.Data["encoding"] = GetEncodingName(e, normalize: false, defaultName: encoding);
if (!ex.Data.Contains("object")) ex.Data["object"] = Bytes.Make(span.Slice(start, length).ToArray()); ;
throw;
}
Expand Down Expand Up @@ -1878,9 +1881,9 @@ static Encoding setFallback(Encoding enc, EncoderFallback fb) {
case "backslashreplace": e = setFallback(e, new BackslashEncoderReplaceFallback()); break;
case "xmlcharrefreplace": e = setFallback(e, new XmlCharRefEncoderReplaceFallback()); break;
case "ignore": e = setFallback(e, new EncoderReplacementFallback(string.Empty)); break;
case "surrogateescape": e = new PythonSurrogateEscapeEncoding(e, encoding); break;
case "surrogatepass": e = new PythonSurrogatePassEncoding(e, encoding); break;
default: e = new PythonErrorHandlerEncoding(context, e, encoding, errors); break;
case "surrogateescape": e = new PythonSurrogateEscapeEncoding(e); break;
case "surrogatepass": e = new PythonSurrogatePassEncoding(e); break;
default: e = new PythonErrorHandlerEncoding(context, e, errors); break;
}

byte[]? preamble = includePreamble ? e.GetPreamble() : null;
Expand All @@ -1893,7 +1896,7 @@ static Encoding setFallback(Encoding enc, EncoderFallback fb) {
}
e.GetBytes(s, 0, s.Length, bytes, preambleLen);
} catch (EncoderFallbackException ex) {
if (!ex.Data.Contains("encoding")) ex.Data["encoding"] = encoding;
if (!ex.Data.Contains("encoding")) ex.Data["encoding"] = GetEncodingName(e, normalize: false, defaultName: encoding);
if (!ex.Data.Contains("object")) ex.Data["object"] = s;
throw;
}
Expand Down Expand Up @@ -1951,7 +1954,7 @@ static CodecsInfo() {
d["iso_8859_1"] = d["iso8859_1"] = d["8859"] = d["iso8859"]
= d["cp28591"] = d["28591"] = d["cp819"] = d["819"]
= d["latin_1"] = d["latin1"] = d["latin"] = d["l1"] = makeEncodingProxy(() => Latin1Encoding);
d["cp20127"] = d["us_ascii"] = d["us"] = d["ascii"] = d["646"] = makeEncodingProxy(() => PythonAsciiEncoding.Instance);
d["cp20127"] = d["us_ascii"] = d["us"] = d["ascii"] = d["646"] = makeEncodingProxy(() => Encoding.ASCII);
d["cp65000"] = d["utf_7"] = d["u7"] = d["unicode_1_1_utf_7"] = makeEncodingProxy(() => new UTF7Encoding(allowOptionals: true));
d["cp65001"] = d["utf_8"] = d["utf8"] = d["u8"] = makeEncodingProxy(() => new UTF8Encoding(encoderShouldEmitUTF8Identifier: false));
d["utf_8_sig"] = makeEncodingProxy(() => new UTF8Encoding(encoderShouldEmitUTF8Identifier: true));
Expand Down
Loading