chakra-core
diff --git a/‎bin/ch/Helpers.cpp‎
Lines changed: 39 additions & 61 deletions b/‎bin/ch/Helpers.cpp‎
Lines changed: 39 additions & 61 deletions
diff --git a/‎bin/ch/Helpers.h‎
Lines changed: 1 addition & 1 deletion b/‎bin/ch/Helpers.h‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎bin/ch/ch.cpp‎
Lines changed: 9 additions & 26 deletions b/‎bin/ch/ch.cpp‎
Lines changed: 9 additions & 26 deletions
diff --git a/‎lib/Parser/Scan.cpp‎
Lines changed: 3 additions & 1 deletion b/‎lib/Parser/Scan.cpp‎
Lines changed: 3 additions & 1 deletion
diff --git a/‎lib/Parser/Scan.h‎
Lines changed: 15 additions & 0 deletions b/‎lib/Parser/Scan.h‎
Lines changed: 15 additions & 0 deletions
diff --git a/‎lib/Runtime/Base/ScriptContext.cpp‎
Lines changed: 11 additions & 12 deletions b/‎lib/Runtime/Base/ScriptContext.cpp‎
Lines changed: 11 additions & 12 deletions
diff --git a/‎test/es6/rlexe.xml‎
Lines changed: 7 additions & 7 deletions b/‎test/es6/rlexe.xml‎
Lines changed: 7 additions & 7 deletions
diff --git a/‎test/es6/unicode_6_identifiers.js‎
-13 KB b/‎test/es6/unicode_6_identifiers.js‎
-13 KB
diff --git a/test/es6/unicode_6_identifiers.baseline b/test/es6/unicode_6_identifiers_utf8.baseline (renamed from test/es6/unicode_6_identifiers.baseline to test/es6/unicode_6_identifiers_utf8.baseline)
@@ -4,14 +4,13 @@
 //-------------------------------------------------------------------------------------------------------
 #include "stdafx.h"
 
-HRESULT Helpers::LoadScriptFromFile(LPCSTR filename, LPCSTR& contents, bool* isUtf8Out /*= nullptr*/, UINT* lengthBytesOut /*= nullptr*/)
+HRESULT Helpers::LoadScriptFromFile(LPCSTR filename, LPCSTR& contents, UINT* lengthBytesOut /*= nullptr*/)
 {
     HRESULT hr = S_OK;
     BYTE * pRawBytes = nullptr;
     UINT lengthBytes = 0;
-    bool isUtf8 = false;
     contents = nullptr;
-    FILE * file;
+    FILE * file = nullptr;
 
     //
     // Open the file as a binary file to prevent CRT from handling encoding, line-break conversions,
@@ -42,81 +41,60 @@ HRESULT Helpers::LoadScriptFromFile(LPCSTR filename, LPCSTR& contents, bool* isU
         IfFailGo(E_FAIL);
     }
 
-    //
-    // Determine the file length, in bytes.
-    //
-    fseek(file, 0, SEEK_END);
-    lengthBytes = ftell(file);
-    fseek(file, 0, SEEK_SET);
-    pRawBytes = (LPBYTE)malloc(lengthBytes + sizeof(WCHAR));
-    if (nullptr == pRawBytes)
+    if (file != nullptr)
     {
-        fwprintf(stderr, _u("out of memory"));
-        IfFailGo(E_OUTOFMEMORY);
-    }
-
-    //
-    // Read the entire content as a binary block.
-    //
-    fread(pRawBytes, sizeof(BYTE), lengthBytes, file);
-    fclose(file);
-    *reinterpret_cast<WCHAR*>(pRawBytes + lengthBytes) = 0; // Null terminate it. Could be UTF16
-
-    //
-    // Read encoding, handling any conversion to Unicode.
-    //
-    // Warning: The UNICODE buffer for parsing is supposed to be provided by the host.
-    // This is not a complete read of the encoding. Some encodings like UTF7, UTF1, EBCDIC, SCSU, BOCU could be
-    // wrongly classified as ANSI
-    //
-    {
-        LPCWSTR contentsRaw = reinterpret_cast<LPCWSTR>(pRawBytes);
-        if ((0xEF == *pRawBytes && 0xBB == *(pRawBytes + 1) && 0xBF == *(pRawBytes + 2)))
-        {
-            isUtf8 = true;
-        }
-        else if (0xFFFE == *contentsRaw || (0x0000 == *contentsRaw && 0xFEFF == *(contentsRaw + 1)))
+        // Determine the file length, in bytes.
+        fseek(file, 0, SEEK_END);
+        lengthBytes = ftell(file);
+        fseek(file, 0, SEEK_SET);
+        pRawBytes = (LPBYTE)malloc(lengthBytes + sizeof(WCHAR));
+        if (nullptr == pRawBytes)
         {
-            // unicode unsupported
-            fwprintf(stderr, _u("unsupported file encoding"));
-            IfFailGo(E_UNEXPECTED);
+            fwprintf(stderr, _u("out of memory"));
+            IfFailGo(E_OUTOFMEMORY);
         }
-        else if (0xFEFF == *contentsRaw)
-        {
-            // unicode LE
-            isUtf8 = false;
-        }
-        else
+
+        //
+        // Read the entire content as a binary block.
+        //
+        fread(pRawBytes, sizeof(BYTE), lengthBytes, file);
+        *reinterpret_cast<WCHAR*>(pRawBytes + lengthBytes) = 0; // Null terminate it. Could be UTF16
+
+        //
+        // Read encoding, handling any conversion to Unicode.
+        //
+        // Warning: The UNICODE buffer for parsing is supposed to be provided by the host.
+        // This is not a complete read of the encoding. Some encodings like UTF7, UTF1, EBCDIC, SCSU, BOCU could be
+        // wrongly classified as ANSI
+        //
         {
-            // Assume UTF8
-            isUtf8 = true;
+            LPCWSTR contentsRaw = reinterpret_cast<LPCWSTR>(pRawBytes);
+            if (0xFFFE == *contentsRaw || (0x0000 == *contentsRaw && 0xFEFF == *(contentsRaw + 1)) ||
+                0xFEFF == *contentsRaw)
+            {
+                // unicode unsupported
+                fwprintf(stderr, _u("unsupported file encoding. Only ANSI and UTF8 supported"));
+                IfFailGo(E_UNEXPECTED);
+            }
         }
     }
 
-    if (isUtf8)
-    {
-        contents = reinterpret_cast<LPCSTR>(pRawBytes);
-    }
-    else
-    {
-        LPSTR pNarrow = nullptr;
-        IfFailGo(WideStringToNarrowDynamic(reinterpret_cast<LPCWSTR>(pRawBytes), &pNarrow));
-        contents = pNarrow;
-    }
+    contents = reinterpret_cast<LPCSTR>(pRawBytes);
 
 Error:
     if (SUCCEEDED(hr))
     {
-        if (isUtf8Out)
-        {
-            *isUtf8Out = isUtf8;
-        }
         if (lengthBytesOut)
         {
             *lengthBytesOut = lengthBytes;
         }
     }
 
+    if (file != nullptr)
+    {
+        fclose(file);
+    }
+
     if (pRawBytes && reinterpret_cast<LPCSTR>(pRawBytes) != contents)
     {
         free(pRawBytes);
 
@@ -7,7 +7,7 @@
 class Helpers
 {
 public :
-    static HRESULT LoadScriptFromFile(LPCSTR filename, LPCSTR& contents, bool* isUtf8Out = nullptr, UINT* lengthBytesOut = nullptr);
+    static HRESULT LoadScriptFromFile(LPCSTR filename, LPCSTR& contents, UINT* lengthBytesOut = nullptr); // contents receives the raw file bytes; lengthBytesOut, when non-null, receives the byte length on success
     static LPCWSTR JsErrorCodeToString(JsErrorCode jsErrorCode);
     static void LogError(__in __nullterminated const char16 *msg, ...);
 };
@@ -309,15 +309,14 @@ HRESULT ExecuteTest(const char* fileName)
     HRESULT hr = S_OK;
     LPCSTR fileContents = nullptr;
     JsRuntimeHandle runtime = JS_INVALID_RUNTIME_HANDLE;
-    bool isUtf8 = false;
     UINT lengthBytes = 0;
 
     JsContextRef context = JS_INVALID_REFERENCE;
 
     char fullPath[_MAX_PATH];
     size_t len = 0;
 
-    hr = Helpers::LoadScriptFromFile(fileName, fileContents, &isUtf8, &lengthBytes);
+    hr = Helpers::LoadScriptFromFile(fileName, fileContents, &lengthBytes);
 
     IfFailGo(hr);
     if (HostConfigFlags::flags.GenerateLibraryByteCodeHeaderIsEnabled)
@@ -359,39 +358,23 @@ HRESULT ExecuteTest(const char* fileName)
 
     if (HostConfigFlags::flags.GenerateLibraryByteCodeHeaderIsEnabled)
     {
-        if (isUtf8)
+        if (HostConfigFlags::flags.GenerateLibraryByteCodeHeader != nullptr && *HostConfigFlags::flags.GenerateLibraryByteCodeHeader != _u('\0'))
         {
-            if (HostConfigFlags::flags.GenerateLibraryByteCodeHeader != nullptr && *HostConfigFlags::flags.GenerateLibraryByteCodeHeader != _u('\0'))
-            {
-                CHAR libraryName[_MAX_PATH];
-                CHAR ext[_MAX_EXT];
-                _splitpath_s(fullPath, NULL, 0, NULL, 0, libraryName, _countof(libraryName), ext, _countof(ext));
-
-                IfFailGo(CreateLibraryByteCodeHeader(fileContents, lengthBytes, HostConfigFlags::flags.GenerateLibraryByteCodeHeader, libraryName));
-            }
-            else
-            {
-                fwprintf(stderr, _u("FATAL ERROR: -GenerateLibraryByteCodeHeader must provide the file name, i.e., -GenerateLibraryByteCodeHeader:<bytecode file name>, exiting\n"));
-                IfFailGo(E_FAIL);
-            }
+            CHAR libraryName[_MAX_PATH];
+            CHAR ext[_MAX_EXT];
+            _splitpath_s(fullPath, NULL, 0, NULL, 0, libraryName, _countof(libraryName), ext, _countof(ext));
+
+            IfFailGo(CreateLibraryByteCodeHeader(fileContents, lengthBytes, HostConfigFlags::flags.GenerateLibraryByteCodeHeader, libraryName));
         }
         else
         {
-            fwprintf(stderr, _u("FATAL ERROR: GenerateLibraryByteCodeHeader flag can only be used on UTF8 file, exiting\n"));
+            fwprintf(stderr, _u("FATAL ERROR: -GenerateLibraryByteCodeHeader must provide the file name, i.e., -GenerateLibraryByteCodeHeader:<bytecode file name>, exiting\n"));
             IfFailGo(E_FAIL);
         }
     }
     else if (HostConfigFlags::flags.SerializedIsEnabled)
     {
-        if (isUtf8)
-        {
-            CreateAndRunSerializedScript(fileName, fileContents, fullPath);
-        }
-        else
-        {
-            fwprintf(stderr, _u("FATAL ERROR: Serialized flag can only be used on UTF8 file, exiting\n"));
-            IfFailGo(E_FAIL);
-        }
+        CreateAndRunSerializedScript(fileName, fileContents, fullPath);
     }
     else
     {
 
@@ -1831,8 +1831,10 @@ tokens Scanner<EncodingPolicy>::ScanCore(bool identifyKwds)
 
                     if (Js::NumberUtilities::IsSurrogateUpperPart(upper))
                     {
+                        // Consume the rest of the utf8 bytes for the codepoint
+                        OLECHAR decodedUpper = this->ReadSurrogatePairUpper(p, last);
+                        Assert(decodedUpper == (OLECHAR) upper);
                         ch = Js::NumberUtilities::SurrogatePairAsCodePoint(ch, upper);
-                        this->template ReadFull<true>(p, last);
                     }
                 }
 
 
@@ -163,6 +163,13 @@ class NullTerminatedUnicodeEncodingPolicy
     static OLECHAR ReadFull(EncodedCharPtr &p, EncodedCharPtr last) { return *p++; }
     static OLECHAR PeekFirst(EncodedCharPtr p, EncodedCharPtr last) { return *p; }
     static OLECHAR PeekFull(EncodedCharPtr p, EncodedCharPtr last) { return *p; }
+
+    static OLECHAR ReadSurrogatePairUpper(const EncodedCharPtr&, const EncodedCharPtr& last) // UTF16-policy counterpart of the UTF8 method; neither pointer is read or advanced here
+    {
+        AssertMsg(false, "method should not be called while scanning UTF16 string"); // unconditional assert: reaching this overload is treated as a caller error
+        return 0xfffe; // NOTE(review): looks like a deliberate "invalid" placeholder value - confirm
+    }
+
     static void RestoreMultiUnits(size_t multiUnits) { }
     static size_t CharacterOffsetToUnitOffset(EncodedCharPtr start, EncodedCharPtr current, EncodedCharPtr last, charcount_t offset) { return offset; }
 
@@ -205,6 +212,14 @@ class UTF8EncodingPolicyBase
         return !IsMultiUnitChar(ch) ? static_cast< OLECHAR >(ch) : ReadRest<bScan>(ch, p, last);
     }
 
+    OLECHAR ReadSurrogatePairUpper(EncodedCharPtr &p, EncodedCharPtr last) // reads one encoded character at p, advancing p, with the second-surrogate decode option set
+    {
+        EncodedChar ch = (nullTerminated || p < last) ? *p++ : (p++, 0); // fetch the lead unit and advance p; for bounded input at/after last, p still advances and ch is 0
+        Assert(IsMultiUnitChar(ch)); // the lead unit is expected to start a multi-unit sequence
+        this->m_decodeOptions |= utf8::DecodeOptions::doSecondSurrogatePair; // NOTE(review): presumably tells the decoder to yield the second half of a surrogate pair - confirm against the utf8 codex
+        return ReadRest<true>(ch, p, last); // hand the already-read lead unit to ReadRest, which takes p by the same reference
+    }
+
     static OLECHAR PeekFirst(EncodedCharPtr p, EncodedCharPtr last) { return (nullTerminated || p < last) ? static_cast< OLECHAR >(*p) : 0; }
 
     OLECHAR PeekFull(EncodedCharPtr p, EncodedCharPtr last)
 
@@ -1591,7 +1591,10 @@ if (!sourceList)
 
         bool isLibraryCode = ((loadScriptFlag & LoadScriptFlag_LibraryCode) == LoadScriptFlag_LibraryCode);
 
-        bool utf8FromExternal = false;
+        // We assume that any buffer that wasn't passed in with LoadScriptFlag_Utf8Source is UTF16-LE
+        // We convert this into CESU-8 using our codex library
+        // isRealUtf8 tracks whether the buffer is truly UTF8 or if it's CESU-8, for the purpose of parsing later
+        bool isRealUtf8 = false;
         if ((loadScriptFlag & LoadScriptFlag_Utf8Source) != LoadScriptFlag_Utf8Source)
         {
             // Convert to UTF8 and then load that
@@ -1629,16 +1632,12 @@ if (!sourceList)
         }
         else
         {
-            // xplat-todo: This is temporary. How to tell if utf8 fromExternal?
-            if (cb >= 3 && script[0] == 0xEF && script[1] == 0xBB && script[2] == 0xBF) // ef bb bf
-            {
-                utf8Script = (LPUTF8)script;
-                cbNeeded = cb;
-            }
-            else
-            {
-                utf8FromExternal = true;
-            }
+            // If LoadScriptFlag_Utf8Source was passed in, then the source buffer is guaranteed to actually be
+            // UTF8, and not CESU-8. JSRT APIs expect real UTF8 buffers to be passed in, not CESU-8, and ch.exe 
+            // only supports ANSI (utf8-compatible) and actual UTF8 files, not UTF16/UTF16-LE encoded files.
+            isRealUtf8 = true;
+            utf8Script = (LPUTF8)script;
+            cbNeeded = cb;
 
             // We do not own the memory passed into DefaultLoadScriptUtf8. We need to save it so we copy the memory.
             if (*ppSourceInfo == nullptr)
@@ -1693,7 +1692,7 @@ if (!sourceList)
         }
 
         ParseNodePtr parseTree;
-        if (utf8FromExternal)
+        if (isRealUtf8)
         {
             Assert((loadScriptFlag & LoadScriptFlag_Utf8Source)
                     == LoadScriptFlag_Utf8Source);
 
@@ -505,8 +505,8 @@
   </test>
   <test>
     <default>
-      <files>unicode_6_identifiers.js</files>
-      <baseline>unicode_6_identifiers.baseline</baseline>
+      <files>unicode_6_identifiers_utf8.js</files>
+      <baseline>unicode_6_identifiers_utf8.baseline</baseline>
       <compile-flags> -ES6Unicode</compile-flags>
       <tags>exclude_win7,exclude_ship</tags>
     </default>
@@ -584,7 +584,7 @@
   </test>
   <test>
     <default>
-      <files>unicode_idDeferParseFunctions.js</files>
+      <files>unicode_idDeferParseFunctions_utf8.js</files>
       <compile-flags>-ES6Unicode</compile-flags>
     </default>
   </test>
@@ -627,17 +627,17 @@
   </test>
   <test>
     <default>
-      <files>unicode_regex_surrogate.js</files>
-      <baseline>unicode_regex_surrogate.baseline</baseline>
+      <files>unicode_regex_surrogate_utf8.js</files>
+      <baseline>unicode_regex_surrogate_utf8.baseline</baseline>
       <compile-flags> -ES6Unicode -ES6RegExSticky</compile-flags>
       <tags>exclude_ship,Slow</tags>
       <timeout>300</timeout>
     </default>
   </test>
   <test>
     <default>
-      <files>unicode_blue_533163.js</files>
-      <baseline>unicode_blue_533163.baseline</baseline>
+      <files>unicode_blue_533163_utf8.js</files>
+      <baseline>unicode_blue_533163_utf8.baseline</baseline>
       <compile-flags> -ES6Unicode</compile-flags>
       <tags>exclude_ship</tags>
     </default>
Original file line number	Diff line number	Diff line change
`@@ -7,7 +7,7 @@`
`7`	`7`	`class Helpers`
`8`	`8`	`{`
`9`	`9`	`public :`
`10`		`- static HRESULT LoadScriptFromFile(LPCSTR filename, LPCSTR& contents, bool* isUtf8Out = nullptr, UINT* lengthBytesOut = nullptr);`
	`10`	`+ static HRESULT LoadScriptFromFile(LPCSTR filename, LPCSTR& contents, UINT* lengthBytesOut = nullptr);`
`11`	`11`	`static LPCWSTR JsErrorCodeToString(JsErrorCode jsErrorCode);`
`12`	`12`	`static void LogError(__in __nullterminated const char16 *msg, ...);`
`13`	`13`	`};`
Original file line number	Diff line number	Diff line change
`@@ -1831,8 +1831,10 @@ tokens Scanner<EncodingPolicy>::ScanCore(bool identifyKwds)`
`1831`	`1831`
`1832`	`1832`	`if (Js::NumberUtilities::IsSurrogateUpperPart(upper))`
`1833`	`1833`	`{`
	`1834`	`+ // Consume the rest of the utf8 bytes for the codepoint`
	`1835`	`+ OLECHAR decodedUpper = this->ReadSurrogatePairUpper(p, last);`
	`1836`	`+ Assert(decodedUpper == (OLECHAR) upper);`
`1834`	`1837`	`ch = Js::NumberUtilities::SurrogatePairAsCodePoint(ch, upper);`
`1835`		`- this->template ReadFull<true>(p, last);`
`1836`	`1838`	`}`
`1837`	`1839`	`}`
`1838`	`1840`