diff --git a/parent/pom.xml b/parent/pom.xml
index 7ef44a68..e80abc22 100644
--- a/parent/pom.xml
+++ b/parent/pom.xml
@@ -88,7 +88,7 @@ application while protecting against XSS.
   <properties>
     <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
     <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
-    <guava.version>27.1-jre</guava.version>
+    <guava.version>30.1-jre</guava.version>
   </properties>
 
   <build>
diff --git a/scripts/build_for_travis.sh b/scripts/build_for_travis.sh
index 5400a2f5..1aec8fd0 100755
--- a/scripts/build_for_travis.sh
+++ b/scripts/build_for_travis.sh
@@ -35,7 +35,7 @@ if [ -n "$IS_LEGACY" ]; then
 else
     # Build the whole kit-n-kaboodle.
     mvn                             -f aggregate/pom.xml       source:jar javadoc:jar verify $COMMON_FLAGS \
-    && mvn -Dguava.version=27.1-jre -f aggregate/pom.xml clean source:jar javadoc:jar verify $COMMON_FLAGS \
+    && mvn -Dguava.version=30.1-jre -f aggregate/pom.xml clean source:jar javadoc:jar verify $COMMON_FLAGS \
     && mvn jacoco:report coveralls:report \
     && mvn org.sonatype.ossindex.maven:ossindex-maven-plugin:audit -f aggregate $COMMON_FLAGS
 fi
diff --git a/src/main/java/org/owasp/html/Encoding.java b/src/main/java/org/owasp/html/Encoding.java
index 4a2a601f..94fbde98 100644
--- a/src/main/java/org/owasp/html/Encoding.java
+++ b/src/main/java/org/owasp/html/Encoding.java
@@ -29,7 +29,9 @@
 package org.owasp.html;
 
 import java.io.IOException;
-
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.Set;
 import javax.annotation.Nullable;
 
 /** Encoders and decoders for HTML. */
@@ -94,6 +96,7 @@ static void stripBannedCodeunits(StringBuilder sb) {
     stripBannedCodeunits(sb, 0);
   }
 
+
   @TCB
   private static void stripBannedCodeunits(StringBuilder sb, int start) {
     int k = start;
@@ -108,13 +111,16 @@ private static void stripBannedCodeunits(StringBuilder sb, int start) {
           if (i+1 < n) {
             char next = sb.charAt(i+1);
             if (Character.isSurrogatePair(ch, next)) {
-              sb.setCharAt(k++, ch);
-              sb.setCharAt(k++, next);
+              // The last two code points in each plane are non-characters that should be elided.
+              if ((ch & 0xfc3f) != 0xd83f || (next & 0xfffe) != 0xdffe) {
+                sb.setCharAt(k++, ch);
+                sb.setCharAt(k++, next);
+              }
               ++i;
             }
           }
           continue;
-        } else if ((ch & 0xfffe) == 0xfffe) {
+        } else if ((ch & 0xfffe) == 0xfffe || (0xfdd0 <= ch && ch <= 0xfdef)) {
           continue;
         }
       }
@@ -139,19 +145,34 @@ private static int longestPrefixOfGoodCodeunits(String s) {
         }
       } else if (0xd800 <= ch) {
         if (ch <= 0xdfff) {
-          if (i+1 < n && Character.isSurrogatePair(ch, s.charAt(i+1))) {
-            ++i;  // Skip over low surrogate since we know it's ok.
+          if (i + 1 < n ) {
+            // could be a surrogate pair
+            char cn = s.charAt(i+1);
+            if( Character.isSurrogatePair(ch,cn) ) {
+              int cp = Character.toCodePoint(ch, cn);
+              // Could be a non-character
+              if ((cp & 0xfffe) == 0xfffe) {
+                // not valid
+                return i;
+              }
+
+              // skip over trailing surrogate since we know it is OK
+              i++;
+            } else {
+              // not a surrogate pair
+              return i;
+            }
           } else {
+            // isolated surrogate at end of string
             return i;
           }
-        } else if ((ch & 0xfffe) == 0xfffe) {
+        } else if ((ch & 0xfffe) == 0xfffe || (0xfdd0 <= ch && ch <= 0xfdef)) {
           return i;
         }
       }
     }
     return -1;
   }
-
   /**
    * Appends an encoded form of plainText to output where the encoding is
    * sufficient to prevent an HTML parser from interpreting any characters in
@@ -196,6 +217,7 @@ static void encodePcdataOnto(String plainText, Appendable output)
     encodeHtmlOnto(plainText, output, "{<!-- -->");
   }
 
+
   /**
    * Appends an encoded form of plainText to putput where the encoding is
    * sufficient to prevent an HTML parser from transitioning out of the
@@ -240,127 +262,94 @@ private static void encodeHtmlOnto(
       char ch = plainText.charAt(i);
       if (ch < REPLACEMENTS.length) {  // Handles all ASCII.
         String repl = REPLACEMENTS[ch];
-        if (ch == '{' && repl == null) {
-          if (i + 1 == n || plainText.charAt(i + 1) == '{') {
-            repl = braceReplacement;
+        if( repl==null ) {
+          if (ch == '{') {
+            if (i + 1 == n || plainText.charAt(i + 1) == '{') {
+              // "{{" detected, so use the brace replacement
+              repl = braceReplacement;
+            }
+          }
+          if (ch == '\r') {
+            // If this CR is followed by a LF, just remove it. Otherwise replace it with a LF.
+            if (i + 1 == n || plainText.charAt(i + 1) != '\n' ) {
+              // CR not followed by LF, so turn into LF
+              repl = "\n";
+            } else {
+              // CRLF, so remove CR
+              repl = "";
+            }
           }
         }
         if (repl != null) {
           output.append(plainText, pos, i).append(repl);
           pos = i + 1;
         }
-      } else if ((0x93A <= ch && ch <= 0xC4C)
-          && (
-              // Devanagari vowel
-              ch <= 0x94F
-              // Benagli vowels
-              || 0x985 <= ch && ch <= 0x994
-              || 0x9BE <= ch && ch < 0x9CC  // 0x9CC (Bengali AU) is ok
-              || 0x9E0 <= ch && ch <= 0x9E3
-              // Telugu vowels
-              || 0xC05 <= ch && ch <= 0xC14
-              || 0xC3E <= ch && ch != 0xC48 /* 0xC48 (Telugu AI) is ok */)) {
-        // https://manishearth.github.io/blog/2018/02/15/picking-apart-the-crashing-ios-string/
-        // > So, ultimately, the full set of cases that cause the crash are:
-        // >   Any sequence <consonant1, virama, consonant2, ZWNJ, vowel>
-        // > in Devanagari, Bengali, and Telugu, where: ...
-
-        // TODO: This is needed as of February 2018, but hopefully not long after that.
-        // We eliminate the ZWNJ which seems the minimally damaging thing to do to
-        // Telugu rendering per the article above:
-        // > a ZWNJ before a vowel doesn’t really do anything for most Indic scripts.
-
-        if (pos < i) {
-          if (plainText.charAt(i - 1) == 0x200C /* ZWNJ */) {
-            output.append(plainText, pos, i - 1);
-            // Drop the ZWNJ on the floor.
-            pos = i;
-          }
-        } else if (output instanceof StringBuilder) {
-          StringBuilder sb = (StringBuilder) output;
-          int len = sb.length();
-          if (len != 0) {
-            if (sb.charAt(len - 1) == 0x200C /* ZWNJ */) {
-              sb.setLength(len - 1);
-            }
-          }
-        }
-      } else if (((char) 0xd800) <= ch) {
-        if (ch <= ((char) 0xdfff)) {
-          char next;
-          if (i + 1 < n
-              && Character.isSurrogatePair(
-                  ch, next = plainText.charAt(i + 1))) {
-            // Emit supplemental codepoints as entity so that they cannot
-            // be mis-encoded as UTF-8 of surrogates instead of UTF-8 proper
-            // and get involved in UTF-16/UCS-2 confusion.
-            int codepoint = Character.toCodePoint(ch, next);
-            output.append(plainText, pos, i);
+      } else if (RISKY_NORMALIZATION.contains(ch)) {
+        // Application of unicode compatibility normalization produces a risky character.
+        output.append(plainText, pos, i);
+        pos = i + 1;
+        appendNumericEntity(ch,output);
+      } else if ((ch <= 0x9f) || (0xfdd0 <= ch && ch <= 0xfdef) || ((ch & 0xfffe) == 0xfffe)) {
+        // Elide C1 escapes and BMP non-characters.
+        output.append(plainText, pos, i);
+        pos = i + 1;
+      } else if (0xd800 <= ch && ch <= 0xdfff) {
+        // handle surrogates
+        char next;
+        if (i + 1 < n && Character.isSurrogatePair(ch, next = plainText.charAt(i + 1))) {
+          // Emit supplemental codepoints as entity so that they cannot
+          // be mis-encoded as UTF-8 of surrogates instead of UTF-8 proper
+          // and get involved in UTF-16/UCS-2 confusion.
+          int codepoint = Character.toCodePoint(ch, next);
+          output.append(plainText, pos, i);
+          // do not append 0xfffe and 0xffff from any plane
+          if( (codepoint & 0xfffe) != 0xfffe ) {
             appendNumericEntity(codepoint, output);
-            ++i;
-            pos = i + 1;
-          } else {
-            output.append(plainText, pos, i);
-            // Elide the orphaned surrogate.
-            pos = i + 1;
           }
-        } else if (0xfe60 <= ch) {
-          // Is a control character or possible full-width version of a
-          // special character, a BOM, or one of the FE60 block that might
-          // be elided or normalized to an HTML special character.
-          // Running
-          //   cat NormalizationText.txt \
-          //     | perl -pe 's/ ?#.*//' \
-          //     | egrep '(;003C(;|$)|003E|0026|0022|0027|0060)'
-          // dumps a list of code-points that can normalize to HTML special
-          // characters.
+          ++i;
+          pos = i + 1;
+        } else {
           output.append(plainText, pos, i);
+          // Elide the orphaned surrogate.
           pos = i + 1;
-          if ((ch & 0xfffe) == 0xfffe) {
-            // Elide since not an the XML Character.
-          } else {
-            appendNumericEntity(ch, output);
-          }
         }
-      } else if (ch == '\u1FEF') {  // Normalizes to backtick.
-        output.append(plainText, pos, i).append("&#8175;");
-        pos = i + 1;
       }
     }
     output.append(plainText, pos, n);
   }
 
+
+  /**
+   * Append a codepoint to the output as a numeric entity.
+   *
+   * @param codepoint the codepoint
+   * @param output    the output
+   *
+   * @throws IOException              if the output cannot be written to
+   * @throws IllegalArgumentException if the codepoint cannot be represented as a numeric escape.
+   */
   @TCB
   static void appendNumericEntity(int codepoint, Appendable output)
       throws IOException {
+    if (((codepoint <= 0x1f) && (codepoint != 9 && codepoint != 0xa)) || (0x7f <= codepoint && codepoint <= 0x9f)) {
+      throw new IllegalArgumentException("Illegal numeric escape. Cannot represent control code: " + codepoint);
+    }
+    if ((0xfdd0 <= codepoint && codepoint <= 0xfdef) || ((codepoint & 0xfffe) == 0xfffe)) {
+      throw new IllegalArgumentException("Illegal numeric escape. Cannot represent non-character: " + codepoint);
+    }
+
     output.append("&#");
     if (codepoint < 100) {
-      // TODO: is this dead code due to REPLACEMENTS above.
-      if (codepoint < 10) {
-        output.append((char) ('0' + codepoint));
-      } else {
-        output.append((char) ('0' + (codepoint / 10)));
-        output.append((char) ('0' + (codepoint % 10)));
-      }
+      // Below 100, a decimal representation is shortest
+      output.append(Integer.toString(codepoint));
     } else {
-      int nDigits = (codepoint < 0x1000
-                     ? codepoint < 0x100 ? 2 : 3
-                     : (codepoint < 0x10000 ? 4
-                        : codepoint < 0x100000 ? 5 : 6));
+      // Append a hexadecimal value
       output.append('x');
-      for (int digit = nDigits; --digit >= 0;) {
-        int hexDigit = (codepoint >>> (digit << 2)) & 0xf;
-        output.append(HEX_NUMERAL[hexDigit]);
-      }
+      output.append(Integer.toHexString(codepoint));
     }
     output.append(";");
   }
 
-  private static final char[] HEX_NUMERAL = {
-   '0', '1', '2', '3', '4', '5', '6', '7',
-   '8', '9', 'a', 'b', 'c', 'd', 'e', 'f',
-  };
-
   /** Maps ASCII chars that need to be encoded to an equivalent HTML entity. */
   private static final String[] REPLACEMENTS = new String[0x80];
   static {
@@ -385,17 +374,41 @@ static void appendNumericEntity(int codepoint, Appendable output)
     REPLACEMENTS['>']  = "&gt;";                     // HTML special.
     REPLACEMENTS['@']  = "&#" + ((int) '@')  + ";";  // Conditional compilation.
     REPLACEMENTS['`']  = "&#" + ((int) '`')  + ";";  // Attribute delimiter.
+    REPLACEMENTS['\u007f']  = "";                    // Elide delete
   }
 
   /**
    * IS_BANNED_ASCII[i] where is an ASCII control character codepoint (&lt; 0x20)
    * is true for control characters that are not allowed in an XML source text.
    */
-  private static boolean[] IS_BANNED_ASCII = new boolean[0x20];
+  private static final boolean[] IS_BANNED_ASCII = new boolean[0x20];
   static {
     for (int i = 0; i < IS_BANNED_ASCII.length; ++i) {
       IS_BANNED_ASCII[i] = !(i == '\t' || i == '\n' || i == '\r');
     }
   }
 
+  /** Set of all Unicode characters which when processed with unicode compatibility decomposition will include a non-alphanumeric ascii character. */
+  static final Set<Character> RISKY_NORMALIZATION;
+  static {
+    HashSet<Character> set = new HashSet<Character>();
+
+    // These characters all decompose riskily
+    String singles = "\u037e\u1fef\u203c\u207a\u208a\u2100\u2101\u2105\u2106\u2260\u226e\u226f\u33c2\u33c7\u33d8\ufb29\ufe10\ufe19\ufe30\ufe47\ufe48\ufe52";
+    for(char ch : singles.toCharArray()) {
+      set.add(ch);
+    }
+
+    // This string is composed of pairs of characters defining inclusive start and end ranges.
+    String pairs =
+              "\u2024\u2026\u2047\u2049\u207c\u207e\u208c\u208e\u2474\u24b5\u2a74\u2a76\u3200\u321e\u3220\u3243\ufe13\ufe16\ufe33"
+            + "\ufe38\ufe4d\ufe50\ufe54\ufe57\ufe59\ufe5c\ufe5f\ufe66\ufe68\ufe6b\uff01\uff0f\uff1a\uff20\uff3b\uff40\uff5b\uff5e";
+    for(int i=0;i<pairs.length();i+=2) {
+      for(char ch=pairs.charAt(i);ch<=pairs.charAt(i+1);ch++) {
+        set.add(ch);
+      }
+    }
+
+    RISKY_NORMALIZATION = Collections.unmodifiableSet(set);
+  }
 }
diff --git a/src/main/java/org/owasp/html/HtmlLexer.java b/src/main/java/org/owasp/html/HtmlLexer.java
index 00fcc7dc..376c939c 100644
--- a/src/main/java/org/owasp/html/HtmlLexer.java
+++ b/src/main/java/org/owasp/html/HtmlLexer.java
@@ -527,7 +527,7 @@ private HtmlToken parseToken() {
             break;
           }
         }
-      } else if (!Character.isWhitespace(ch)) {
+      } else if (!isAsciiWhitespace(ch)) {
         type = HtmlTokenType.TEXT;
         for (; end < limit; ++end) {
           ch = input.charAt(end);
@@ -538,12 +538,12 @@ private HtmlToken parseToken() {
               && '>' == input.charAt(end + 1)) {
             break;
           } else if ('>' == ch || '=' == ch
-                     || Character.isWhitespace(ch)) {
+                     || isAsciiWhitespace(ch)) {
             break;
           } else if ('"' == ch || '\'' == ch) {
             if (end + 1 < limit) {
               char ch2 = input.charAt(end + 1);
-              if (Character.isWhitespace(ch2)
+              if (isAsciiWhitespace(ch2)
                   || ch2 == '>' || ch2 == '/') {
                 ++end;
                 break;
@@ -554,7 +554,7 @@ private HtmlToken parseToken() {
       } else {
         // We skip whitespace tokens inside tag bodies.
         type = HtmlTokenType.IGNORABLE;
-        while (end < limit && Character.isWhitespace(input.charAt(end))) {
+        while (end < limit && isAsciiWhitespace(input.charAt(end))) {
           ++end;
         }
       }
@@ -604,7 +604,7 @@ private HtmlToken parseToken() {
               ch = input.charAt(end);
               switch (state) {
                 case TAGNAME:
-                  if (Character.isWhitespace(ch)
+                  if (isAsciiWhitespace(ch)
                       || '>' == ch || '/' == ch || '<' == ch) {
                     // End processing of an escape exempt block when we see
                     // a corresponding end tag.
@@ -749,6 +749,17 @@ private String canonicalElementName(int start, int end) {
     return HtmlLexer.canonicalElementName(input.substring(start, end));
   }
 
+  /**
+   * Test if a character is an ASCII whitespace according to the HTML rules. Other Unicode whitespace characters do not count.
+   *
+   * @param ch the character to test
+   *
+   * @return true if it is one of TAB, LF, FF, CR or SPACE
+   */
+  private static boolean isAsciiWhitespace(int ch) {
+    return (ch == ' ') || (ch == '\t') || (ch == '\n') || (ch == '\r') || (ch == '\f');
+  }
+
   private static boolean isIdentStart(char ch) {
     return ch >= 'A' && ch <= 'z' && (ch <= 'Z' || ch >= 'a');
   }
diff --git a/src/test/java/org/owasp/html/ElidedCharactersTest.java b/src/test/java/org/owasp/html/ElidedCharactersTest.java
new file mode 100644
index 00000000..21e98d83
--- /dev/null
+++ b/src/test/java/org/owasp/html/ElidedCharactersTest.java
@@ -0,0 +1,143 @@
+package org.owasp.html;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+import junit.framework.TestCase;
+import org.junit.Test;
+
+/**
+ * Some characters should not appear in HTML documents, present risks for log-file injection, or are otherwise discouraged from sanitized HTML. This set of
+ * unit tests verifies that the inclusion of such characters does not allow dangerous code to slip through.
+ * <p>
+ * There are two requirements:
+ * <p>
+ * 1) The Encoding.encodePcdataOnto method should remove discouraged characters.
+ * 2) Sanitized HTML should not change
+ *
+ * @author Simon Greatrix on 25/01/2021.
+ */
+public class ElidedCharactersTest extends TestCase {
+
+  /** List of all characters that are discouraged in HTML. */
+  static List<String> DISCOURAGED;
+
+
+  @Test
+  public static final void testRemoveDiscouragedCharacterFromTagStart() throws IOException {
+    // <Xh1></h1> is not recognised as a tag, so it is escaped as text, followed by an unmatched end tag
+    for (String d : DISCOURAGED) {
+      String test = "<" + d+"h1></h1>";
+      String html = Sanitizers.BLOCKS.sanitize(test);
+      String m = String.format("Use in <h1> of U+%06x", d.codePointAt(0));
+      assertEquals(m, "&lt;h1&gt;", html);
+    }
+
+    String html = Sanitizers.BLOCKS.sanitize("<h1></h1>");
+    assertEquals("<h1></h1>",html);
+  }
+
+  @Test
+  public static final void testRemoveDiscouragedCharacterFromInsideTag() throws IOException {
+    // <hX1></h1> is an unrecognised tag and an unmatched end tag
+    for (String d : DISCOURAGED) {
+      String test = "<h"+d+"1></h1>";
+      String html = Sanitizers.BLOCKS.sanitize(test);
+      String m = String.format("Use in <h1> of U+%06x", d.codePointAt(0));
+      assertEquals(m, "", html);
+    }
+
+    String html = Sanitizers.BLOCKS.sanitize("<h1></h1>");
+    assertEquals("<h1></h1>",html);
+  }
+
+  @Test
+  public static final void testRemoveDiscouragedCharacterFromTagEnd() throws IOException {
+    // <h1X></h1> is an unrecognised tag and an unmatched end tag
+    for (String d : DISCOURAGED) {
+      String test = "<h1"+ d+"></h1>";
+      String html = Sanitizers.BLOCKS.sanitize(test);
+      String m = String.format("Use in <h1> of U+%06x", d.codePointAt(0));
+      assertEquals(m, "", html);
+    }
+
+    String html = Sanitizers.BLOCKS.sanitize("<h1></h1>");
+    assertEquals("<h1></h1>",html);
+  }
+
+  @Test
+  public static final void testRemoveDiscouragedCharacterFromEndWhenEncoding() throws IOException {
+    for (String d : DISCOURAGED) {
+      String test = "Hello" + d;
+      StringBuilder builder = new StringBuilder();
+      Encoding.encodePcdataOnto(test, builder);
+      String m = String.format("Elision of U+%06x", d.codePointAt(0));
+      assertEquals(m, "Hello", builder.toString());
+    }
+  }
+
+
+  @Test
+  public static final void testRemoveDiscouragedCharacterFromMiddleWhenEncoding() throws IOException {
+    for (String d : DISCOURAGED) {
+      String test = "Hel" + d + "lo";
+      StringBuilder builder = new StringBuilder();
+      Encoding.encodePcdataOnto(test, builder);
+      String m = String.format("Elision of U+%06x", d.codePointAt(0));
+      assertEquals(m, "Hello", builder.toString());
+    }
+  }
+
+
+  @Test
+  public static final void testRemoveDiscouragedCharacterFromStartWhenEncoding() throws IOException {
+    for (String d : DISCOURAGED) {
+      String test = d + "Hello";
+      StringBuilder builder = new StringBuilder();
+      Encoding.encodePcdataOnto(test, builder);
+      String m = String.format("Elision of U+%06x", d.codePointAt(0));
+      assertEquals(m, "Hello", builder.toString());
+    }
+  }
+
+
+  static {
+    ArrayList<String> list = new ArrayList<String>();
+
+    // C0 control characters, except for the whitespace characters TAB (0x9), LF (0xa), FF (0xc) and CR (0xd)
+    for (char i = 0; i <= 0x1f; i++) {
+      if (i != 0x9 && i != 0xa && i != 0xd && i!=0xc) {
+        list.add(Character.toString(i));
+      }
+    }
+
+    // Delete character and C1 escapes which are discouraged by XML and banned as HTML numeric escapes. Also discouraging the U+0085 NEL characters.
+    for (char i = 0x7f; i <= 0x9f; i++) {
+      list.add(Character.toString(i));
+    }
+
+    // Isolated surrogates. NB Must also test that valid non-isolated surrogates are retained.
+    for (char i = 0xd800; i <= 0xdfff; i++) {
+      list.add(Character.toString(i));
+    }
+
+    // Non-characters U+FDD0 to U+FDEF from the Basic Multilingual Plane.
+    for (char i = 0xfdd0; i <= 0xfdef; i++) {
+      list.add(Character.toString(i));
+    }
+
+    list.add(Character.toString((char) 0xfffe));
+    list.add(Character.toString((char) 0xffff));
+
+    // Non-characters from the supplemental planes
+    for (int i = 1; i <= 16; i++) {
+      list.add(new String(Character.toChars(0x10000 * i + 0xfffe)));
+      list.add(new String(Character.toChars(0x10000 * i + 0xffff)));
+    }
+
+    DISCOURAGED = Collections.unmodifiableList(list);
+  }
+
+}
diff --git a/src/test/java/org/owasp/html/EncodingTest.java b/src/test/java/org/owasp/html/EncodingTest.java
index eea7769a..86223db3 100644
--- a/src/test/java/org/owasp/html/EncodingTest.java
+++ b/src/test/java/org/owasp/html/EncodingTest.java
@@ -28,6 +28,11 @@
 
 package org.owasp.html;
 
+import java.io.IOException;
+import java.text.Normalizer;
+import java.text.Normalizer.Form;
+import java.util.HashSet;
+
 import org.junit.Test;
 
 import junit.framework.TestCase;
@@ -207,6 +212,29 @@ public static final void testDecodeHtml() {
     assertEquals(
         "&bogus;",
         Encoding.decodeHtml("&bogus;"));
+
+    assertEquals(
+        "lt<",
+        Encoding.decodeHtml("lt&lt;"));
+    assertEquals(
+        "ltlt;",
+        Encoding.decodeHtml("ltlt;"));
+    assertEquals(
+        "lt&lt;",
+        Encoding.decodeHtml("lt&&#108;t;"));
+    assertEquals(
+        "lt&<",
+        Encoding.decodeHtml("lt&&lt;"));
+
+    assertEquals(
+        "lt&&lt;gt",
+        Encoding.decodeHtml("\ufdddlt&&l\ufffet;\udc9c\ud835gt"));
+    assertEquals(
+        "lt&<",
+        Encoding.decodeHtml("lt&&lt;\udc9c"));
+    assertEquals(
+        "lt&<",
+        Encoding.decodeHtml("lt&&lt;\ud835"));
   }
 
   @Test
@@ -214,9 +242,10 @@ public static final void testAppendNumericEntityAndEncodeOnto()
       throws Exception {
     StringBuilder sb = new StringBuilder();
     StringBuilder cps = new StringBuilder();
+    // Test with a set of legal code points
     for (int codepoint : new int[] {
-        0, 9, '\n', '@', 0x80, 0xff, 0x100, 0xfff, 0x1000, 0x123a, 0xffff,
-        0x10000, Character.MAX_CODE_POINT }) {
+        9, '\n', '@', 0xa0, 0xff, 0x100, 0xfff, 0x1000, 0x123a, 0xfffd,
+        0x10000, Character.MAX_CODE_POINT-2 }) {
       Encoding.appendNumericEntity(codepoint, sb);
       sb.append(' ');
 
@@ -224,18 +253,43 @@ public static final void testAppendNumericEntityAndEncodeOnto()
     }
 
     assertEquals(
-         "&#0; &#9; &#10; &#64; &#x80; &#xff; &#x100; &#xfff; &#x1000; "
-         + "&#x123a; &#xffff; &#x10000; &#x10ffff; ",
+         "&#9; &#10; &#64; &#xa0; &#xff; &#x100; &#xfff; &#x1000; "
+         + "&#x123a; &#xfffd; &#x10000; &#x10fffd; ",
          sb.toString());
 
     StringBuilder out = new StringBuilder();
     Encoding.encodeHtmlAttribOnto(cps.toString(), out);
     assertEquals(
-        " \t \n &#64; \u0080 \u00ff \u0100 \u0fff \u1000 "
-        + "\u123a  &#x10000; &#x10ffff; ",
+        "\t \n &#64; \u00a0 \u00ff \u0100 \u0fff \u1000 "
+        + "\u123a \ufffd &#x10000; &#x10fffd; ",
         out.toString());
   }
 
+  @Test
+  public static final void testAppendIllegalNumericEntityAndEncodeOnto()
+      throws Exception {
+    StringBuilder sb = new StringBuilder();
+    StringBuilder cps = new StringBuilder();
+    // Test with a set of illegal code points
+    for (int codepoint : new int[] { 8, '\r', 0x7f, 0x85, 0xfdd0, 0xfffe, 0x1fffe, 0x3ffff }) {
+      try {
+        Encoding.appendNumericEntity(codepoint, sb);
+        fail("Illegal character was accepted: "+codepoint);
+      } catch ( IllegalArgumentException e ) {
+        // expected behaviour
+      }
+
+      cps.appendCodePoint(codepoint).append(',');
+    }
+
+    assertEquals("", sb.toString());
+
+    StringBuilder out = new StringBuilder();
+    Encoding.encodeHtmlAttribOnto(cps.toString(), out);
+    assertEquals(
+        ",\n,,,,,,,",
+        out.toString());
+  }
   @Test
   public static final void testAngularJsBracesInTextNode() throws Exception {
     StringBuilder sb = new StringBuilder();
@@ -276,9 +330,21 @@ public static final void testStripBannedCodeunits() {
     assertStripped("foo\ud800\udc00bar", "foo\udc00\ud800\udc00bar");
     assertStripped("foo\ud834\udd1ebar", "foo\ud834\udd1ebar");
     assertStripped("foo\ud834\udd1e", "foo\ud834\udd1e");
-    assertStripped("\uffef\ufffd", "\uffef\ufffd\ufffe\uffff");
+
+    // Check stripping of non-characters from all planes
+    for(int i=0;i<=16;i++) {
+      int o = 0x10000 * i;
+      String s = new StringBuilder().append(String.format("%02x",i)).appendCodePoint(o+0xffef).appendCodePoint(o+0xfffd)
+          .appendCodePoint(o+0xfffe).appendCodePoint(o+0xffff).toString();
+      String t = s.substring(0,(i==0)?4:6);
+      assertStripped(t,s);
+
+      s = new StringBuilder().append("foo").appendCodePoint(o+0xfffe).appendCodePoint(o+0xffff).append("bar").toString();
+      assertStripped("foobar",s);
+    }
   }
 
+
   @Test
   public static final
   void testBadlyDonePostProcessingWillnotAllowInsertingNonceAttributes()
@@ -305,4 +371,66 @@ void testBadlyDonePostProcessingWillnotAllowInsertingNonceAttributes()
     Encoding.encodeHtmlAttribOnto("a nonce=xyz ", attrib);
     assertEquals("a nonce&#61;xyz ", attrib.toString());
   }
+
+  @Test
+  public static final void testRiskyNormalizationSetContents() {
+    // Test that the risky normalization set contains the expected values
+    for(char toTest='\u0080'; toTest<'\ufffe'; toTest++) {
+      boolean isRisky = false;
+      String decomposed = Normalizer.normalize(Character.toString(toTest), Form.NFKD);
+      for(int i=0;i<decomposed.length();i++) {
+        char ch = decomposed.charAt(i);
+        if( (' '<ch && ch<'0') || ('9'<ch && ch<'A') || ('Z'<ch && ch<'a') || ('z'<ch && ch<'\u007f') ) {
+          // Contains a non-alpha-numeric ASCII printable character, so we consider it a risky decomposition.
+          isRisky = true;
+          break;
+        }
+      }
+
+      if( isRisky ) {
+        assertTrue(Encoding.RISKY_NORMALIZATION.contains(toTest));
+      } else {
+        assertFalse(Encoding.RISKY_NORMALIZATION.contains(toTest));
+      }
+    }
+  }
+
+
+  @Test
+  public static final void testRiskyNormalization() throws IOException {
+    StringBuilder attrib = new StringBuilder();
+    Encoding.encodeRcdataOnto("Small Less-than Sign : \ufe64",attrib);
+    assertEquals("Small Less-than Sign : &#xfe64;",attrib.toString());
+
+    attrib.setLength(0);
+    Encoding.encodeRcdataOnto("Fullwidth Quotation Mark : \uff02",attrib);
+    assertEquals("Fullwidth Quotation Mark : &#xff02;",attrib.toString());
+
+    attrib.setLength(0);
+    Encoding.encodeRcdataOnto("Greek Varia : \u1fef",attrib);
+    assertEquals("Greek Varia : &#x1fef;",attrib.toString());
+  }
+
+  @Test
+  public static final void testNewLineNormalization() throws IOException {
+    StringBuilder attrib = new StringBuilder();
+    Encoding.encodeRcdataOnto("\rone\ntwo\r",attrib);
+    assertEquals("\none\ntwo\n",attrib.toString());
+
+    attrib.setLength(0);
+    Encoding.encodeRcdataOnto("\none\rtwo\n",attrib);
+    assertEquals("\none\ntwo\n",attrib.toString());
+
+    attrib.setLength(0);
+    Encoding.encodeRcdataOnto("\r\none\r\ntwo\r\n",attrib);
+    assertEquals("\none\ntwo\n",attrib.toString());
+
+    attrib.setLength(0);
+    Encoding.encodeRcdataOnto("\n\rone\n\rtwo\n\r",attrib);
+    assertEquals("\n\none\n\ntwo\n\n",attrib.toString());
+
+    attrib.setLength(0);
+    Encoding.encodeRcdataOnto("\r\rone\n\ntwo\r\r",attrib);
+    assertEquals("\n\none\n\ntwo\n\n",attrib.toString());
+  }
 }
diff --git a/src/test/java/org/owasp/html/HtmlSanitizerTest.java b/src/test/java/org/owasp/html/HtmlSanitizerTest.java
index 53ff9270..0ce72f68 100644
--- a/src/test/java/org/owasp/html/HtmlSanitizerTest.java
+++ b/src/test/java/org/owasp/html/HtmlSanitizerTest.java
@@ -392,53 +392,6 @@ public static final void testNbsps() {
             codeUnits));
   }
 
-  @Test
-  public static final void testMacOSAndIOSQueryOfDeath() {
-    // https://manishearth.github.io/blog/2018/02/15/picking-apart-the-crashing-ios-string/
-    String[][] tests = {
-        {
-          "\u0C1C\u0C4D\u0C1E\u200C\u0C3E",
-          "\u0C1C\u0C4D\u0C1E\u0C3E",
-        },
-        {
-          "\u09B8\u09CD\u09B0<interrupted>\u200C\u09C1",
-          "\u09B8\u09CD\u09B0\u09C1",
-        },
-        {
-          "\u0C1C\u0C4D\u0C1E\u200C\u0C3E",
-          "\u0C1C\u0C4D\u0C1E\u0C3E",
-        },
-        {
-          "\u09B8\u09CD\u09B0\u200C<interrupted>\u09C1",
-          "\u09B8\u09CD\u09B0\u09C1",
-        },
-        {
-          "&#x0C1C;&#x0C4D;&#x0C1E;&#x200C;&#x0C3E;",
-          "\u0C1C\u0C4D\u0C1E\u0C3E",
-        },
-        {
-          "&#x0C1C;&#x0C4D;&#x0C1E;<interrupted>&#x200C;&#x0C3E;",
-          "\u0C1C\u0C4D\u0C1E\u0C3E",
-        },
-        {
-          "&#x09B8;&#x09CD;&#x09B0;&#x200C;&#x09C1;",
-          "\u09B8\u09CD\u09B0\u09C1",
-        },
-        {
-          "&#x09B8;&#x09CD;&#x09B0;&#x200C;<interrupted>&#x09C1;",
-          "\u09B8\u09CD\u09B0\u09C1",
-        },
-        {
-          "\u0915\u094D\u0930\u200C\u093E",
-          "\u0915\u094D\u0930\u093E",
-        },
-    };
-
-    for (int i = 0, n = tests.length; i < n; ++i) {
-      String[] test = tests[i];
-      assertEquals(i + " : " + test[0], test[1], sanitize(test[0]));
-    }
-  }
 
   private static String sanitize(@Nullable String html) {
     StringBuilder sb = new StringBuilder();
diff --git a/src/test/java/org/owasp/html/SanitizersTest.java b/src/test/java/org/owasp/html/SanitizersTest.java
index c75fbcb4..32092d20 100644
--- a/src/test/java/org/owasp/html/SanitizersTest.java
+++ b/src/test/java/org/owasp/html/SanitizersTest.java
@@ -313,7 +313,10 @@ public static final void testScriptInTable() {
       .and(Sanitizers.STYLES)
       .and(Sanitizers.IMAGES)
       .and(Sanitizers.TABLES);
-    assertEquals("<table></table>Hallo\r\n\nEnde\n\r", pf.sanitize(input));
+    // The CRLF after "Hallo" becomes LF
+    // The LF before "Ende" becomes LF
+    // The LF CR after "Ende" becomes LF LF
+    assertEquals("<table></table>Hallo\n\nEnde\n\n", pf.sanitize(input));
   }
 
   @Test