<?xml version="1.0" encoding="UTF-8"?>
<commit>
  <added type="array"/>
  <modified type="array">
    <modified>
      <diff>@@ -1,4 +1,5 @@
 #include &lt;stdio.h&gt;
+#define non_printing_control_char(byte) ((byte) &lt;= 0x08 || ((byte) &gt;= 0x0E &amp;&amp; (byte) &lt;= 0x1F))
 
 long find_first_non_us_ascii (FILE *fp) {
   int byte;
@@ -7,7 +8,7 @@ long find_first_non_us_ascii (FILE *fp) {
     byte = fgetc(fp);
     if (byte == EOF)
       return -1;
-    if (byte &gt;= 0x7F)
+    if (byte &gt;= 0x7F || non_printing_control_char(byte))
       return ftell(fp) - 1;
   }
 }
@@ -21,6 +22,8 @@ int is_iso_8859_15 (FILE *fp) {
       return 1;
     if (byte &gt;= 0x7F &amp;&amp; byte &lt;= 0x9F)
       return 0;
+    if (non_printing_control_char(byte))
+      return 0;
   }
 }
 
@@ -34,5 +37,7 @@ int is_windows_1252 (FILE *fp) {
     // Shortcut common case
     if (byte &gt;= 0x7F &amp;&amp; (byte == 0x7F || byte == 0x81 || byte == 0x8D || byte == 0x8F || byte == 0x90 || byte == 0x9D))
       return 0;
+    if (non_printing_control_char(byte))
+      return 0;
   }
 }</diff>
      <filename>src/one_byte_detector.c</filename>
    </modified>
    <modified>
      <diff>@@ -7,8 +7,13 @@ utf8_null:
   byte = fgetc(fp);
   if (byte == EOF)
     goto utf8_finished;
-  if (byte &lt; 0x7F)         // US-ASCII
-    goto utf8_null;
+  if (byte &lt; 0x7F) {       // US-ASCII
+    if (byte &gt;= 0x20)
+      goto utf8_null;
+    if (byte &gt;= 0x09 &amp;&amp; byte &lt;= 0x0D)
+      goto utf8_null;
+    goto utf8_error;
+  }
   if (byte &gt;&gt; 5 == 0x06)   // 110x xxxx  2-byte sequence
     goto utf8_2b_1;
   if (byte &gt;&gt; 4 == 0x0E)   // 1110 xxxx  3-byte-sequence</diff>
      <filename>src/utf8_detector.c</filename>
    </modified>
    <modified>
      <diff>@@ -3,7 +3,7 @@ require File.join(File.dirname(__FILE__), 'common')
 class ISO_8859_15_Test &lt; Test::Unit::TestCase
   include DetencHelper
 
-  INVALID_BYTES = (0x7F..0x9F).to_a
+  INVALID_BYTES = (0x00..0x08).to_a + (0x0E..0x1F).to_a + (0x7F..0x9F).to_a
 
   def test_should_be_iso_8859_15_if_it_contains_all_valid_bytes
     sample = (0..0xFF).inject(''){ |s, b|</diff>
      <filename>test/test_iso_8859_15.rb</filename>
    </modified>
    <modified>
      <diff>@@ -3,14 +3,21 @@ require File.join(File.dirname(__FILE__), 'common')
 class US_ASCII_Test &lt; Test::Unit::TestCase
   include DetencHelper
 
-  def test_should_be_us_ascii_if_it_contains_all_bytes_under_0x7F
-    sample = (0..0x7E).inject(''){ |s, b|
+  def test_should_be_us_ascii_if_it_contains_all_printable_bytes_under_0x7F
+    sample = ((0x09..0x0D).to_a + (0x20..0x7E).to_a).inject(''){ |s, b|
       s &lt;&lt; [b].pack('C')
     }
     assert_equal US_ASCII, detenc(sample)
   end
 
-  def test_should_be_us_ascii_if_it_contains_any_byte_of_0x7F_or_more
+  def test_should_not_be_us_ascii_if_it_contains_any_non_printable_bytes_under_0x7F
+    ((0x00..0x08).to_a + (0x0E..0x1F).to_a).each do |invalid_byte|
+      sample = [invalid_byte].pack('C')
+      assert_not_equal US_ASCII, detenc(sample), &quot;%02X is not #{US_ASCII}&quot; % invalid_byte
+    end
+  end
+
+  def test_should_not_be_us_ascii_if_it_contains_any_byte_of_0x7F_or_more
     (0x7F..0xFF).each do |invalid_byte|
       sample = [invalid_byte].pack('C')
       assert_not_equal US_ASCII, detenc(sample), &quot;%02X is not #{US_ASCII}&quot; % invalid_byte</diff>
      <filename>test/test_us_ascii.rb</filename>
    </modified>
    <modified>
      <diff>@@ -6,28 +6,40 @@ class UTF8StressTest &lt; Test::Unit::TestCase
   # From Markus Kuhn's stress tests
   # http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt
 
-  def assert_valid(data)
+  def assert_valid(data, message='')
     detected = detenc(data)
     allowed = [US_ASCII, UTF_8]
-    assert allowed.include?(detected), &quot;Expected #{detected.inspect} to be one of #{allowed.inspect}&quot;
+    assert allowed.include?(detected), &quot;Expected #{detected.inspect} to be one of #{allowed.inspect} #{message}&quot;
   end
 
-  def assert_invalid(data)
+  def assert_invalid(data, message='')
     detected = detenc(data)
     disallowed = [US_ASCII, UTF_8]
-    assert !disallowed.include?(detected), &quot;Expected #{detected.inspect} not to be one of #{disallowed.inspect}&quot;
+    assert !disallowed.include?(detected), &quot;Expected #{detected.inspect} not to be one of #{disallowed.inspect} #{message}&quot;
   end
 
   def test_should_reject_DEL
     assert_invalid &quot;\x7f&quot;
   end
 
+  def test_should_reject_unprintable_control_characters_under_0x20
+    [ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x0e, 0x0f,
+      0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19,
+      0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f ].each do |b|
+      assert_invalid [b].pack('C'), &quot;Byte #{b.to_s(16)}&quot;
+    end
+  end
+
+  def test_should_accept_printable_control_characters_under_0x20
+    assert_valid &quot;\x09\x0a\x0b\x0c\x0d&quot;
+  end
+
   def test_should_accept_greek_kosme
     assert_valid &quot;\xce\xba\xe1\xbd\xb9\xcf\x83\xce\xbc\xce\xb5&quot;
   end
 
   def test_should_accept_first_possible_sequence_for_1_byte_sequence
-    assert_valid &quot;\x00&quot;
+    assert_valid &quot;\x09&quot;
   end
 
   def test_should_accept_first_possible_sequence_for_2_byte_sequence</diff>
      <filename>test/test_utf_8.rb</filename>
    </modified>
    <modified>
      <diff>@@ -3,7 +3,7 @@ require File.join(File.dirname(__FILE__), 'common')
 class Windows1252Test &lt; Test::Unit::TestCase
   include DetencHelper
 
-  INVALID_BYTES = [0x7F, 0x81, 0x8D, 0x8F, 0x90, 0x9D]
+  INVALID_BYTES = (0x00..0x08).to_a + (0x0E..0x1F).to_a + [0x7F, 0x81, 0x8D, 0x8F, 0x90, 0x9D]
 
   def test_should_be_windows_1252_if_it_contains_all_valid_bytes
     sample = (0..0xFF).inject(''){ |s, b|</diff>
      <filename>test/test_windows_1252.rb</filename>
    </modified>
  </modified>
  <removed type="array"/>
  <parents type="array">
    <parent>
      <id>018acde72cafefa0db71897ffaa9f57b187d0dcc</id>
    </parent>
  </parents>
  <author>
    <name>Paul Battley</name>
    <email>pbattley@gmail.com</email>
  </author>
  <url>http://github.com/threedaymonk/detenc/commit/621e260c6ae0af827371cd3265d30fc8c0399789</url>
  <id>621e260c6ae0af827371cd3265d30fc8c0399789</id>
  <committed-date>2009-01-23T06:40:22-08:00</committed-date>
  <authored-date>2009-01-23T06:40:22-08:00</authored-date>
  <message>Non-printable control characters (everything under 0x20 except 0x09 through 0x0D) shouldn't appear in text. Let's reject them.</message>
  <tree>b02ef6363d7e905b2e6319f9ab297c8751a3fa19</tree>
  <committer>
    <name>Paul Battley</name>
    <email>pbattley@gmail.com</email>
  </committer>
</commit>
