Avoids null characters in response headers

Fixes HTTP Evader case 290 and 16 more. The raw headers are provided to Suricata, but libhtp internally uses comparison functions that skip zero bytes. This is needed by libhtp to interpret some important header names and values (such as Transfer-Encoding: chunked) on which we rely to continue parsing correctly.
OISF · Jun 20, 2019 · 46b4605 · 46b4605
1 parent 0638b63
commit 46b4605
Show file tree

Hide file tree

Showing 6 changed files with 135 additions and 8 deletions.
diff --git a/htp/bstr.c b/htp/bstr.c
@@ -219,6 +219,10 @@ int bstr_cmp_c_nocase(const bstr *b, const char *c) {
     return bstr_util_cmp_mem_nocase(bstr_ptr(b), bstr_len(b), c, strlen(c));
 }
 
+int bstr_cmp_c_nocasenorzero(const bstr *b, const char *c) { // Case-insensitive compare of bstring b with C string c; zero bytes in b are skipped.
+    return bstr_util_cmp_mem_nocasenorzero(bstr_ptr(b), bstr_len(b), c, strlen(c)); // c is measured with strlen(), so it must be NUL-terminated.
+}
+
 int bstr_cmp_mem(const bstr *b, const void *data, size_t len) {
     return bstr_util_cmp_mem(bstr_ptr(b), bstr_len(b), data, len);
 }
@@ -295,6 +299,10 @@ int bstr_index_of_c_nocase(const bstr *haystack, const char *needle) {
     return bstr_index_of_mem_nocase(haystack, needle, strlen(needle));
 }
 
+int bstr_index_of_c_nocasenorzero(const bstr *haystack, const char *needle) { // Case-insensitive search for C string needle inside bstring haystack; zero bytes in haystack are skipped.
+    return bstr_util_mem_index_of_mem_nocasenorzero(bstr_ptr(haystack), bstr_len(haystack), needle, strlen(needle)); // needle is measured with strlen(), so it must be NUL-terminated.
+}
+
 int bstr_index_of_mem(const bstr *haystack, const void *_data2, size_t len2) {
     return bstr_util_mem_index_of_mem(bstr_ptr(haystack), bstr_len(haystack), _data2, len2);
 }
@@ -388,6 +396,38 @@ int bstr_util_cmp_mem_nocase(const void *_data1, size_t len1, const void *_data2
     }
 }
 
+int bstr_util_cmp_mem_nocasenorzero(const void *_data1, size_t len1, const void *_data2, size_t len2) { // Case-insensitive three-way compare of two memory regions; zero bytes in data1 are ignored.
+    const unsigned char *data1 = (const unsigned char *) _data1;
+    const unsigned char *data2 = (const unsigned char *) _data2;
+    size_t p1 = 0, p2 = 0; // Independent cursors: p1 runs ahead of p2 whenever data1 contains zeros.
+
+    while ((p1 < len1) && (p2 < len2)) {
+        if (data1[p1] == 0) { // Only data1 is filtered; a zero byte in data2 is compared like any other byte.
+            p1++;
+            continue;
+        }
+        if (tolower(data1[p1]) != tolower(data2[p2])) {
+            // Difference.
+            return (tolower(data1[p1]) < tolower(data2[p2])) ? -1 : 1;
+        }
+
+        p1++;
+        p2++;
+    }
+
+    while((p1 < len1) && (data1[p1] == 0)) { // Consume trailing zeros in data1 so they do not count as extra length.
+        p1++;
+    }
+    if ((p1 == len1) && (p2 == len2)) {
+        // They're identical.
+        return 0;
+    } else {
+        // One string is shorter.
+        if (p1 == len1) return -1; // data1 was exhausted first, so it orders lower.
+        else return 1; // data1 still has a non-zero byte left, so data2 was exhausted first.
+    }
+}
+
 int64_t bstr_util_mem_to_pint(const void *_data, size_t len, int base, size_t *lastlen) {
     const unsigned char *data = (unsigned char *) _data;
     int64_t rval = 0, tflag = 0;
@@ -496,6 +536,33 @@ int bstr_util_mem_index_of_mem_nocase(const void *_data1, size_t len1, const voi
     return -1;
 }
 
+int bstr_util_mem_index_of_mem_nocasenorzero(const void *_data1, size_t len1, const void *_data2, size_t len2) { // Case-insensitive search for needle data2 in haystack data1; zero bytes in the haystack are skipped.
+    const unsigned char *data1 = (unsigned char *) _data1;
+    const unsigned char *data2 = (unsigned char *) _data2;
+    size_t i, j;
+
+    // If we ever want to optimize this function, the following link
+    // might be useful: http://en.wikipedia.org/wiki/Knuth-Morris-Pratt_algorithm
+
+    for (i = 0; i < len1; i++) { // i is the candidate start offset in the haystack.
+        size_t k = i; // k walks the haystack while j walks the needle.
+
+        for (j = 0; ((j < len2) && (k < len1)); j++, k++) {
+            if (data1[k] == 0) { // NOTE(review): a run of zeros is re-scanned from every start offset inside it, which is quadratic in the run length.
+                j--; // Cancels the loop's j++ so only k advances; at j == 0 the unsigned j wraps and the j++ wraps it back to 0.
+                continue;
+            }
+            if (toupper(data1[k]) != toupper(data2[j])) break;
+        }
+
+        if (j == len2) { // The whole needle was consumed.
+            return i; // NOTE(review): i may be the offset of a skipped zero byte rather than of the first matching character; size_t is narrowed to int here.
+        }
+    }
+
+    return -1; // Not found; this is also the result whenever len1 is 0.
+}
+
 void bstr_util_mem_trim(unsigned char **data, size_t *len) {
     if ((data == NULL)||(len == NULL)) return;
 

diff --git a/htp/bstr.h b/htp/bstr.h
@@ -313,6 +313,15 @@ int bstr_cmp_c(const bstr *b, const char *cstr);
  */
 int bstr_cmp_c_nocase(const bstr *b, const char *cstr);
 
+/**
+ * Case-insensitive comparison of a bstring with a NUL-terminated string, skipping zero bytes in b.
+ *
+ * @param[in] b bstring to compare; zero bytes it contains are ignored
+ * @param[in] cstr NUL-terminated string to compare against
+ * @return Zero on string match, 1 if b is greater than cstr, and -1 if cstr is greater than b.
+ */
+int bstr_cmp_c_nocasenorzero(const bstr *b, const char *cstr);
+
 /**
  * Performs a case-sensitive comparison of a bstring with a memory region.
  *
@@ -343,6 +352,16 @@ int bstr_cmp_mem_nocase(const bstr *b, const void *data, size_t len);
  */
 int bstr_cmp_nocase(const bstr *b1, const bstr *b2);
 
+/**
+ * Case-insensitive and zero-skipping comparison of two bstrings.
+ * NOTE(review): no definition is visible alongside this declaration; confirm it exists and which operand has zeros skipped.
+ * @param[in] b1
+ * @param[in] b2
+ * @return Zero on string match, 1 if b1 is greater than b2, and -1 if b2 is
+ *         greater than b1.
+ */
+int bstr_cmp_nocasenorzero(const bstr *b1, const bstr *b2);
+
 /**
  * Create a new bstring by copying the provided bstring.
  *
@@ -445,6 +464,16 @@ int bstr_index_of_c(const bstr *bhaystack, const char *cneedle);
  */
 int bstr_index_of_c_nocase(const bstr *bhaystack, const char *cneedle);
 
+/**
+ * Find the needle in the haystack, with the needle being a NUL-terminated
+ * string. Case differences are ignored and zero bytes in the haystack are skipped.
+ *
+ * @param[in] bhaystack bstring to search in
+ * @param[in] cneedle NUL-terminated string to search for
+ * @return Position of the match, or -1 if the needle could not be found.
+ */
+int bstr_index_of_c_nocasenorzero(const bstr *bhaystack, const char *cneedle);
+
 /**
  * Find the needle in the haystack, with the needle being a memory region.
  *
@@ -508,6 +537,18 @@ int bstr_util_cmp_mem(const void *data1, size_t len1, const void *data2, size_t
  */
  int bstr_util_cmp_mem_nocase(const void *data1, size_t len1, const void *data2, size_t len2);
 
+/**
+ * Case-insensitive comparison of two memory regions that skips zero bytes in data1 only.
+ *
+ * @param[in] data1 first region; zero bytes it contains are ignored
+ * @param[in] len1 length of data1 in bytes
+ * @param[in] data2 second region; compared byte for byte (case-folded)
+ * @param[in] len2 length of data2 in bytes
+ * @return Zero if the regions match after case folding and zero skipping, 1 if data1
+ *         is greater than data2, and -1 if data2 is greater than data1.
+ */
+ int bstr_util_cmp_mem_nocasenorzero(const void *data1, size_t len1, const void *data2, size_t len2);
+
 /**
  * Convert contents of a memory region to a positive integer.
  *
@@ -564,6 +605,17 @@ int bstr_util_mem_index_of_mem(const void *data1, size_t len1, const void *data2
  */
 int bstr_util_mem_index_of_mem_nocase(const void *data1, size_t len1, const void *data2, size_t len2);
 
+/**
+ * Searches the haystack memory block for the needle memory block. Case insensitive. Skips zeroes in data1.
+ *
+ * @param data1 haystack; zero bytes it contains are skipped while matching
+ * @param len1 length of the haystack in bytes
+ * @param data2 needle
+ * @param len2 length of the needle in bytes
+ * @return Index of the first location of the needle on success, or -1 if the needle was not found.
+ */
+int bstr_util_mem_index_of_mem_nocasenorzero(const void *data1, size_t len1, const void *data2, size_t len2);
+
 /**
  * Removes whitespace from the beginning and the end of a memory region. The data
  * itself is not modified; this function only adjusts the provided pointers.

diff --git a/htp/htp_response.c b/htp/htp_response.c
@@ -641,7 +641,7 @@ htp_status_t htp_connp_RES_BODY_DETERMINE(htp_connp_t *connp) {
         // 2. If a Transfer-Encoding header field (section 14.40) is present and
         //   indicates that the "chunked" transfer coding has been applied, then
         //   the length is defined by the chunked encoding (section 3.6).
-        if ((te != NULL) && (bstr_index_of_c_nocase(te->value, "chunked") != -1)) {
+        if ((te != NULL) && (bstr_index_of_c_nocasenorzero(te->value, "chunked") != -1)) {
             if (bstr_cmp_c_nocase(te->value, "chunked") != 0) {
                 htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0,
                         "Transfer-encoding has abnormal chunked value");

diff --git a/htp/htp_response_generic.c b/htp/htp_response_generic.c
@@ -171,9 +171,9 @@ htp_status_t htp_parse_response_header_generic(htp_connp_t *connp, htp_header_t
 
         name_end = colon_pos;
 
-        // Ignore LWS after field-name.
+        // Ignore unprintable after field-name.
         prev = name_end;
-        while ((prev > name_start) && (htp_is_lws(data[prev - 1]))) {
+        while ((prev > name_start) && (data[prev - 1] <= 0x20)) {
             prev--;
             name_end--;
 
@@ -215,6 +215,12 @@ htp_status_t htp_parse_response_header_generic(htp_connp_t *connp, htp_header_t
 
         i++;
     }
+    for (i = value_start; i < value_end; i++) {
+        if (data[i] == 0) {
+            htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Response header value contains null.");
+            break;
+        }
+    }
 
     // Now extract the name and the value.
     h->name = bstr_dup_mem(data + name_start, name_end - name_start);

diff --git a/htp/htp_table.c b/htp/htp_table.c
@@ -208,7 +208,7 @@ void *htp_table_get_c(const htp_table_t *table, const char *ckey) {
     for (size_t i = 0, n = htp_list_size(&table->list); i < n; i += 2) {
         bstr *key_candidate = htp_list_get(&table->list, i);
         void *element = htp_list_get(&table->list, i + 1);
-        if (bstr_cmp_c_nocase(key_candidate, ckey) == 0) {
+        if (bstr_cmp_c_nocasenorzero(key_candidate, ckey) == 0) {
             return element;
         }
     }

diff --git a/htp/htp_transaction.c b/htp/htp_transaction.c
@@ -1167,13 +1167,15 @@ htp_status_t htp_tx_state_response_headers(htp_tx_t *tx) {
     htp_header_t *ce = htp_table_get_c(tx->response_headers, "content-encoding");
     if (ce != NULL) {
         /* fast paths: regular gzip and friends */
-        if ((bstr_cmp_c_nocase(ce->value, "gzip") == 0) || (bstr_cmp_c_nocase(ce->value, "x-gzip") == 0)) {
+        if ((bstr_cmp_c_nocasenorzero(ce->value, "gzip") == 0) ||
+            (bstr_cmp_c_nocasenorzero(ce->value, "x-gzip") == 0)) {
             tx->response_content_encoding = HTP_COMPRESSION_GZIP;
-        } else if ((bstr_cmp_c_nocase(ce->value, "deflate") == 0) || (bstr_cmp_c_nocase(ce->value, "x-deflate") == 0)) {
+        } else if ((bstr_cmp_c_nocasenorzero(ce->value, "deflate") == 0) ||
+                   (bstr_cmp_c_nocasenorzero(ce->value, "x-deflate") == 0)) {
             tx->response_content_encoding = HTP_COMPRESSION_DEFLATE;
-        } else if (bstr_cmp_c_nocase(ce->value, "lzma") == 0) {
+        } else if (bstr_cmp_c_nocasenorzero(ce->value, "lzma") == 0) {
             tx->response_content_encoding = HTP_COMPRESSION_LZMA;
-        } else if (bstr_cmp_c_nocase(ce->value, "inflate") == 0) {
+        } else if (bstr_cmp_c_nocasenorzero(ce->value, "inflate") == 0) {
             // ignore
         } else {
             /* exceptional cases: enter slow path */