Skip to content
Permalink
Browse files Browse the repository at this point in the history
Enhance the sanitizing.
  • Loading branch information
eldy committed Jun 29, 2021
1 parent 0d7fccc commit 796b2d2
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 11 deletions.
8 changes: 6 additions & 2 deletions htdocs/core/lib/functions.lib.php
Expand Up @@ -778,8 +778,12 @@ function checkVal($out = '', $check = 'alphanohtml', $filter = null, $options =
do {
$oldstringtoclean = $out;

// We replace chars encoded with numeric HTML entities with real char (to avoid to have numeric entities used for obfuscation of injections)
$out = preg_replace_callback('/&#(x?[0-9][0-9a-f]+);/i', 'realCharForNumericEntities', $out);
// We replace chars from a/A to z/Z encoded with numeric HTML entities with the real char so we won't loose the chars at the next step.
// No need to use a loop here, this step is not to sanitize (this is done at next step, this is to try to save chars, even if they are
// using a non coventionnel way to be encoded, to not have them sanitized just after)
$out = preg_replace_callback('/&#(x?[0-9][0-9a-f]+;?)/i', 'realCharForNumericEntities', $out);

// Now we remove all remaining HTML entities staring with a number. We don't want such entities.
$out = preg_replace('/&#x?[0-9]+/i', '', $out); // For example if we have j&#x61vascript with an entities without the ; to hide the 'a' of 'javascript'.

$out = dol_string_onlythesehtmltags($out, 0, 1, 1);
Expand Down
9 changes: 5 additions & 4 deletions htdocs/main.inc.php
Expand Up @@ -53,25 +53,26 @@

/**
* Return the real char for a numeric entities.
* This function is required by testSqlAndScriptInject().
* WARNING: This function is required by testSqlAndScriptInject() and the GETPOST 'restricthtml'. Regex calling must be similar.
*
* @param string $matches String of numeric entity
* @return string New value
*/
function realCharForNumericEntities($matches)
{
$newstringnumentity = $matches[1];
//print '$newstringnumentity='.$newstringnumentity;

if (preg_match('/^x/i', $newstringnumentity)) {
$newstringnumentity = hexdec(preg_replace('/^x/i', '', $newstringnumentity));
$newstringnumentity = hexdec(preg_replace('/;$/', '', preg_replace('/^x/i', '', $newstringnumentity)));
}

// The numeric value we don't want as entities
// The numeric value we don't want as entities because they encode ascii char, and why using html entities on ascii except for haking ?
if (($newstringnumentity >= 65 && $newstringnumentity <= 90) || ($newstringnumentity >= 97 && $newstringnumentity <= 122)) {
return chr((int) $newstringnumentity);
}

return '&#'.$matches[1];
return '&#'.$matches[1]; // Value will be unchanged because regex was /&#( )/
}

/**
Expand Down
10 changes: 5 additions & 5 deletions test/phpunit/SecurityTest.php
Expand Up @@ -345,7 +345,7 @@ public function testGETPOST()
$_GET["param5"]="a_1-b";
$_POST["param6"]="&quot;&gt;<svg o&#110;load='console.log(&quot;123&quot;)'&gt;";
$_POST["param6b"]='<<<../>../>../svg><<<../>../>../animate =alert(1)>abc';
$_GET["param7"]='"c:\this is a path~1\aaa&#110;" abc<bad>def</bad>';
$_GET["param7"]='"c:\this is a path~1\aaa&#110; &#x&#x31;&#x31;&#x30;;" abc<bad>def</bad>';
$_POST["param8a"]="Hacker<svg o&#110;load='console.log(&quot;123&quot;)'"; // html tag is not closed so it is not detected as html tag but is still harmfull
$_POST['param8b']='<img src=x onerror=alert(document.location) t='; // this is html obfuscated by non closing tag
$_POST['param8c']='< with space after is ok';
Expand Down Expand Up @@ -479,20 +479,20 @@ public function testGETPOST()
$this->assertEquals('&quot;&gt;', $result);

$result=GETPOST("param7", 'restricthtml');
print __METHOD__." result=".$result."\n";
$this->assertEquals('"c:\this is a path~1\aaan" abcdef', $result);
print __METHOD__." result param7 = ".$result."\n";
$this->assertEquals('"c:\this is a path~1\aaan &#x;;;;" abcdef', $result);

$result=GETPOST("param12", 'restricthtml');
print __METHOD__." result=".$result."\n";
$this->assertEquals(trim($_POST["param12"]), $result, 'Test a string with DOCTYPE and restricthtml');

$result=GETPOST("param13", 'restricthtml');
print __METHOD__." result=".$result."\n";
$this->assertEquals('n n &gt; &lt; &quot; <a href=\"alert(document.domain)\">XSS</a>', $result, 'Test that HTML entities are decoded with restricthtml, but only for common alpha chars');
$this->assertEquals('n n &gt; &lt; &quot; <a href=\"alert(document.domain)\">XSS</a>', $result, 'Test 13 that HTML entities are decoded with restricthtml, but only for common alpha chars');

$result=GETPOST("param13b", 'restricthtml');
print __METHOD__." result=".$result."\n";
$this->assertEquals('n n &gt; &lt; &quot; <a href=\"jvascript:alert(document.domain)\">XSS</a>', $result, 'Test that HTML entities are decoded with restricthtml, but only for common alpha chars');
$this->assertEquals('n n &gt; &lt; &quot; <a href=\"alert(document.domain)\">XSS</a>', $result, 'Test 13b that HTML entities are decoded with restricthtml, but only for common alpha chars');

// Special test for GETPOST of backtopage, backtolist or backtourl parameter

Expand Down

1 comment on commit 796b2d2

@fgeek
Copy link

@fgeek fgeek commented on 796b2d2 Aug 21, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@eldy when will be 14.x released?

Please sign in to comment.