Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
branch: master
Fetching contributors…

Octocat-spinner-32-eaf2f5

Cannot retrieve contributors at this time

file 61 lines (54 sloc) 2.023 kb
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
<?php

namespace utf8;


/**
* PCRE Regular expressions for UTF-8.
*
* Note: this file is not actually used by the rest of the library, but these
* regular expressions can be useful to have available. The regular expressions
* are modified to accept the full ASCII range, including control characters.
*
* @see http://www.w3.org/International/questions/qa-forms-utf-8
* @package php-utf8
* @subpackage utils
*/

/**
 * PCRE pattern that matches only when a whole string is well-formed UTF-8.
 *
 * The pattern is anchored at both ends and repeats a non-capturing group of
 * alternatives, one per legal byte-sequence shape, zero or more times; the
 * empty string therefore matches as well.
 *
 * The constant holds the bare pattern without delimiters or modifiers, so
 * wrap it before use, e.g.
 * preg_match('/' . \utf8\VALID_UTF_PATTERN . '/', $str) === 1.
 * The alternatives describe raw byte values, so the pattern is meant to be
 * used without the "u" modifier.
 */
define('utf8\VALID_UTF_PATTERN',
    '^(?:'. # Start anchor + opening of the group that ")*$" closes below
    '[\x00-\x7F]'. # ASCII (including control chars)
    '|[\xC2-\xDF][\x80-\xBF]'. # Non-overlong 2-byte
    '|\xE0[\xA0-\xBF][\x80-\xBF]'. # Excluding overlongs
    '|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}'. # Straight 3-byte
    '|\xED[\x80-\x9F][\x80-\xBF]'. # Excluding surrogates
    '|\xF0[\x90-\xBF][\x80-\xBF]{2}'. # Planes 1-3
    '|[\xF1-\xF3][\x80-\xBF]{3}'. # Planes 4-15
    '|\xF4[\x80-\x8F][\x80-\xBF]{2}'. # Plane 16
    ')*$' # Repeat the group to the end of the subject
);

/**
 * PCRE pattern that matches one UTF-8 encoded character.
 *
 * Every legal byte-sequence shape is wrapped in its own capturing group
 * (groups 1 through 8, in the order listed) and the groups are joined as
 * alternatives. The constant holds the bare pattern: no delimiters, anchors
 * or modifiers are included.
 */
define('utf8\SINGLE_CHAR_UTF_PATTERN', implode('|', array(
    '([\x00-\x7F])',                        // ASCII (including control chars)
    '([\xC2-\xDF][\x80-\xBF])',             // Non-overlong 2-byte
    '(\xE0[\xA0-\xBF][\x80-\xBF])',         // 3-byte, excluding overlongs
    '([\xE1-\xEC\xEE\xEF][\x80-\xBF]{2})',  // Straight 3-byte
    '(\xED[\x80-\x9F][\x80-\xBF])',         // 3-byte, excluding surrogates
    '(\xF0[\x90-\xBF][\x80-\xBF]{2})',      // Planes 1-3
    '([\xF1-\xF3][\x80-\xBF]{3})',          // Planes 4-15
    '(\xF4[\x80-\x8F][\x80-\xBF]{2})',      // Plane 16
)));

/**
 * PCRE pattern for walking a string and locating bytes that are not part of
 * a well-formed UTF-8 sequence.
 *
 * Group 1 wraps the whole alternation. The valid byte-sequence shapes are
 * tried first; the last alternative, capturing group 2, is a one-character
 * fallback, so group 2 is set only when none of the valid shapes matched at
 * that position. The constant holds the bare pattern without delimiters or
 * modifiers.
 */
define('utf8\BAD_UTF_PATTERN', '(' . implode('|', array(
    '[\x00-\x7F]',                          // ASCII (including control chars)
    '[\xC2-\xDF][\x80-\xBF]',               // Non-overlong 2-byte
    '\xE0[\xA0-\xBF][\x80-\xBF]',           // 3-byte, excluding overlongs
    '[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}',    // Straight 3-byte
    '\xED[\x80-\x9F][\x80-\xBF]',           // 3-byte, excluding surrogates
    '\xF0[\x90-\xBF][\x80-\xBF]{2}',        // Planes 1-3
    '[\xF1-\xF3][\x80-\xBF]{3}',            // Planes 4-15
    '\xF4[\x80-\x8F][\x80-\xBF]{2}',        // Plane 16
    '(.{1})',                               // Fallback: invalid byte (group 2)
)) . ')');
Something went wrong with that request. Please try again.