-
Notifications
You must be signed in to change notification settings - Fork 5
/
PhpDateTimeParser.php
214 lines (180 loc) · 7.01 KB
/
PhpDateTimeParser.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
<?php
namespace ValueParsers;
use DataValues\TimeValue;
use DateTime;
use Exception;
/**
* Time parser using PHP's DateTime object. Since the behavior of PHP's parser can be quite odd
* (for example, it pads missing elements with the current date and does actual calculations such as
* parsing "2015-00-00" as "2014-12-30") this parser should only be used as a fallback.
*
* This class implements heuristics to guess which sequence of digits in the input represents the
* year. This is relevant because PHP's parser can only handle 4-digit years as expected. The
* following criteria are used to identify the year:
*
* - The first number longer than 2 digits or bigger than 59.
* - The first number in the input, if it is bigger than 31.
* - The third of three space-separated parts at the beginning of the input, if it is a number.
* - The third number in the input.
* - The last number in the input otherwise.
*
* @since 0.7
*
* @license GPL-2.0-or-later
* @author Addshore
* @author Thiemo Kreuz
*/
class PhpDateTimeParser extends StringValueParser {

	/**
	 * Format identifier passed to every ParseException thrown by stringParse().
	 */
	private const FORMAT_NAME = 'php-date-time';

	/**
	 * @var MonthNameUnlocalizer
	 */
	private $monthNameUnlocalizer;

	/**
	 * @var ValueParser
	 */
	private $eraParser;

	/**
	 * @var ValueParser
	 */
	private $isoTimestampParser;

	/**
	 * @param MonthNameUnlocalizer $monthNameUnlocalizer Used to translate month names to English,
	 *  the language PHP's DateTime parser understands.
	 * @param ValueParser $eraParser String parser that detects signs, "BC" suffixes and such and
	 *  returns an array with the detected sign character and the remaining value.
	 * @param ValueParser $isoTimestampParser String parser that gets a language independent
	 *  YMD-ordered timestamp and returns a TimeValue object. Used for precision detection.
	 */
	public function __construct(
		MonthNameUnlocalizer $monthNameUnlocalizer,
		ValueParser $eraParser,
		ValueParser $isoTimestampParser
	) {
		parent::__construct();

		$this->monthNameUnlocalizer = $monthNameUnlocalizer;
		$this->eraParser = $eraParser;
		$this->isoTimestampParser = $isoTimestampParser;
	}

	/**
	 * Parses a date/time string by normalizing it, handing it to PHP's DateTime and passing the
	 * resulting ISO-like timestamp on to the ISO timestamp parser.
	 *
	 * @param string $value in a format as specified by the PHP DateTime object.
	 *  As an exception to that, years with 5 or more digits can be handled as well.
	 *
	 * @throws ParseException if any step fails. Every exception raised while parsing is rethrown
	 *  as a ParseException carrying the original input and this parser's format name.
	 * @return TimeValue
	 */
	protected function stringParse( $value ) {
		// Kept untouched for error reporting; $value is rewritten several times below.
		$rawValue = $value;

		try {
			[ $sign, $value ] = $this->eraParser->parse( $value );

			$value = trim( $value );
			$value = $this->monthNameUnlocalizer->unlocalize( $value );
			// Cuts the year inside $value down to exactly 4 digits and returns the full year.
			$year = $this->fetchAndNormalizeYear( $value );
			$value = $this->getValueWithFixedSeparators( $value, $year );

			$this->validateDateTimeInput( $value );

			// Parse using the DateTime object (this will allow us to format the date in a nicer way)
			$dateTime = new DateTime( $value );

			// Fail if the DateTime object does calculations like changing 2015-00-00 to 2014-12-30.
			// Only the last 4 digits are compared because that is all DateTime got to see.
			if ( $year !== null && $dateTime->format( 'Y' ) !== substr( $year, -4 ) ) {
				throw new ParseException( $value . ' is not a valid date.' );
			}

			// An input made of one, two or three numbers can not contain a time of day. If the
			// parsed result is not midnight anyway, PHP understood the digits in an unintended way.
			if ( $dateTime->format( 'H:i:s' ) !== '00:00:00'
				&& preg_match( '/^\D*\d+(?:\D+\d+){0,2}\D*$/', $value )
			) {
				throw new ParseException( $value . ' is not a valid date.' );
			}

			if ( $year !== null && strlen( $year ) > 4 ) {
				// DateTime only knows the last 4 digits, so the full year is put back in here.
				$timestamp = $sign . $year . $dateTime->format( '-m-d\TH:i:s\Z' );
			} else {
				$timestamp = $sign . $dateTime->format( 'Y-m-d\TH:i:s\Z' );
			}

			// Use a common base parser for precision detection and option handling.
			return $this->isoTimestampParser->parse( $timestamp );
		} catch ( Exception $exception ) {
			throw new ParseException( $exception->getMessage(), $rawValue, self::FORMAT_NAME );
		}
	}

	/**
	 * Rejects normalized input this parser deliberately does not support.
	 *
	 * @param string $value
	 *
	 * @throws ParseException if the value contains no digit at all, or if it consists of nothing
	 *  but 1 to 7 leading digits followed by either a "+" and optional digits, or non-digits only.
	 */
	private function validateDateTimeInput( $value ) {
		// we don't support input of non-digits only, such as 'x'.
		if ( !preg_match( '/\d/', $value ) ) {
			throw new ParseException( $value . ' does not contain a digit.' );
		}

		// @todo i18n support for these exceptions
		// we don't support dates in format of year + timezone
		if ( preg_match( '/^\d{1,7}(\+\d*|\D*)$/', $value ) ) {
			throw new ParseException( $value . ' is not a valid date.' );
		}
	}

	/**
	 * PHP's DateTime object does not accept spaces as separators between year, month and day,
	 * e.g. dates like 20 12 2012, but we want to support them.
	 * See http://de1.php.net/manual/en/datetime.formats.date.php
	 *
	 * Separators are replaced with "-" when the value starts with the year followed by two more
	 * numbers (year-month-day order), and with "." otherwise.
	 *
	 * @param string $value Value in which the year has already been cut to exactly 4 digits by
	 *  fetchAndNormalizeYear().
	 * @param string|null $year The full year as returned by fetchAndNormalizeYear(). May be longer
	 *  than 4 digits.
	 *
	 * @return string
	 */
	private function getValueWithFixedSeparators( $value, $year = null ) {
		$isYmd = false;

		if ( $year !== null ) {
			// The value only contains the last 4 digits of the year. Searching for the full year
			// could never match when the year is longer than that, which made year-first input
			// with a 5+ digit year end up with "." separators.
			$yearInValue = substr( $year, -4 );

			// Since PHP 8.1.7 YYYY-DDD means the DDDth day of the year, thus only add dashes
			// if we have up to two digits in the second field.
			$isYmd = preg_match(
				'/^\D*' . $yearInValue . '\D+\d{1,2}\D+\d+\D*$/',
				$value
			) === 1;
		}

		$separator = $isYmd ? '-' : '.';

		// Meant to match separator characters after day and month. \p{L} matches letters outside
		// the ASCII range.
		return preg_replace( '/(?<=[\d\p{L}])[.,\s]\s*/', $separator, $value );
	}

	/**
	 * Tries to find and pad the sequence of digits in the input that represents the year.
	 * Refer to the class level documentation for a description of the heuristics used.
	 *
	 * @param string &$value A time value string, possibly containing a year. If found, the year in
	 *  the string will be cut and padded to exactly 4 digits.
	 *
	 * @return string|null The full year, if found, not cut but padded to at least 4 digits.
	 */
	private function fetchAndNormalizeYear( &$value ) {
		// NOTE: When changing the regex matching below, keep the class level
		// documentation of the extraction heuristics up to date!
		// The patterns are tried in order; the first one that matches decides.
		$patterns = [
			// Check if the string contains a number longer than 2 digits or bigger than 59.
			'/(?<!\d)('       // cannot be prepended by a digit
				. '\d{3,}|'   // any number longer than 2 digits, or
				. '[6-9]\d'   // any number bigger than 59
				. ')(?!\d)/', // cannot be followed by a digit

			// Check if the first number in the string is bigger than 31.
			'/^\D*(3[2-9]|[4-9]\d)/',

			// Check if the string starts with three space-separated parts or three numbers.
			'/^(?:'
				. '\S+\s+\S+\s+|' // e.g. "July<SPACE>4th<SPACE>", or
				. '\d+\D+\d+\D+'  // e.g. "4.7."
				. ')(\d+)/',      // followed by a number

			// Check if the string ends with a number.
			'/(\d+)\D*$/',
		];

		$matches = [];
		foreach ( $patterns as $pattern ) {
			if ( preg_match( $pattern, $value, $matches, PREG_OFFSET_CAPTURE ) ) {
				break;
			}
		}

		// No pattern matched, i.e. there is nothing that could be a year.
		if ( !isset( $matches[1] ) ) {
			return null;
		}

		// With PREG_OFFSET_CAPTURE each group is a pair of matched text and byte offset.
		[ $rawYear, $index ] = $matches[1];
		$length = strlen( $rawYear );

		// Trim irrelevant leading zeros, then pad to at least 4 digits.
		$year = str_pad( ltrim( $rawYear, '0' ), 4, '0', STR_PAD_LEFT );

		// Manipulate the value to have an exactly 4-digit year. Crucial for PHP's DateTime object.
		$value = substr_replace( $value, substr( $year, -4 ), $index, $length );

		return $year;
	}

}