Skip to content

Commit ea32e39

Browse files
tomaszstrejczek authored and gmta committed
AK: Add UnixDateTime::parse() method
Copy parse() method from LibCore::DateTime::parse(). Augment the method to handle parsing from GMT time. Fix incorrect handling of year in '%D' format specifier. Remove all format specifiers related to time zones. Copy relevant tests and add additional ones.
1 parent 820fee4 commit ea32e39

File tree

3 files changed

+395
-0
lines changed

3 files changed

+395
-0
lines changed

AK/Time.cpp

Lines changed: 259 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
#include <AK/Checked.h>
99
#include <AK/DateConstants.h>
10+
#include <AK/GenericLexer.h>
1011
#include <AK/String.h>
1112
#include <AK/StringBuilder.h>
1213
#include <AK/Time.h>
@@ -17,6 +18,7 @@
1718
# define localtime_r(time, tm) localtime_s(tm, time)
1819
# define gmtime_r(time, tm) gmtime_s(tm, time)
1920
# define tzname _tzname
21+
# define timegm _mkgmtime
2022
#endif
2123

2224
namespace AK {
@@ -509,4 +511,261 @@ ByteString UnixDateTime::to_byte_string(StringView format, LocalTime local_time)
509511
return builder.to_byte_string();
510512
}
511513

514+
// Parses `string` according to the strptime-like `format` and returns the resulting time.
//
// Literal characters in `format` must match `string` exactly; '%' introduces a specifier handled by the
// switch below. Time zone name/offset specifiers are not supported. Parsing fails (empty Optional) when a
// specifier cannot be matched, when `format` ends with a lone '%', or when either `format` or `string`
// has unconsumed input left over.
//
// If `from_gmt` is true, the parsed fields are converted with timegm(); otherwise they are handed to
// UnixDateTime::from_unix_time_parts().
Optional<UnixDateTime> UnixDateTime::parse(StringView format, StringView string, bool from_gmt)
{
    unsigned format_pos = 0;

    struct tm tm = {};
    tm.tm_isdst = -1;

    auto parsing_failed = false;

    GenericLexer string_lexer(string);

    // Consumes a decimal integer; flags the parse as failed (and yields 0) if none is present.
    auto parse_number = [&] {
        auto result = string_lexer.consume_decimal_integer<int>();
        if (result.is_error()) {
            parsing_failed = true;
            return 0;
        }
        return result.value();
    };

    // Consumes exactly the character `c`, flagging the parse as failed otherwise.
    auto consume = [&](char c) {
        if (!string_lexer.consume_specific(c))
            parsing_failed = true;
    };

    auto consume_specific_ascii_case_insensitive = [&](StringView name) {
        auto next_string = string_lexer.peek_string(name.length());
        if (next_string.has_value() && next_string->equals_ignoring_ascii_case(name)) {
            string_lexer.consume(name.length());
            return true;
        }
        return false;
    };

    // Tries each entry of `names` in order and returns the index of the first one found at the current
    // position (consuming it), or an empty Optional if none matches.
    auto consume_name_from = [&](auto const& names) -> Optional<int> {
        int index = 0;
        for (auto name : names) {
            if (consume_specific_ascii_case_insensitive(name))
                return index;
            ++index;
        }
        return {};
    };

    while (format_pos < format.length() && !string_lexer.is_eof()) {
        if (format[format_pos] != '%') {
            consume(format[format_pos]);
            format_pos++;
            continue;
        }

        format_pos++;
        if (format_pos == format.length())
            return {};

        switch (format[format_pos]) {
        case 'a': {
            auto wday = consume_name_from(short_day_names);
            if (!wday.has_value())
                return {};
            tm.tm_wday = wday.value();
            break;
        }
        case 'A': {
            auto wday = consume_name_from(long_day_names);
            if (!wday.has_value())
                return {};
            tm.tm_wday = wday.value();
            break;
        }
        case 'h':
        case 'b': {
            auto mon = consume_name_from(short_month_names);
            if (!mon.has_value())
                return {};
            tm.tm_mon = mon.value();
            break;
        }
        case 'B': {
            auto mon = consume_name_from(long_month_names);
            if (!mon.has_value())
                return {};
            tm.tm_mon = mon.value();
            break;
        }
        case 'C': {
            // Century: replace the century part of tm_year while keeping the year-within-century.
            int num = parse_number();
            tm.tm_year = (num - 19) * 100 + (tm.tm_year % 100);
            break;
        }
        case 'd':
            tm.tm_mday = parse_number();
            break;
        case 'D': {
            int mon = parse_number();
            consume('/');
            int day = parse_number();
            consume('/');
            int year = parse_number();
            tm.tm_mon = mon - 1;
            tm.tm_mday = day;
            // tm_year counts years since 1900. Four-digit years (including 1900 itself) are rebased;
            // two-digit years 70..99 map to 19xx and everything else to 20xx.
            tm.tm_year = year >= 1900 ? year - 1900 : (year <= 99 && year > 69 ? year : 100 + year);
            break;
        }
        case 'e':
            tm.tm_mday = parse_number();
            break;
        case 'H':
            tm.tm_hour = parse_number();
            break;
        case 'I': {
            // 12-hour clock: 12 wraps to 0 so that a following %p can add 12 for PM.
            int num = parse_number();
            tm.tm_hour = num % 12;
            break;
        }
        case 'j':
            // a little trickery here... we can get mktime() to figure out mon and mday using out of range values.
            // yday is not used so setting it is pointless.
            // NOTE(review): this normalizes against whatever tm_year has been parsed so far and goes through
            // the local time zone even when from_gmt is set - confirm that is intended.
            tm.tm_mday = parse_number();
            tm.tm_mon = 0;
            (void)mktime(&tm);
            break;
        case 'm': {
            int num = parse_number();
            tm.tm_mon = num - 1;
            break;
        }
        case 'M':
            tm.tm_min = parse_number();
            break;
        case 'n':
        case 't':
            string_lexer.consume_while(is_ascii_space);
            break;
        case 'r':
        case 'p': {
            auto ampm = string_lexer.consume(2);
            if (ampm == "PM") {
                if (tm.tm_hour < 12)
                    tm.tm_hour += 12;
            } else if (ampm != "AM") {
                return {};
            }
            break;
        }
        case 'R':
            tm.tm_hour = parse_number();
            consume(':');
            tm.tm_min = parse_number();
            break;
        case 'S':
            tm.tm_sec = parse_number();
            break;
        case 'T':
            tm.tm_hour = parse_number();
            consume(':');
            tm.tm_min = parse_number();
            consume(':');
            tm.tm_sec = parse_number();
            break;
        case 'w':
            tm.tm_wday = parse_number();
            break;
        case 'y': {
            // Two-digit year. tm_year counts years since 1900, so 70..99 (19xx) are stored as-is and
            // the remaining values (20xx) are offset by 100.
            int year = parse_number();
            tm.tm_year = year <= 99 && year > 69 ? year : 100 + year;
            break;
        }
        case 'Y': {
            int year = parse_number();
            tm.tm_year = year - 1900;
            break;
        }
        case 'x': {
            // Accepts either "HH:MM" or a combined "HHMM" number and subtracts it from the parsed time.
            auto hours = parse_number();
            int minutes;
            if (string_lexer.consume_specific(':')) {
                minutes = parse_number();
            } else {
                minutes = hours % 100;
                hours = hours / 100;
            }

            tm.tm_hour -= hours;
            tm.tm_min -= minutes;
            break;
        }
        case 'X': {
            if (!string_lexer.consume_specific('.'))
                return {};
            auto discarded = parse_number();
            (void)discarded; // NOTE: the tm structure does not support sub second precision, so drop this value.
            break;
        }
        case '+': {
            Optional<char> next_format_character;

            if (format_pos + 1 < format.length()) {
                next_format_character = format[format_pos + 1];

                // Disallow another formatter directly after %+. This is to avoid ambiguity when parsing a string like
                // "ignoreJan" with "%+%b", as it would be non-trivial to know where the %b field begins.
                if (next_format_character == '%')
                    return {};
            }

            // Skip everything up to the next literal format character (or to the end if %+ is last).
            auto discarded = string_lexer.consume_until([&](auto ch) { return ch == next_format_character; });
            if (discarded.is_empty())
                return {};

            break;
        }
        case '%':
            consume('%');
            break;
        default:
            parsing_failed = true;
            break;
        }

        if (parsing_failed)
            return {};

        format_pos++;
    }

    // Both the format and the input must have been consumed completely.
    if (!string_lexer.is_eof() || format_pos != format.length())
        return {};

    if (from_gmt) {
        // When from_gmt is true, the parsed time is in GMT and needs to be converted to Unix time
        tm.tm_isdst = 0; // GMT doesn't have daylight saving time

        // -1 is both the error return value and a valid timestamp (1969-12-31 23:59:59 UTC). The conversion
        // recomputes tm_wday when it succeeds, so a sentinel there lets us tell the two cases apart. The
        // previously parsed tm_wday is not needed past this point.
        tm.tm_wday = -1;
        auto gmt_time = timegm(&tm);
        if (gmt_time == -1 && tm.tm_wday == -1)
            return {};
        return UnixDateTime::from_seconds_since_epoch(gmt_time);
    }

    return UnixDateTime::from_unix_time_parts(
        tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday,
        tm.tm_hour, tm.tm_min, tm.tm_sec, 0);
}
770+
512771
}

AK/Time.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -478,6 +478,9 @@ class UnixDateTime : public Detail::UnawareTime {
478478
ErrorOr<String> to_string(StringView format = "%Y-%m-%d %H:%M:%S"sv, LocalTime = LocalTime::Yes) const;
479479
Utf16String to_utf16_string(StringView format = "%Y-%m-%d %H:%M:%S"sv, LocalTime = LocalTime::Yes) const;
480480
ByteString to_byte_string(StringView format = "%Y-%m-%d %H:%M:%S"sv, LocalTime = LocalTime::Yes) const;
481+
// Parses a string in the given format. Does not support time zone-related format specifiers.
482+
// If 'from_gmt' is true, the string is parsed as a GMT time, otherwise it is parsed as a local time.
483+
static Optional<UnixDateTime> parse(StringView format, StringView string, bool from_gmt = false);
481484

482485
// Offsetting a UNIX time by a duration yields another UNIX time.
483486
constexpr UnixDateTime operator+(Duration const& other) const
{
    // Build the result from this time's stored offset advanced by `other`.
    return UnixDateTime { m_offset + other };
}

0 commit comments

Comments
 (0)