|
7 | 7 |
|
8 | 8 | #include <AK/Checked.h> |
9 | 9 | #include <AK/DateConstants.h> |
| 10 | +#include <AK/GenericLexer.h> |
10 | 11 | #include <AK/String.h> |
11 | 12 | #include <AK/StringBuilder.h> |
12 | 13 | #include <AK/Time.h> |
|
17 | 18 | # define localtime_r(time, tm) localtime_s(tm, time) |
18 | 19 | # define gmtime_r(time, tm) gmtime_s(tm, time) |
19 | 20 | # define tzname _tzname |
| 21 | +# define timegm _mkgmtime |
20 | 22 | #endif |
21 | 23 |
|
22 | 24 | namespace AK { |
@@ -509,4 +511,261 @@ ByteString UnixDateTime::to_byte_string(StringView format, LocalTime local_time) |
509 | 511 | return builder.to_byte_string(); |
510 | 512 | } |
511 | 513 |
|
| 514 | +Optional<UnixDateTime> UnixDateTime::parse(StringView format, StringView string, bool from_gmt) |
| 515 | +{ |
| 516 | + unsigned format_pos = 0; |
| 517 | + |
| 518 | + struct tm tm = {}; |
| 519 | + tm.tm_isdst = -1; |
| 520 | + |
| 521 | + auto parsing_failed = false; |
| 522 | + |
| 523 | + GenericLexer string_lexer(string); |
| 524 | + |
| 525 | + auto parse_number = [&] { |
| 526 | + auto result = string_lexer.consume_decimal_integer<int>(); |
| 527 | + if (result.is_error()) { |
| 528 | + parsing_failed = true; |
| 529 | + return 0; |
| 530 | + } |
| 531 | + return result.value(); |
| 532 | + }; |
| 533 | + |
| 534 | + auto consume = [&](char c) { |
| 535 | + if (!string_lexer.consume_specific(c)) |
| 536 | + parsing_failed = true; |
| 537 | + }; |
| 538 | + |
| 539 | + auto consume_specific_ascii_case_insensitive = [&](StringView name) { |
| 540 | + auto next_string = string_lexer.peek_string(name.length()); |
| 541 | + if (next_string.has_value() && next_string->equals_ignoring_ascii_case(name)) { |
| 542 | + string_lexer.consume(name.length()); |
| 543 | + return true; |
| 544 | + } |
| 545 | + return false; |
| 546 | + }; |
| 547 | + |
| 548 | + while (format_pos < format.length() && !string_lexer.is_eof()) { |
| 549 | + if (format[format_pos] != '%') { |
| 550 | + consume(format[format_pos]); |
| 551 | + format_pos++; |
| 552 | + continue; |
| 553 | + } |
| 554 | + |
| 555 | + format_pos++; |
| 556 | + if (format_pos == format.length()) |
| 557 | + return {}; |
| 558 | + |
| 559 | + switch (format[format_pos]) { |
| 560 | + case 'a': { |
| 561 | + auto wday = 0; |
| 562 | + for (auto name : short_day_names) { |
| 563 | + if (consume_specific_ascii_case_insensitive(name)) { |
| 564 | + tm.tm_wday = wday; |
| 565 | + break; |
| 566 | + } |
| 567 | + ++wday; |
| 568 | + } |
| 569 | + if (wday == 7) |
| 570 | + return {}; |
| 571 | + break; |
| 572 | + } |
| 573 | + case 'A': { |
| 574 | + auto wday = 0; |
| 575 | + for (auto name : long_day_names) { |
| 576 | + if (consume_specific_ascii_case_insensitive(name)) { |
| 577 | + tm.tm_wday = wday; |
| 578 | + break; |
| 579 | + } |
| 580 | + ++wday; |
| 581 | + } |
| 582 | + if (wday == 7) |
| 583 | + return {}; |
| 584 | + break; |
| 585 | + } |
| 586 | + case 'h': |
| 587 | + case 'b': { |
| 588 | + auto mon = 0; |
| 589 | + for (auto name : short_month_names) { |
| 590 | + if (consume_specific_ascii_case_insensitive(name)) { |
| 591 | + tm.tm_mon = mon; |
| 592 | + break; |
| 593 | + } |
| 594 | + ++mon; |
| 595 | + } |
| 596 | + if (mon == 12) |
| 597 | + return {}; |
| 598 | + break; |
| 599 | + } |
| 600 | + case 'B': { |
| 601 | + auto mon = 0; |
| 602 | + for (auto name : long_month_names) { |
| 603 | + if (consume_specific_ascii_case_insensitive(name)) { |
| 604 | + tm.tm_mon = mon; |
| 605 | + break; |
| 606 | + } |
| 607 | + ++mon; |
| 608 | + } |
| 609 | + if (mon == 12) |
| 610 | + return {}; |
| 611 | + break; |
| 612 | + } |
| 613 | + case 'C': { |
| 614 | + int num = parse_number(); |
| 615 | + tm.tm_year = (num - 19) * 100 + (tm.tm_year % 100); |
| 616 | + break; |
| 617 | + } |
| 618 | + case 'd': |
| 619 | + tm.tm_mday = parse_number(); |
| 620 | + break; |
| 621 | + case 'D': { |
| 622 | + int mon = parse_number(); |
| 623 | + consume('/'); |
| 624 | + int day = parse_number(); |
| 625 | + consume('/'); |
| 626 | + int year = parse_number(); |
| 627 | + tm.tm_mon = mon - 1; |
| 628 | + tm.tm_mday = day; |
| 629 | + tm.tm_year = year > 1900 ? year - 1900 : (year <= 99 && year > 69 ? year : 100 + year); |
| 630 | + break; |
| 631 | + } |
| 632 | + case 'e': |
| 633 | + tm.tm_mday = parse_number(); |
| 634 | + break; |
| 635 | + case 'H': |
| 636 | + tm.tm_hour = parse_number(); |
| 637 | + break; |
| 638 | + case 'I': { |
| 639 | + int num = parse_number(); |
| 640 | + tm.tm_hour = num % 12; |
| 641 | + break; |
| 642 | + } |
| 643 | + case 'j': |
| 644 | + // a little trickery here... we can get mktime() to figure out mon and mday using out of range values. |
| 645 | + // yday is not used so setting it is pointless. |
| 646 | + tm.tm_mday = parse_number(); |
| 647 | + tm.tm_mon = 0; |
| 648 | + (void)mktime(&tm); |
| 649 | + break; |
| 650 | + case 'm': { |
| 651 | + int num = parse_number(); |
| 652 | + tm.tm_mon = num - 1; |
| 653 | + break; |
| 654 | + } |
| 655 | + case 'M': |
| 656 | + tm.tm_min = parse_number(); |
| 657 | + break; |
| 658 | + case 'n': |
| 659 | + case 't': |
| 660 | + string_lexer.consume_while(is_ascii_space); |
| 661 | + break; |
| 662 | + case 'r': |
| 663 | + case 'p': { |
| 664 | + auto ampm = string_lexer.consume(2); |
| 665 | + if (ampm == "PM") { |
| 666 | + if (tm.tm_hour < 12) |
| 667 | + tm.tm_hour += 12; |
| 668 | + } else if (ampm != "AM") { |
| 669 | + return {}; |
| 670 | + } |
| 671 | + break; |
| 672 | + } |
| 673 | + case 'R': |
| 674 | + tm.tm_hour = parse_number(); |
| 675 | + consume(':'); |
| 676 | + tm.tm_min = parse_number(); |
| 677 | + break; |
| 678 | + case 'S': |
| 679 | + tm.tm_sec = parse_number(); |
| 680 | + break; |
| 681 | + case 'T': |
| 682 | + tm.tm_hour = parse_number(); |
| 683 | + consume(':'); |
| 684 | + tm.tm_min = parse_number(); |
| 685 | + consume(':'); |
| 686 | + tm.tm_sec = parse_number(); |
| 687 | + break; |
| 688 | + case 'w': |
| 689 | + tm.tm_wday = parse_number(); |
| 690 | + break; |
| 691 | + case 'y': { |
| 692 | + int year = parse_number(); |
| 693 | + tm.tm_year = year <= 99 && year > 69 ? 1900 + year : 2000 + year; |
| 694 | + break; |
| 695 | + } |
| 696 | + case 'Y': { |
| 697 | + int year = parse_number(); |
| 698 | + tm.tm_year = year - 1900; |
| 699 | + break; |
| 700 | + } |
| 701 | + case 'x': { |
| 702 | + auto hours = parse_number(); |
| 703 | + int minutes; |
| 704 | + if (string_lexer.consume_specific(':')) { |
| 705 | + minutes = parse_number(); |
| 706 | + } else { |
| 707 | + minutes = hours % 100; |
| 708 | + hours = hours / 100; |
| 709 | + } |
| 710 | + |
| 711 | + tm.tm_hour -= hours; |
| 712 | + tm.tm_min -= minutes; |
| 713 | + break; |
| 714 | + } |
| 715 | + case 'X': { |
| 716 | + if (!string_lexer.consume_specific('.')) |
| 717 | + return {}; |
| 718 | + auto discarded = parse_number(); |
| 719 | + (void)discarded; // NOTE: the tm structure does not support sub second precision, so drop this value. |
| 720 | + break; |
| 721 | + } |
| 722 | + case '+': { |
| 723 | + Optional<char> next_format_character; |
| 724 | + |
| 725 | + if (format_pos + 1 < format.length()) { |
| 726 | + next_format_character = format[format_pos + 1]; |
| 727 | + |
| 728 | + // Disallow another formatter directly after %+. This is to avoid ambiguity when parsing a string like |
| 729 | + // "ignoreJan" with "%+%b", as it would be non-trivial to know that where the %b field begins. |
| 730 | + if (next_format_character == '%') |
| 731 | + return {}; |
| 732 | + } |
| 733 | + |
| 734 | + auto discarded = string_lexer.consume_until([&](auto ch) { return ch == next_format_character; }); |
| 735 | + if (discarded.is_empty()) |
| 736 | + return {}; |
| 737 | + |
| 738 | + break; |
| 739 | + } |
| 740 | + case '%': |
| 741 | + consume('%'); |
| 742 | + break; |
| 743 | + default: |
| 744 | + parsing_failed = true; |
| 745 | + break; |
| 746 | + } |
| 747 | + |
| 748 | + if (parsing_failed) |
| 749 | + return {}; |
| 750 | + |
| 751 | + format_pos++; |
| 752 | + } |
| 753 | + |
| 754 | + if (!string_lexer.is_eof() || format_pos != format.length()) |
| 755 | + return {}; |
| 756 | + |
| 757 | + if (from_gmt) { |
| 758 | + // When from_gmt is true, the parsed time is in GMT and needs to be converted to Unix time |
| 759 | + tm.tm_isdst = 0; // GMT doesn't have daylight saving time |
| 760 | + auto gmt_time = timegm(&tm); |
| 761 | + if (gmt_time == -1) |
| 762 | + return {}; |
| 763 | + return UnixDateTime::from_seconds_since_epoch(gmt_time); |
| 764 | + } |
| 765 | + |
| 766 | + return UnixDateTime::from_unix_time_parts( |
| 767 | + tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, |
| 768 | + tm.tm_hour, tm.tm_min, tm.tm_sec, 0); |
| 769 | +} |
| 770 | + |
512 | 771 | } |
0 commit comments