-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Restructured the parser a bit, made it a bit faster
- Loading branch information
Showing
4 changed files
with
146 additions
and
120 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,82 @@ | ||
import re | ||
import itertools | ||
from datetime import date, timedelta | ||
|
||
#: Regular expressions that can be tagged, sorted by priority.
#:
#: Every entry is a ``(name, pattern)`` tuple.  The list is filled with
#: pattern source strings below and each pattern is replaced by its
#: compiled form at the end of this module.  Names may occur more than
#: once; entries are meant to be tried in list order.
EXPRESSIONS = []

#: Set of regular expressions to parse ISO dates.
#:
#: Proper parsing would require fixing the number of year digits, but we
#: deliberately do not do that: the patterns are a bit fuzzy so that a
#: wider range of formats is accepted.  As a consequence we cannot
#: distinguish between the various ISO date formats, we just "support
#: them".  ISO 8601 expanded date formats allow an arbitrary number of
#: year digits with a leading +/- sign.
#:
#: Only the century patterns are anchored with ``$``; all other patterns
#: accept trailing characters when used with ``match``.
EXPRESSIONS.extend([
    # 1. complete dates:
    #    YYYY-MM-DD or +-YYYYYY-MM-DD ... extended date format
    ('complete_date', r"(?P<sign>[+-]){0,1}(?P<year>[0-9]{4,6})"
                      r"-(?P<month>[0-9]{2})-(?P<day>[0-9]{2})"),

    #    YYYYMMDD ... basic date format (unsigned, four year digits)
    ('basic_date', r"(?P<sign>[+-]){0}(?P<year>[0-9]{4})"
                   r"(?P<month>[0-9]{2})(?P<day>[0-9]{2})"),

    #    +-YYYYYYMMDD ... basic date format (signed, six year digits)
    ('basic_date', r"(?P<sign>[+-]){1}(?P<year>[0-9]{6})"
                   r"(?P<month>[0-9]{2})(?P<day>[0-9]{2})"),

    # 2. complete week dates:
    #    YYYY-Www-D or +-YYYYYY-Www-D ... extended week date
    ('complete_week_date', r"(?P<sign>[+-]){0,1}(?P<year>[0-9]{4,6})"
                           r"-W(?P<week>[0-9]{2})-(?P<day>[0-9]{1})"),

    #    YYYYWwwD or +-YYYYYYWwwD ... basic week date
    ('basic_week_date', r"(?P<sign>[+-]){0,1}(?P<year>[0-9]{4,6})W"
                        r"(?P<week>[0-9]{2})(?P<day>[0-9]{1})"),

    # 3. ordinal dates:
    #    YYYY-DDD or +-YYYYYY-DDD ... extended format
    ('ordinal_date', r"(?P<sign>[+-]){0,1}(?P<year>[0-9]{4,6})"
                     r"-(?P<day>[0-9]{3})"),

    #    YYYYDDD or +-YYYYYYDDD ... basic format
    ('basic_date_format', r"(?P<sign>[+-]){0,1}(?P<year>[0-9]{4,6})"
                          r"(?P<day>[0-9]{3})"),

    # 4. week dates:
    #    YYYY-Www or +-YYYYYY-Www ... extended reduced accuracy week date
    ('week_date', r"(?P<sign>[+-]){0,1}(?P<year>[0-9]{4,6})"
                  r"-W(?P<week>[0-9]{2})"),

    #    YYYYWww or +-YYYYYYWww ... basic reduced accuracy week date
    ('basic_reduced_accuracy_week_date',
     r"(?P<sign>[+-]){0,1}(?P<year>[0-9]{4,6})W"
     r"(?P<week>[0-9]{2})"),

    # 5. month dates:
    #    YYYY-MM or +-YYYYYY-MM ... reduced accuracy specific month
    ('month_date', r"(?P<sign>[+-]){0,1}(?P<year>[0-9]{4,6})"
                   r"-(?P<month>[0-9]{2})"),

    # 6. century dates:
    #    YY or +-YYYY ... reduced accuracy specific century
    #    Listed before the year pattern on purpose: the year pattern is
    #    not anchored, so a signed four digit string would otherwise be
    #    taken as a year.
    ('century_date', r"(?P<sign>[+-]){1}"
                     r"(?P<century>[0-9]{4})$"),

    ('century_date', r"(?P<sign>[+-]){0}"
                     r"(?P<century>[0-9]{2})$"),

    # 7. year dates:
    #    YYYY or +-YYYYYY ... reduced accuracy specific year
    ('year_date', r"(?P<sign>[+-]){0,1}(?P<year>[0-9]{4,6})"),
])

# Compile all regular expressions once at import time; this eases
# debugging and boosts performance.  Slice assignment keeps the identity
# of the list object while replacing every pattern string.
EXPRESSIONS[:] = [(name, re.compile(pattern)) for name, pattern in EXPRESSIONS]
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
#-*- coding: utf-8 -*- | ||
import re | ||
import itertools | ||
from datetime import date, timedelta | ||
from period.constants import EXPRESSIONS | ||
|
||
|
||
class Parser(object):
    """Turn ISO 8601 style date strings into ``datetime.date`` objects.

    The named, compiled patterns in ``EXPRESSIONS`` are tried in order;
    the first one that matches is passed to the handler registered for
    its name in ``self.rules``.
    """

    def __init__(self):
        #: Maps a handler callable to the names of the expressions it
        #: processes.  These rules are named after predefined constants
        #: in period.constants
        self.rules = {
            self.handle_date: ('complete_date', 'basic_date', 'basic_week_date',
                               'ordinal_date', 'basic_date_format', 'week_date',
                               'basic_reduced_accuracy_week_date', 'month_date',
                               'year_date', 'century_date', 'complete_week_date')}

    def parse(self, string):
        """Parse ``string`` with the first matching expression.

        Returns whatever the responsible handler returns, or ``None`` if
        ``string`` is empty/``None`` or no expression matches.  Raises
        ``KeyError`` if a matching expression name has no handler in
        ``self.rules``.
        """
        # Nothing to parse: bail out before doing any other work.
        if not string:
            return None

        # Invert the rules table into name -> handler.  This is done per
        # call so that changes made to ``self.rules`` after construction
        # are honoured.
        handlers = {}
        for handler, names in self.rules.items():
            for name in names:
                handlers[name] = handler

        for name, expr in EXPRESSIONS:
            match = expr.match(string)
            if match:
                return handlers[name](match)
        return None

    def handle_date(self, match):
        """Build a ``date`` from a regular expression match.

        The named groups of the pattern decide how the match is read:

        * ``century``: first day of year ``century * 100 + 1``
        * ``year`` and ``week`` (optional ``day``): ISO week date, the
          day defaults to 1
        * ``year`` and ``day`` only: ordinal date (day of the year)
        * ``year`` only: first day of that year
        * ``year`` and ``month`` (optional ``day``): calendar date, the
          day defaults to 1

        An optional ``sign`` group of ``'-'`` negates the year.

        Raises ``ValueError`` when the resulting values are rejected by
        ``datetime.date``.
        """
        groups = match.groupdict()
        # FIXME: negative dates not possible with python standard types;
        # date() rejects them with a ValueError.
        # .get() so that patterns without a sign group are accepted too.
        sign = -1 if groups.get('sign') == '-' else 1

        if 'century' in groups:
            return date(sign * (int(groups['century']) * 100 + 1), 1, 1)

        year = sign * int(groups['year'])

        if 'month' in groups:  # year-month or complete date
            day = groups.get('day')
            return date(year,
                        int(groups['month']) or 1,
                        1 if day is None else int(day))

        # week date, ordinal date or plain year
        first_day = date(year, 1, 1)

        if 'week' in groups:
            iso_week, iso_weekday = first_day.isocalendar()[1:3]
            weekday = int(groups.get('day') or 1)
            # If January 1st already lies in ISO week 1, the requested
            # week is only (week - 1) weeks away from it.
            weeks = int(groups['week']) - (1 if iso_week == 1 else 0)
            return first_day + timedelta(weeks=weeks,
                                         days=weekday - iso_weekday)

        if 'day' in groups:  # ordinal date
            return first_day + timedelta(days=int(groups['day']) - 1)

        return first_day  # year date