From d82873aa221aeba3d5bc36526282397d5fc34ac0 Mon Sep 17 00:00:00 2001 From: Mike Date: Mon, 20 Jan 2020 16:37:40 +0100 Subject: [PATCH] Add date to updated_by_script Add function to remove dates from dataset titles and optionally use them as date of dataset --- requirements.txt | 2 +- setup.py | 2 +- src/hdx/data/dataset.py | 8 ++++---- src/hdx/data/dataset_title_helper.py | 16 ++++++++-------- src/hdx/version.txt | 2 +- 5 files changed, 15 insertions(+), 15 deletions(-) diff --git a/requirements.txt b/requirements.txt index 96133e7e..a09bbd4c 100755 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ ckanapi==4.3 -hdx-python-country==2.3.1 +hdx-python-country==2.3.2 ndg-httpsclient==0.5.1 pyasn1==0.4.8 pyOpenSSL==19.1.0 diff --git a/setup.py b/setup.py index 33b44867..8b8be77c 100755 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ from setuptools import setup, find_packages requirements = ['ckanapi>=4.3', - 'hdx-python-country>=2.3.1', + 'hdx-python-country>=2.3.2', 'ndg-httpsclient', 'pyasn1', 'pyOpenSSL' diff --git a/src/hdx/data/dataset.py b/src/hdx/data/dataset.py index 5d3a5274..187e122d 100755 --- a/src/hdx/data/dataset.py +++ b/src/hdx/data/dataset.py @@ -856,16 +856,16 @@ def set_dataset_date(self, dataset_date, dataset_end_date=None, date_format=None None """ if allow_range: - startdate, enddate = parse_date_range(dataset_date, date_format=date_format) + startdate, enddate = parse_date_range(dataset_date, date_format=date_format, zero_time=True) if dataset_end_date is not None: - _, enddate = parse_date_range(dataset_end_date, date_format=date_format) + _, enddate = parse_date_range(dataset_end_date, date_format=date_format, zero_time=True) self.set_dataset_date_from_datetime(startdate, enddate) else: - date = parse_date(dataset_date, date_format=date_format) + date = parse_date(dataset_date, date_format=date_format, zero_time=True) if dataset_end_date is None: enddate = None else: - enddate = parse_date(dataset_end_date, date_format=date_format) + enddate = parse_date(dataset_end_date, date_format=date_format, zero_time=True) self.set_dataset_date_from_datetime(date, enddate) def set_dataset_year_range(self, dataset_year, dataset_end_year=None): diff --git a/src/hdx/data/dataset_title_helper.py b/src/hdx/data/dataset_title_helper.py index e2097ba5..f43279d4 100644 --- a/src/hdx/data/dataset_title_helper.py +++ b/src/hdx/data/dataset_title_helper.py @@ -45,7 +45,7 @@ def fuzzy_match_dates_in_title(cls, title, ranges): enddatelr = None deltalr = timedelta(days=1000) try: - startdatelr, enddatelr = parse_date_range(stringlr, fuzzy=fuzzylr) + startdatelr, enddatelr = parse_date_range(stringlr, fuzzy=fuzzylr, zero_time=True) if startdatelr and enddatelr: deltalr = enddatelr - startdatelr except ParserError: @@ -57,7 +57,7 @@ def fuzzy_match_dates_in_title(cls, title, ranges): enddaterl = None deltarl = timedelta(days=1000) try: - startdaterl, enddaterl = parse_date_range(stringrl, fuzzy=fuzzyrl) + startdaterl, enddaterl = parse_date_range(stringrl, fuzzy=fuzzyrl, zero_time=True) if startdaterl and enddaterl: deltarl = enddaterl - startdaterl except ParserError: @@ -71,7 +71,7 @@ def fuzzy_match_dates_in_title(cls, title, ranges): else: year = match.group(0) date_components = (year) - ranges.append(parse_date_range(year)) + ranges.append(parse_date_range(year, zero_time=True)) newtitle = title for date_component in date_components: newtitle = remove_string(newtitle, date_component, PUNCTUATION_MINUS_BRACKETS) @@ -80,7 +80,7 @@ def fuzzy_match_dates_in_title(cls, title, ranges): match = cls.YEAR_PATTERN.search(title, end) try: fuzzy = dict() - startdate, enddate = parse_date_range(title, fuzzy=fuzzy) + startdate, enddate = parse_date_range(title, fuzzy=fuzzy, zero_time=True) if startdate == enddate and len(fuzzy['date']) == 1: # only accept dates where day, month and year are # all together not split throughout the string and where the date is a precise day not a range ranges.append((startdate, enddate)) @@ -108,8 +108,8 @@ def get_date_from_title(cls, title): """ ranges = list() for match in cls.YEAR_RANGE_PATTERN.finditer(title): - startdate = parse_date('%s-01-01' % match.group(1), '%Y-%m-%d') - enddate = parse_date('%s-12-31' % match.group(3), '%Y-%m-%d') + startdate = parse_date('%s-01-01' % match.group(1), '%Y-%m-%d', zero_time=True) + enddate = parse_date('%s-12-31' % match.group(3), '%Y-%m-%d', zero_time=True) ranges.append((startdate, enddate)) newtitle = remove_string(title, match.group(0)) logger.info('Removing date range from title: %s -> %s' % (title, newtitle)) @@ -117,8 +117,8 @@ def get_date_from_title(cls, title): for match in cls.YEAR_RANGE_PATTERN2.finditer(title): first_year = match.group(1) - startdate = parse_date('%s-01-01' % first_year, '%Y-%m-%d') - enddate = parse_date('%s%s-12-31' % (first_year[:2], match.group(3)), '%Y-%m-%d') + startdate = parse_date('%s-01-01' % first_year, '%Y-%m-%d', zero_time=True) + enddate = parse_date('%s%s-12-31' % (first_year[:2], match.group(3)), '%Y-%m-%d', zero_time=True) ranges.append((startdate, enddate)) newtitle = remove_string(title, match.group(0)) logger.info('Removing date range from title: %s -> %s' % (title, newtitle)) diff --git a/src/hdx/version.txt b/src/hdx/version.txt index cc868b62..4bac418e 100755 --- a/src/hdx/version.txt +++ b/src/hdx/version.txt @@ -1 +1 @@ -4.0.1 \ No newline at end of file +4.0.2 \ No newline at end of file