Skip to content

Commit

Permalink
Add date to updated_by_script
Browse files: browse the repository at this point in the history
Add a function to remove dates from dataset titles and optionally use them as the dataset date
  • Loading branch information
mcarans committed Jan 20, 2020
1 parent ba4552b commit d82873a
Show file tree
Hide file tree
Showing 5 changed files with 15 additions and 15 deletions.
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
ckanapi==4.3
hdx-python-country==2.3.1
hdx-python-country==2.3.2
ndg-httpsclient==0.5.1
pyasn1==0.4.8
pyOpenSSL==19.1.0
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from setuptools import setup, find_packages

requirements = ['ckanapi>=4.3',
'hdx-python-country>=2.3.1',
'hdx-python-country>=2.3.2',
'ndg-httpsclient',
'pyasn1',
'pyOpenSSL'
Expand Down
8 changes: 4 additions & 4 deletions src/hdx/data/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -856,16 +856,16 @@ def set_dataset_date(self, dataset_date, dataset_end_date=None, date_format=None
None
"""
if allow_range:
startdate, enddate = parse_date_range(dataset_date, date_format=date_format)
startdate, enddate = parse_date_range(dataset_date, date_format=date_format, zero_time=True)
if dataset_end_date is not None:
_, enddate = parse_date_range(dataset_end_date, date_format=date_format)
_, enddate = parse_date_range(dataset_end_date, date_format=date_format, zero_time=True)
self.set_dataset_date_from_datetime(startdate, enddate)
else:
date = parse_date(dataset_date, date_format=date_format)
date = parse_date(dataset_date, date_format=date_format, zero_time=True)
if dataset_end_date is None:
enddate = None
else:
enddate = parse_date(dataset_end_date, date_format=date_format)
enddate = parse_date(dataset_end_date, date_format=date_format, zero_time=True)
self.set_dataset_date_from_datetime(date, enddate)

def set_dataset_year_range(self, dataset_year, dataset_end_year=None):
Expand Down
16 changes: 8 additions & 8 deletions src/hdx/data/dataset_title_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def fuzzy_match_dates_in_title(cls, title, ranges):
enddatelr = None
deltalr = timedelta(days=1000)
try:
startdatelr, enddatelr = parse_date_range(stringlr, fuzzy=fuzzylr)
startdatelr, enddatelr = parse_date_range(stringlr, fuzzy=fuzzylr, zero_time=True)
if startdatelr and enddatelr:
deltalr = enddatelr - startdatelr
except ParserError:
Expand All @@ -57,7 +57,7 @@ def fuzzy_match_dates_in_title(cls, title, ranges):
enddaterl = None
deltarl = timedelta(days=1000)
try:
startdaterl, enddaterl = parse_date_range(stringrl, fuzzy=fuzzyrl)
startdaterl, enddaterl = parse_date_range(stringrl, fuzzy=fuzzyrl, zero_time=True)
if startdaterl and enddaterl:
deltarl = enddaterl - startdaterl
except ParserError:
Expand All @@ -71,7 +71,7 @@ def fuzzy_match_dates_in_title(cls, title, ranges):
else:
year = match.group(0)
date_components = (year)
ranges.append(parse_date_range(year))
ranges.append(parse_date_range(year, zero_time=True))
newtitle = title
for date_component in date_components:
newtitle = remove_string(newtitle, date_component, PUNCTUATION_MINUS_BRACKETS)
Expand All @@ -80,7 +80,7 @@ def fuzzy_match_dates_in_title(cls, title, ranges):
match = cls.YEAR_PATTERN.search(title, end)
try:
fuzzy = dict()
startdate, enddate = parse_date_range(title, fuzzy=fuzzy)
startdate, enddate = parse_date_range(title, fuzzy=fuzzy, zero_time=True)
if startdate == enddate and len(fuzzy['date']) == 1: # only accept dates where day, month and year are
# all together not split throughout the string and where the date is a precise day not a range
ranges.append((startdate, enddate))
Expand Down Expand Up @@ -108,17 +108,17 @@ def get_date_from_title(cls, title):
"""
ranges = list()
for match in cls.YEAR_RANGE_PATTERN.finditer(title):
startdate = parse_date('%s-01-01' % match.group(1), '%Y-%m-%d')
enddate = parse_date('%s-12-31' % match.group(3), '%Y-%m-%d')
startdate = parse_date('%s-01-01' % match.group(1), '%Y-%m-%d', zero_time=True)
enddate = parse_date('%s-12-31' % match.group(3), '%Y-%m-%d', zero_time=True)
ranges.append((startdate, enddate))
newtitle = remove_string(title, match.group(0))
logger.info('Removing date range from title: %s -> %s' % (title, newtitle))
title = newtitle

for match in cls.YEAR_RANGE_PATTERN2.finditer(title):
first_year = match.group(1)
startdate = parse_date('%s-01-01' % first_year, '%Y-%m-%d')
enddate = parse_date('%s%s-12-31' % (first_year[:2], match.group(3)), '%Y-%m-%d')
startdate = parse_date('%s-01-01' % first_year, '%Y-%m-%d', zero_time=True)
enddate = parse_date('%s%s-12-31' % (first_year[:2], match.group(3)), '%Y-%m-%d', zero_time=True)
ranges.append((startdate, enddate))
newtitle = remove_string(title, match.group(0))
logger.info('Removing date range from title: %s -> %s' % (title, newtitle))
Expand Down
2 changes: 1 addition & 1 deletion src/hdx/version.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
4.0.1
4.0.2

0 comments on commit d82873a

Please sign in to comment.