In [None]:
import datetime
import re
from functools import reduce

import parsedatetime

In [None]:
# English month names and their usual abbreviations, matched only when they
# are not embedded in a longer alphabetic word (so 'decade' or 'mayor' do not
# count as a month, while 'Dec.', '3mar' and 'March' do).
_MONTH_NAME_RE = re.compile(
    r'(?<![a-z])'
    r'(?:jan(?:uary)?|feb(?:ruary)?|mar(?:ch)?|apr(?:il)?|may|june?|july?'
    r'|aug(?:ust)?|sep(?:t(?:ember)?)?|oct(?:ober)?|nov(?:ember)?'
    r'|dec(?:ember)?)'
    r'(?![a-z])',
    re.IGNORECASE)


def _same_day_in_earlier_year(date):
    """Return midnight of the closest earlier-year date with the same month/day, or None."""
    # 29 February does not exist in every year, so stepping back exactly one
    # year can fail. Two leap years are at most 8 years apart, so 8 attempts
    # are enough to find the previous occurrence.
    for years_back in range(1, 9):
        try:
            return datetime.datetime(date.year - years_back, date.month, date.day)
        except ValueError:
            continue
    return None


def get_absolute_date(relative_date_string, publication_date=None):
    """
    Turn relative dates into absolute datetimes.
    Currently uses API of parsedatetime
    https://bear.im/code/parsedatetime/docs/index.html

    Only the first match reported by parsedatetime's ``Calendar.nlp`` is used.
    Dates in an article are assumed to lie in the past: when the parsed date
    is later than publication_date it is rolled back as follows.

        - If relative_date_string names a month, the result is the same
          month/day in the closest earlier year where that day exists,
          at midnight (the parsed time of day is dropped). Normally this is
          one year back; 29 February goes back to the previous leap year.
          A string with an explicit future year is also moved back only one
          year, so the result may still be after publication_date.
        - Otherwise the gap is mirrored into the past, counted in whole
          days d: the result is publication_date minus d days when d is a
          multiple of 7, else publication_date minus
          (7 * (d // 7) + 7 - d % 7) days. For example, a weekday name that
          parsedatetime resolves 5 days ahead becomes 2 days before
          publication. The result keeps publication_date's time of day.

    Parameters:
    -----------
    relative_date_string        the relative date in an article (e.g. 'Last week'): String
    publication_date            the publication_date of the article: datetime

    Returns:
    --------
    One of:
        - a datetime that represents the absolute date of the relative date based on
            the publication_date
        - the parsed datetime unchanged, if it is not after publication_date or
            no publication_date is provided
        - None, if parse is not successful (or no earlier year can hold the date)
    """

    cal = parsedatetime.Calendar()
    parsed_result = cal.nlp(relative_date_string, publication_date)
    if not parsed_result:
        # Parse unsuccessful (None, or nothing matched)
        return None

    # Each match is a tuple whose first item is the parsed datetime
    parsed_absolute_date = parsed_result[0][0]

    if not publication_date or parsed_absolute_date <= publication_date:
        # Already in the past, or there is nothing to compare against
        return parsed_absolute_date

    # parsedatetime returned a date in the future, likely because the year
    # isn't specified or the date string is relative.
    if _MONTH_NAME_RE.search(relative_date_string):
        # TODO: Is it enough to just check for month names to determine if a
        #       date_string specifies a particular date?
        # Date is given explicitly but (presumably) without a year: roll back
        # to the previous year in which that day exists.
        return _same_day_in_earlier_year(parsed_absolute_date)

    # Relative expression: mirror the whole-day gap into the past
    days_ahead = (parsed_absolute_date - publication_date).days
    whole_weeks, extra_days = divmod(days_ahead, 7)
    days_back = 7 * whole_weeks
    if extra_days:
        days_back += 7 - extra_days
    return publication_date - datetime.timedelta(days=days_back)

## Year is not specified

In [None]:
 publication_date = datetime.datetime(2016, 10, 30, 18, 0)

In [None]:
# 28 December comes later in the calendar year than the 30 October
# publication date, so the result should land in the previous year
get_absolute_date('28th December', publication_date=publication_date)

In [None]:
# 26 October comes earlier in the calendar year than the 30 October
# publication date, so the result should stay in the publication year
get_absolute_date('26th October', publication_date=publication_date)

In [None]:
# Day and month only, at the very start of the calendar year
get_absolute_date('1 January', publication_date=publication_date)

## Relative date string

In [None]:
# Purely relative expression that already points into the past
get_absolute_date('2 weeks ago', publication_date=publication_date)

In [None]:
# Time of day only, with no date part in the string
get_absolute_date('3:30pm', publication_date=publication_date)

## Year is specified

In [None]:
# Explicit year that lies before the publication date
get_absolute_date('March 3 2014', publication_date=publication_date)

This is considered **invalid** for now, since we assume that articles only contain dates in the past (handling future dates is left as a future enhancement).

In [None]:
# Explicit year that lies after the publication date (unsupported input;
# the returned value is not expected to be meaningful)
get_absolute_date('March 3 2018', publication_date=publication_date)