# Notes

This module calculates the timedelta between a reference date and the dates in a Series, and returns two Series:
1. The calculated timedeltas as floats (second element of the returned tuple).
2. The timedeltas rounded and formatted as strings with the suffix you provide (first element of the returned tuple).

In [1]:
import datetime
import pandas as pd
import numpy as np

# Functions

In [2]:
def timedelta_str(timedelta,suffix_singular,suffix_plural):
    """Subfunction of timedelta. Creates a string representation of the timedelta.
    
        The value is rounded to the nearest integer once; that same rounded
        integer is used both as the number shown and to choose the suffix, so
        the two can never disagree (e.g. 1.6 gives "2 <plural>", not "1 <plural>").
    
        Args:
            timedelta: numeric, or a null value (anything pd.isna() reports as null)
            suffix_singular: str, suffix used if the rounded value <= 1
            suffix_plural: str, suffix used if the rounded value > 1
    
        Returns:
            str "<rounded value> <suffix>", or np.nan if timedelta is null
    
        Example:
            In: 
                timedelta_str(timedelta = 2,suffix_singular = 'Day since last purchase',
                suffix_plural = 'Days since last purchase')
            
            Out: 
                "2 Days since last purchase"
    """
    
    # Returns null if null
    if pd.isna(timedelta):
        return np.nan
    
    # Round once and reuse: number displayed and suffix choice share one value
    rounded = int(round(timedelta))
    suffix = suffix_singular if rounded <= 1 else suffix_plural
    
    return str(rounded) +' '+ suffix



def timedelta(datetime_series,
              timedelta_type, 
              suffix_singular, 
              suffix_plural,
              compare_datetime = None,
              null_value = 'No data'):
    """Calculates timedelta between a date (by default now) and dates in a Series.
    Returns a string representation of the timedelta Series as well as the
    timedelta Series itself as dtype float (see arguments and returns).
    
        Args:
            datetime_series: pd.Series as type datetime (e.g. using pd.to_datetime)
            timedelta_type: str, "month" or "day". A month is counted as 30.44 days.
            suffix_singular: see subfunction timedelta_str
            suffix_plural: see subfunction timedelta_str
            compare_datetime: date to calculate timedelta from. If None (default),
                the current time is taken when the function is called.
            null_value: value to assign in the string Series if no timedelta can be
                calculated (e.g. where datetime Series == NaT)
            
        Returns:
            (pd.Series,pd.Series): in this order,
                1. the timedeltas formatted as strings (null_value where missing)
                2. the timedeltas rounded to whole months/days as float (NaN where missing)
    
        Raises:
            ValueError: if timedelta_type is neither "month" nor "day"
    """
    
    # Length of one unit; validated first so a bad argument fails before any work is done
    if timedelta_type == 'month':
        unit = pd.Timedelta(days = 30.44)
    elif timedelta_type == 'day':
        unit = pd.Timedelta(days = 1)
    else:
        raise ValueError('timedelta_type must be either month or day')
    
    # "Now" has to be resolved per call: a default of datetime.datetime.now() in the
    # signature would be evaluated only once, when the function is defined
    if compare_datetime is None:
        compare_datetime = datetime.datetime.now()
    
    # Creates a timedelta Series by subtracting the values in the Series from the date to compare to
    elapsed = pd.Timestamp(compare_datetime) - datetime_series
    
    # Number of units as float, rounded to the nearest whole unit (NaT becomes NaN)
    timedelta_series = (elapsed / unit).round()
    
    # Applies subfunction timedelta_str (see above)
    timedelta_series_str = timedelta_series.apply(timedelta_str,
                                              suffix_singular = suffix_singular,
                                              suffix_plural = suffix_plural)
    
    # If there is no timedelta (e.g. where datetime series was NaT) -> assigns chosen null value
    timedelta_series_str = timedelta_series_str.fillna(null_value)
    
    return timedelta_series_str,timedelta_series

# Usage
Convert the cells below to code cells in order to try them out.
## Data

# Sample dates for the demo; the trailing NaN is converted to NaT by pd.to_datetime
data = {'date': ['2018-08-06', '2017-05-04', '2005-01-01', '2007-01-01', np.nan]}

# Build the frame and parse the 'date' column to datetime in one step
df_test = pd.DataFrame(data).assign(date=lambda frame: pd.to_datetime(frame['date']))
df_test

## Test

# Capture the reference date once and pass it explicitly, so that the
# 'date_compared' column shows exactly the date both calls compare against
compare_date = datetime.datetime.now()
df_test['date_compared'] = compare_date

# timedelta() returns the string Series first and the numeric Series second
df_test['days_timedelta_str'],df_test['days_timedelta'] = timedelta(df_test['date'],
                                                                    timedelta_type = 'day',
                                                                    suffix_singular = 'day since last purchase',
                                                                    suffix_plural = 'days since last purchase',
                                                                    compare_datetime = compare_date)

df_test['months_timedelta_str'],df_test['months_timedelta'] = timedelta(df_test['date'],
                                                                        timedelta_type = 'month',
                                                                        suffix_singular = 'month since last purchase',
                                                                        suffix_plural = 'months since last purchase',
                                                                        compare_datetime = compare_date)

df_test