In [0]:
from pyspark.sql.functions import *

In [0]:
# Sample input rows as (date, timestamp) string pairs. The date in each pair
# is the leading 10 characters (yyyy-MM-dd) of its timestamp string.
datetimes = [
    (stamp[:10], stamp)
    for stamp in (
        "2014-02-28 10:00:00.123",
        "2016-03-29 11:23:00.234",
        "2018-04-20 12:34:00.543",
        "2019-05-12 13:21:00.567",
    )
]

In [0]:
# Build the demo DataFrame; both columns are declared as plain strings in the
# DDL schema, so the date functions used later operate on string input.
datedf = spark.createDataFrame(
    data=datetimes,
    schema="date STRING, time STRING",
)

In [0]:
# Print the input rows; truncate=False keeps the full timestamp strings visible.
datedf.show(truncate=False)

+----------+-----------------------+
|date      |time                   |
+----------+-----------------------+
|2014-02-28|2014-02-28 10:00:00.123|
|2016-03-29|2016-03-29 11:23:00.234|
|2018-04-20|2018-04-20 12:34:00.543|
|2019-05-12|2019-05-12 13:21:00.567|
+----------+-----------------------+



In [0]:
# Show the docstring for date_add (adds a number of days to a date).
help(date_add)

Help on function date_add in module pyspark.sql.functions:

date_add(start: 'ColumnOrName', days: Union[ForwardRef('ColumnOrName'), int]) -> pyspark.sql.column.Column
    Returns the date that is `days` days after `start`. If `days` is a negative value
    then these amount of days will be deducted from `start`.
    
    .. versionadded:: 1.5.0
    
    .. versionchanged:: 3.4.0
        Supports Spark Connect.
    
    Parameters
    ----------
    start : :class:`~pyspark.sql.Column` or str
        date column to work on.
    days : :class:`~pyspark.sql.Column` or str or int
        how many days after the given date to calculate.
        Accepts negative value as well to calculate backwards in time.
    
    Returns
    -------
    :class:`~pyspark.sql.Column`
        a date after/before given number of days.
    
    Examples
    --------
    >>> df = spark.createDataFrame([('2015-04-08', 2,)], ['dt', 'add'])
    >>> df.select(date_add(df.dt, 1).alias('next_date')).collect()
    [Ro

In [0]:
# Show the docstring for date_sub (subtracts a number of days from a date).
help(date_sub)

Help on function date_sub in module pyspark.sql.functions:

date_sub(start: 'ColumnOrName', days: Union[ForwardRef('ColumnOrName'), int]) -> pyspark.sql.column.Column
    Returns the date that is `days` days before `start`. If `days` is a negative value
    then these amount of days will be added to `start`.
    
    .. versionadded:: 1.5.0
    
    .. versionchanged:: 3.4.0
        Supports Spark Connect.
    
    Parameters
    ----------
    start : :class:`~pyspark.sql.Column` or str
        date column to work on.
    days : :class:`~pyspark.sql.Column` or str or int
        how many days before the given date to calculate.
        Accepts negative value as well to calculate forward in time.
    
    Returns
    -------
    :class:`~pyspark.sql.Column`
        a date before/after given number of days.
    
    Examples
    --------
    >>> df = spark.createDataFrame([('2015-04-08', 2,)], ['dt', 'sub'])
    >>> df.select(date_sub(df.dt, 1).alias('prev_date')).collect()
    [Row(pre

In [0]:
# Shift both the date column and the timestamp column by 10 days in each
# direction and print the result alongside the original columns.
(
    datedf
    .withColumn("date_add_date", date_add("date", 10))
    .withColumn("date_add_time", date_add("time", 10))
    .withColumn("date_sub_date", date_sub("date", 10))
    .withColumn("date_sub_time", date_sub("time", 10))
    .show(truncate=False)
)

+----------+-----------------------+-------------+-------------+-------------+-------------+
|date      |time                   |date_add_date|date_add_time|date_sub_date|date_sub_time|
+----------+-----------------------+-------------+-------------+-------------+-------------+
|2014-02-28|2014-02-28 10:00:00.123|2014-03-10   |2014-03-10   |2014-02-18   |2014-02-18   |
|2016-03-29|2016-03-29 11:23:00.234|2016-04-08   |2016-04-08   |2016-03-19   |2016-03-19   |
|2018-04-20|2018-04-20 12:34:00.543|2018-04-30   |2018-04-30   |2018-04-10   |2018-04-10   |
|2019-05-12|2019-05-12 13:21:00.567|2019-05-22   |2019-05-22   |2019-05-02   |2019-05-02   |
+----------+-----------------------+-------------+-------------+-------------+-------------+



In [0]:
# Show the docstring for date_diff (number of days from `start` to `end`).
help(date_diff)

Help on function date_diff in module pyspark.sql.functions:

date_diff(end: 'ColumnOrName', start: 'ColumnOrName') -> pyspark.sql.column.Column
    Returns the number of days from `start` to `end`.
    
    .. versionadded:: 3.5.0
    
    Parameters
    ----------
    end : :class:`~pyspark.sql.Column` or str
        to date column to work on.
    start : :class:`~pyspark.sql.Column` or str
        from date column to work on.
    
    Returns
    -------
    :class:`~pyspark.sql.Column`
        difference in days between two dates.
    
    Examples
    --------
    >>> df = spark.createDataFrame([('2015-04-08','2015-05-10')], ['d1', 'd2'])
    >>> df.select(date_diff(df.d2, df.d1).alias('diff')).collect()
    [Row(diff=32)]



In [0]:
# Re-display the input rows for reference before computing day differences.
datedf.show(truncate=False)

+----------+-----------------------+
|date      |time                   |
+----------+-----------------------+
|2014-02-28|2014-02-28 10:00:00.123|
|2016-03-29|2016-03-29 11:23:00.234|
|2018-04-20|2018-04-20 12:34:00.543|
|2019-05-12|2019-05-12 13:21:00.567|
+----------+-----------------------+



In [0]:
# Days elapsed between each row's value and "now". The results depend on the
# day the cell is executed, because current_date()/current_timestamp() are
# evaluated at run time.
(
    datedf
    .withColumn("datediff_date", date_diff(current_date(), "date"))
    .withColumn("datediff_time", date_diff(current_timestamp(), "time"))
    .show(truncate=False)
)

+----------+-----------------------+-------------+-------------+
|date      |time                   |datediff_date|datediff_time|
+----------+-----------------------+-------------+-------------+
|2014-02-28|2014-02-28 10:00:00.123|3509         |3509         |
|2016-03-29|2016-03-29 11:23:00.234|2749         |2749         |
|2018-04-20|2018-04-20 12:34:00.543|1997         |1997         |
|2019-05-12|2019-05-12 13:21:00.567|1610         |1610         |
+----------+-----------------------+-------------+-------------+



In [0]:
# Show the docstring for months_between (fractional months between two dates).
help(months_between)

Help on function months_between in module pyspark.sql.functions:

months_between(date1: 'ColumnOrName', date2: 'ColumnOrName', roundOff: bool = True) -> pyspark.sql.column.Column
    Returns number of months between dates date1 and date2.
    If date1 is later than date2, then the result is positive.
    A whole number is returned if both inputs have the same day of month or both are the last day
    of their respective months. Otherwise, the difference is calculated assuming 31 days per month.
    The result is rounded off to 8 digits unless `roundOff` is set to `False`.
    
    .. versionadded:: 1.5.0
    
    .. versionchanged:: 3.4.0
        Supports Spark Connect.
    
    Parameters
    ----------
    date1 : :class:`~pyspark.sql.Column` or str
        first date column.
    date2 : :class:`~pyspark.sql.Column` or str
        second date column.
    roundOff : bool, optional
        whether to round (to 8 digits) the final value or not (default: True).
    
    Returns
    -----

In [0]:
# Show the docstring for add_months (adds a number of months to a date).
help(add_months)

Help on function add_months in module pyspark.sql.functions:

add_months(start: 'ColumnOrName', months: Union[ForwardRef('ColumnOrName'), int]) -> pyspark.sql.column.Column
    Returns the date that is `months` months after `start`. If `months` is a negative value
    then these amount of months will be deducted from the `start`.
    
    .. versionadded:: 1.5.0
    
    .. versionchanged:: 3.4.0
        Supports Spark Connect.
    
    Parameters
    ----------
    start : :class:`~pyspark.sql.Column` or str
        date column to work on.
    months : :class:`~pyspark.sql.Column` or str or int
        how many months after the given date to calculate.
        Accepts negative value as well to calculate backwards.
    
    Returns
    -------
    :class:`~pyspark.sql.Column`
        a date after/before given number of months.
    
    Examples
    --------
    >>> df = spark.createDataFrame([('2015-04-08', 2)], ['dt', 'add'])
    >>> df.select(add_months(df.dt, 1).alias('next_month'))


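Most date functions return a date even when they are applied to a timestamp column. `months_between` is an exception: it returns a fractional number of months, and with timestamp inputs the time of day is taken into account, so the date-based and timestamp-based results below can differ slightly.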

In [0]:
# Months elapsed up to "now" (rounded to 2 decimals) and each value shifted
# forward by 3 months. `round` here is pyspark.sql.functions.round, brought in
# by the wildcard import at the top of the notebook, not Python's builtin.
(
    datedf
    .withColumn(
        "months_between_date",
        round(months_between(current_date(), "date"), 2),
    )
    .withColumn(
        "months_between_time",
        round(months_between(current_timestamp(), "time"), 2),
    )
    .withColumn("add_months_date", add_months("date", 3))
    .withColumn("add_months_time", add_months("time", 3))
    .show(truncate=False)
)

+----------+-----------------------+-------------------+-------------------+---------------+---------------+
|date      |time                   |months_between_date|months_between_time|add_months_date|add_months_time|
+----------+-----------------------+-------------------+-------------------+---------------+---------------+
|2014-02-28|2014-02-28 10:00:00.123|115.35             |115.35             |2014-05-28     |2014-05-28     |
|2016-03-29|2016-03-29 11:23:00.234|90.32              |90.32              |2016-06-29     |2016-06-29     |
|2018-04-20|2018-04-20 12:34:00.543|65.61              |65.6               |2018-07-20     |2018-07-20     |
|2019-05-12|2019-05-12 13:21:00.567|52.87              |52.86              |2019-08-12     |2019-08-12     |
+----------+-----------------------+-------------------+-------------------+---------------+---------------+

