In [0]:
from pyspark.sql.functions import *

In [0]:
# Sample rows as (date string, timestamp string) pairs.
# Every date is simply the part of its timestamp before the space,
# so each pair is derived from the timestamp instead of being typed twice.
datetimes = [
    (stamp.split(" ", 1)[0], stamp)
    for stamp in (
        "2014-02-28 10:00:00.123",
        "2016-03-29 11:23:00.234",
        "2018-04-20 12:34:00.543",
        "2019-05-12 13:21:00.567",
    )
]

In [0]:
# Build the demo DataFrame; the DDL schema string declares both columns as STRING.
datedf = spark.createDataFrame(
    datetimes,
    schema="date STRING, time STRING",
)

In [0]:
# Print the rows; truncate=False keeps the full timestamp strings visible.
datedf.show(truncate=False)

+----------+-----------------------+
|date      |time                   |
+----------+-----------------------+
|2014-02-28|2014-02-28 10:00:00.123|
|2016-03-29|2016-03-29 11:23:00.234|
|2018-04-20|2018-04-20 12:34:00.543|
|2019-05-12|2019-05-12 13:21:00.567|
+----------+-----------------------+



In [0]:
# Print the docstring of trunc: its signature and the accepted format strings.
help(trunc)

Help on function trunc in module pyspark.sql.functions:

trunc(date: 'ColumnOrName', format: str) -> pyspark.sql.column.Column
    Returns date truncated to the unit specified by the format.
    
    .. versionadded:: 1.5.0
    
    .. versionchanged:: 3.4.0
        Supports Spark Connect.
    
    Parameters
    ----------
    date : :class:`~pyspark.sql.Column` or str
        input column of values to truncate.
    format : str
        'year', 'yyyy', 'yy' to truncate by year,
        or 'month', 'mon', 'mm' to truncate by month
        Other options are: 'week', 'quarter'
    
    Returns
    -------
    :class:`~pyspark.sql.Column`
        truncated date.
    
    Examples
    --------
    >>> df = spark.createDataFrame([('1997-02-28',)], ['d'])
    >>> df.select(trunc(df.d, 'year').alias('year')).collect()
    [Row(year=datetime.date(1997, 1, 1))]
    >>> df.select(trunc(df.d, 'mon').alias('month')).collect()
    [Row(month=datetime.date(1997, 2, 1))]



In [0]:
# Print the docstring of date_trunc; note its argument order is (format, timestamp).
help(date_trunc)

Help on function date_trunc in module pyspark.sql.functions:

date_trunc(format: str, timestamp: 'ColumnOrName') -> pyspark.sql.column.Column
    Returns timestamp truncated to the unit specified by the format.
    
    .. versionadded:: 2.3.0
    
    .. versionchanged:: 3.4.0
        Supports Spark Connect.
    
    Parameters
    ----------
    format : str
        'year', 'yyyy', 'yy' to truncate by year,
        'month', 'mon', 'mm' to truncate by month,
        'day', 'dd' to truncate by day,
        Other options are:
        'microsecond', 'millisecond', 'second', 'minute', 'hour', 'week', 'quarter'
    timestamp : :class:`~pyspark.sql.Column` or str
        input column of values to truncate.
    
    Returns
    -------
    :class:`~pyspark.sql.Column`
        truncated timestamp.
    
    Examples
    --------
    >>> df = spark.createDataFrame([('1997-02-28 05:02:11',)], ['t'])
    >>> df.select(date_trunc('year', df.t).alias('year')).collect()
    [Row(year=datetime.dateti


- Get the first day of the month from the `date` field and the first day of the year from the `time` field

In [0]:
# trunc() yields a date: "MM" snaps to the first day of the month,
# "yy" snaps to the first day of the year.
(
    datedf
    .withColumn("date_trunc_by_month", trunc("date", "MM"))
    .withColumn("time_trunc_by_year", trunc("time", "yy"))
    .show(truncate=False)
)

+----------+-----------------------+-------------------+------------------+
|date      |time                   |date_trunc_by_month|time_trunc_by_year|
+----------+-----------------------+-------------------+------------------+
|2014-02-28|2014-02-28 10:00:00.123|2014-02-01         |2014-01-01        |
|2016-03-29|2016-03-29 11:23:00.234|2016-03-01         |2016-01-01        |
|2018-04-20|2018-04-20 12:34:00.543|2018-04-01         |2018-01-01        |
|2019-05-12|2019-05-12 13:21:00.567|2019-05-01         |2019-01-01        |
+----------+-----------------------+-------------------+------------------+



- Get the beginning of the hour from the `date` and `time` fields using `date_trunc`

In [0]:
# Print the docstring of date_trunc as a reference for the cells that follow.
help(date_trunc)

Help on function date_trunc in module pyspark.sql.functions:

date_trunc(format: str, timestamp: 'ColumnOrName') -> pyspark.sql.column.Column
    Returns timestamp truncated to the unit specified by the format.
    
    .. versionadded:: 2.3.0
    
    .. versionchanged:: 3.4.0
        Supports Spark Connect.
    
    Parameters
    ----------
    format : str
        'year', 'yyyy', 'yy' to truncate by year,
        'month', 'mon', 'mm' to truncate by month,
        'day', 'dd' to truncate by day,
        Other options are:
        'microsecond', 'millisecond', 'second', 'minute', 'hour', 'week', 'quarter'
    timestamp : :class:`~pyspark.sql.Column` or str
        input column of values to truncate.
    
    Returns
    -------
    :class:`~pyspark.sql.Column`
        truncated timestamp.
    
    Examples
    --------
    >>> df = spark.createDataFrame([('1997-02-28 05:02:11',)], ['t'])
    >>> df.select(date_trunc('year', df.t).alias('year')).collect()
    [Row(year=datetime.dateti

In [0]:
# Same month/year truncation, this time with date_trunc(format, column):
# the result is a timestamp, so the values carry a 00:00:00 time part.
(
    datedf
    .withColumn("date_trunc_by_month", date_trunc("MM", "date"))
    .withColumn("time_trunc_by_year", date_trunc("yy", "time"))
    .show(truncate=False)
)

+----------+-----------------------+-------------------+-------------------+
|date      |time                   |date_trunc_by_month|time_trunc_by_year |
+----------+-----------------------+-------------------+-------------------+
|2014-02-28|2014-02-28 10:00:00.123|2014-02-01 00:00:00|2014-01-01 00:00:00|
|2016-03-29|2016-03-29 11:23:00.234|2016-03-01 00:00:00|2016-01-01 00:00:00|
|2018-04-20|2018-04-20 12:34:00.543|2018-04-01 00:00:00|2018-01-01 00:00:00|
|2019-05-12|2019-05-12 13:21:00.567|2019-05-01 00:00:00|2019-01-01 00:00:00|
+----------+-----------------------+-------------------+-------------------+



In [0]:
# "HOUR" drops minutes and smaller units; "dd" drops the whole time of day.
# The date column has no time part, so truncating it to the hour gives midnight.
(
    datedf
    .withColumn("date_dt", date_trunc("HOUR", "date"))
    .withColumn("time_dt", date_trunc("HOUR", "time"))
    .withColumn("time_dt1", date_trunc("dd", "time"))
    .show(truncate=False)
)

+----------+-----------------------+-------------------+-------------------+-------------------+
|date      |time                   |date_dt            |time_dt            |time_dt1           |
+----------+-----------------------+-------------------+-------------------+-------------------+
|2014-02-28|2014-02-28 10:00:00.123|2014-02-28 00:00:00|2014-02-28 10:00:00|2014-02-28 00:00:00|
|2016-03-29|2016-03-29 11:23:00.234|2016-03-29 00:00:00|2016-03-29 11:00:00|2016-03-29 00:00:00|
|2018-04-20|2018-04-20 12:34:00.543|2018-04-20 00:00:00|2018-04-20 12:00:00|2018-04-20 00:00:00|
|2019-05-12|2019-05-12 13:21:00.567|2019-05-12 00:00:00|2019-05-12 13:00:00|2019-05-12 00:00:00|
+----------+-----------------------+-------------------+-------------------+-------------------+

