In [0]:
from pyspark.sql.functions import *

In [0]:
# Sample rows as (date, timestamp) string pairs. The date part is taken from
# the timestamp itself so the two columns always describe the same day.
datetimes = [
    (stamp.split(" ")[0], stamp)
    for stamp in (
        "2014-02-28 10:00:00.123",
        "2016-03-29 11:23:00.234",
        "2018-04-20 12:34:00.543",
        "2019-05-12 13:21:00.567",
    )
]

In [0]:
# Both columns are declared as STRING on purpose: the cells below show that
# date_format accepts date/timestamp values stored as strings.
datedf = spark.createDataFrame(
    datetimes,
    schema="date STRING, time STRING",
)

In [0]:
# Print the sample rows; truncate=False keeps the full timestamp strings visible.
datedf.show(truncate=False)

+----------+-----------------------+
|date      |time                   |
+----------+-----------------------+
|2014-02-28|2014-02-28 10:00:00.123|
|2016-03-29|2016-03-29 11:23:00.234|
|2018-04-20|2018-04-20 12:34:00.543|
|2019-05-12|2019-05-12 13:21:00.567|
+----------+-----------------------+



In [0]:
# Print the docstring of date_format (signature, parameters and a link to the
# Spark datetime pattern reference) before using it in the cells below.
help(date_format)

Help on function date_format in module pyspark.sql.functions:

date_format(date: 'ColumnOrName', format: str) -> pyspark.sql.column.Column
    Converts a date/timestamp/string to a value of string in the format specified by the date
    format given by the second argument.
    
    A pattern could be for instance `dd.MM.yyyy` and could return a string like '18.03.1993'. All
    pattern letters of `datetime pattern`_. can be used.
    
    .. _datetime pattern: https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html
    
    .. versionadded:: 1.5.0
    
    .. versionchanged:: 3.4.0
        Supports Spark Connect.
    
    Notes
    -----
    Whenever possible, use specialized functions like `year`.
    
    Parameters
    ----------
    date : :class:`~pyspark.sql.Column` or str
        input column of values to format.
    format: str
        format to use to represent datetime values.
    
    Returns
    -------
    :class:`~pyspark.sql.Column`
        string value representing formatted datetime.

- Get the year and month from both the `date` and `time` columns using the `yyyyMM` format. Also make sure that the result is converted to the integer data type (`date_format` itself returns a string).

In [0]:
# Year and month as a 'yyyyMM' string, taken from both the date column and the
# timestamp column. The result of date_format is a string column.
(
    datedf
    .withColumn("date_ym", date_format("date", "yyyyMM"))
    .withColumn("time_ym", date_format("time", "yyyyMM"))
    .show(truncate=False)
)

+----------+-----------------------+-------+-------+
|date      |time                   |date_ym|time_ym|
+----------+-----------------------+-------+-------+
|2014-02-28|2014-02-28 10:00:00.123|201402 |201402 |
|2016-03-29|2016-03-29 11:23:00.234|201603 |201603 |
|2018-04-20|2018-04-20 12:34:00.543|201804 |201804 |
|2019-05-12|2019-05-12 13:21:00.567|201905 |201905 |
+----------+-----------------------+-------+-------+




- yyyy : year
- MM   : month
- dd   : day of the month
- DDD  : day of the year, zero-padded to three digits (often loosely called the Julian day)
- HH   : 24 hour format
- hh   : 12 hour format
- mm   : minute
- ss   : second
- SSS  : millisecond

In [0]:
# Same 'yyyyMM' extraction as before, but each formatted string is cast to an
# integer so the year-month value is stored as a number.
(
    datedf
    .withColumn("date_ym", date_format("date", "yyyyMM").cast('int'))
    .withColumn("time_ym", date_format("time", "yyyyMM").cast('int'))
    .show(truncate=False)
)

+----------+-----------------------+-------+-------+
|date      |time                   |date_ym|time_ym|
+----------+-----------------------+-------+-------+
|2014-02-28|2014-02-28 10:00:00.123|201402 |201402 |
|2016-03-29|2016-03-29 11:23:00.234|201603 |201603 |
|2018-04-20|2018-04-20 12:34:00.543|201804 |201804 |
|2019-05-12|2019-05-12 13:21:00.567|201905 |201905 |
+----------+-----------------------+-------+-------+



In [0]:
# Compact 'yyyyMMddHHmmss' representation. The date column carries no time of
# day, so its hour/minute/second fields come out as 000000.
(
    datedf
    .withColumn("date_dt", date_format("date", "yyyyMMddHHmmss"))
    .withColumn("time_ts", date_format("time", "yyyyMMddHHmmss"))
    .show(truncate=False)
)

+----------+-----------------------+--------------+--------------+
|date      |time                   |date_dt       |time_ts       |
+----------+-----------------------+--------------+--------------+
|2014-02-28|2014-02-28 10:00:00.123|20140228000000|20140228100000|
|2016-03-29|2016-03-29 11:23:00.234|20160329000000|20160329112300|
|2018-04-20|2018-04-20 12:34:00.543|20180420000000|20180420123400|
|2019-05-12|2019-05-12 13:21:00.567|20190512000000|20190512132100|
+----------+-----------------------+--------------+--------------+



In [0]:
# Year followed by the day of the year ('DDD' pads the day to three digits,
# e.g. 28 February 2014 -> 2014059).
(
    datedf
    .withColumn("date_yd", date_format("date", "yyyyDDD"))
    .withColumn("time_yd", date_format("time", "yyyyDDD"))
    .show(truncate=False)
)

+----------+-----------------------+-------+-------+
|date      |time                   |date_yd|time_yd|
+----------+-----------------------+-------+-------+
|2014-02-28|2014-02-28 10:00:00.123|2014059|2014059|
|2016-03-29|2016-03-29 11:23:00.234|2016089|2016089|
|2018-04-20|2018-04-20 12:34:00.543|2018110|2018110|
|2019-05-12|2019-05-12 13:21:00.567|2019132|2019132|
+----------+-----------------------+-------+-------+



In [0]:
# Long, human-readable date: full month name, day of month without padding,
# four-digit year (e.g. "February 28, 2014").
# NOTE(review): the column name "date_yd" looks carried over from the
# year/day-of-year cell; it is kept unchanged so the recorded output matches.
(
    datedf
    .withColumn("date_yd", date_format("date", "MMMM d, yyyy"))
    .show(truncate=False)
)

+----------+-----------------------+-----------------+
|date      |time                   |date_yd          |
+----------+-----------------------+-----------------+
|2014-02-28|2014-02-28 10:00:00.123|February 28, 2014|
|2016-03-29|2016-03-29 11:23:00.234|March 29, 2016   |
|2018-04-20|2018-04-20 12:34:00.543|April 20, 2018   |
|2019-05-12|2019-05-12 13:21:00.567|May 12, 2019     |
+----------+-----------------------+-----------------+



In [0]:
# Abbreviated weekday name (e.g. "Fri"): fewer than four 'E' letters select
# the short text form.
(
    datedf
    .withColumn("date_abbr", date_format("date", "EE"))
    .show(truncate=False)
)

+----------+-----------------------+---------+
|date      |time                   |date_abbr|
+----------+-----------------------+---------+
|2014-02-28|2014-02-28 10:00:00.123|Fri      |
|2016-03-29|2016-03-29 11:23:00.234|Tue      |
|2018-04-20|2018-04-20 12:34:00.543|Fri      |
|2019-05-12|2019-05-12 13:21:00.567|Sun      |
+----------+-----------------------+---------+



In [0]:
# Full weekday name (e.g. "Friday"): exactly four 'E' letters select the
# full text form.
# NOTE(review): the column is still called "date_abbr" although it now holds
# unabbreviated names; the name is kept so the recorded output matches.
(
    datedf
    .withColumn("date_abbr", date_format("date", "EEEE"))
    .show(truncate=False)
)

+----------+-----------------------+---------+
|date      |time                   |date_abbr|
+----------+-----------------------+---------+
|2014-02-28|2014-02-28 10:00:00.123|Friday   |
|2016-03-29|2016-03-29 11:23:00.234|Tuesday  |
|2018-04-20|2018-04-20 12:34:00.543|Friday   |
|2019-05-12|2019-05-12 13:21:00.567|Sunday   |
+----------+-----------------------+---------+

